{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [], "authorship_tag": "ABX9TyPWuwGA+W3PEAJibr/dPzGq", "include_colab_link": true }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" }, "widgets": { "application/vnd.jupyter.widget-state+json": { "45eec1be3a594149903926a6bc428eea": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_9078e76353764dceb7cf93bc33a6463d", "IPY_MODEL_fe9a55d7eb6e4013ad1e781b01edd53d", "IPY_MODEL_c7bd4b1f12d549f5a6b5ebb714bf5946" ], "layout": "IPY_MODEL_60ce45c92536449682c51c670b9a05be" } }, "9078e76353764dceb7cf93bc33a6463d": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_d45ad91af4f042938294340c44188773", "placeholder": "​", "style": "IPY_MODEL_734caeff9c014c27b374922b82fffdd0", "value": "Map: 100%" } }, "fe9a55d7eb6e4013ad1e781b01edd53d": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": 
"ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_e76209249d094aaa88af45102271bbc1", "max": 6, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_437209703da64cb78ac38c57834d1db0", "value": 6 } }, "c7bd4b1f12d549f5a6b5ebb714bf5946": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_230c23d8b2654debb2405d1fae2bd772", "placeholder": "​", "style": "IPY_MODEL_dd62e1e5d5da48e1887be89a74d50a6e", "value": " 6/6 [00:00<00:00, 223.68 examples/s]" } }, "60ce45c92536449682c51c670b9a05be": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": 
null, "visibility": null, "width": null } }, "d45ad91af4f042938294340c44188773": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "734caeff9c014c27b374922b82fffdd0": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "e76209249d094aaa88af45102271bbc1": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": 
"LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "437209703da64cb78ac38c57834d1db0": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "230c23d8b2654debb2405d1fae2bd772": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": 
null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "dd62e1e5d5da48e1887be89a74d50a6e": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "6ec35d98aa3d453aa35d1f3494ff06b0": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_0ebd9010697542aa9862eec60acdd03b", "IPY_MODEL_26b44a397c50412696beb5cecc3b7d05", "IPY_MODEL_2f67bd22b78e4fa193816e7db10b2410" ], "layout": "IPY_MODEL_df02dab90c58468c9350094dd517c7b7" } }, "0ebd9010697542aa9862eec60acdd03b": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_7a2532f556d0452bafd6aef90d06b66d", "placeholder": "​", "style": 
"IPY_MODEL_bbf4d362165f433fbd39884e143fbd41", "value": "Map: 100%" } }, "26b44a397c50412696beb5cecc3b7d05": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_81ead583e5aa461eb391bd0241bfbefb", "max": 6, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_a1e36b507583414e8e052c0bd9023f74", "value": 6 } }, "2f67bd22b78e4fa193816e7db10b2410": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_b3b00fc23c3445cfb24edb9f5cca6a15", "placeholder": "​", "style": "IPY_MODEL_3232892540c24cfab17952583950bbfb", "value": " 6/6 [00:00<00:00, 90.33 examples/s]" } }, "df02dab90c58468c9350094dd517c7b7": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, 
"grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "7a2532f556d0452bafd6aef90d06b66d": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "bbf4d362165f433fbd39884e143fbd41": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, 
"_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "81ead583e5aa461eb391bd0241bfbefb": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "a1e36b507583414e8e052c0bd9023f74": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "b3b00fc23c3445cfb24edb9f5cca6a15": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, 
"_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "3232892540c24cfab17952583950bbfb": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } } } } }, "cells": [ { "cell_type": "markdown", "metadata": { "id": "view-in-github", "colab_type": "text" }, "source": [ "\"Open" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "whbox1Gs7Azv", "outputId": "c5cb9815-6914-4684-c8d8-4652fabb168e" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Successfully logged in as jattman1993\n" ] } ], "source": [ "#login into Instgram using intagraapi\n", "\n", "from instagrapi import Client\n", "from google.colab import userdata\n", "\n", "\n", "# --- USER SETTINGS ---\n", "# Replace with your actual Instagram credentials or use Colab secrets\n", "# Remember 
def personality_save_decision(reel, personality_profile):
    """
    Score how well a reel matches the INTJ-T profile this notebook targets.

    Trait weights (values in brackets are the profile percentiles):
    - High: Inquisitiveness (90), Aesthetic Appreciation (97), Autonomy (97),
            Creativity (83), Intellectual Efficiency (67), Innovation (87)
    - Low:  Sociability (3-7), Affiliation (13), Social boldness (43)

    Each keyword category contributes its weight at most once, no matter how
    many of its keywords occur. Keywords are matched against whole caption
    words (with a trailing plural "s" tolerated), not raw substrings, so
    "party" does not trigger 'art' and "show" does not trigger 'how'.

    Args:
        reel: Media-like object. Only ``caption_text`` (falling back to
            ``caption``) and ``video_duration`` are read; a missing or
            ``None`` value is treated as an empty caption / zero duration.
        personality_profile: Currently unused; kept so existing callers keep
            working unchanged.

    Returns:
        int: Sum of the matched category weights (can be negative).
    """
    # (weight, keywords) pairs; the keyword lists are unchanged from the
    # original scorer, only the matching strategy differs.
    keyword_weights = (
        # HIGH INQUISITIVENESS (90) - knowledge, learning, exploration
        (3, ('science', 'research', 'explained', 'how', 'why', 'theory',
             'analysis', 'study', 'facts', 'discovery', 'experiment',
             'psychology', 'philosophy', 'history', 'technology')),
        # HIGH AESTHETIC APPRECIATION (97) - visual beauty, art, design
        (3, ('aesthetic', 'art', 'design', 'beautiful', 'visual',
             'cinematography', 'photography', 'architecture', 'minimal',
             'composition', 'color', 'artistic')),
        # HIGH AUTONOMY (97) - independence, self-reliance, unconventional
        (2, ('independent', 'solo', 'self', 'individual', 'unique',
             'unconventional', 'different', 'original', 'personal')),
        # HIGH CREATIVITY (83) & INNOVATION (87) - creative content, new ideas
        (2, ('creative', 'innovative', 'invention', 'new', 'original',
             'diy', 'build', 'create', 'design', 'craft', 'make')),
        # LOW SOCIABILITY (3-7) - penalise highly social content
        (-2, ('party', 'friends', 'group', 'social', 'together', 'crowd',
              'everyone', 'people', 'community', 'team')),
        # LOW AFFILIATION (13) - penalise relationship/emotional content
        # ('together' is also in the social list, so it is penalised twice)
        (-2, ('relationship', 'love', 'heart', 'feelings', 'emotional',
              'together', 'couple', 'romantic', 'cute', 'sweet')),
        # BONUS for complexity and depth
        (1, ('complex', 'deep', 'detailed', 'comprehensive', 'advanced',
             'expert', 'professional', 'technical', 'analysis')),
        # BONUS for educational/tutorial content
        (2, ('tutorial', 'learn', 'guide', 'tip', 'hack', 'skill',
             'knowledge', 'education', 'teach', 'lesson')),
    )

    caption = getattr(reel, 'caption_text', '') or getattr(reel, 'caption', '') or ''
    if not isinstance(caption, str):
        caption = str(caption)

    # Split on anything that is not a letter/digit so "#diy," yields "diy".
    normalized = ''.join(ch if ch.isalnum() else ' ' for ch in caption.lower())
    words = set(normalized.split())
    # Tolerate simple plurals ("tips" -> "tip", "hacks" -> "hack").
    words |= {word[:-1] for word in words if len(word) > 3 and word.endswith('s')}

    score = 0
    for weight, keywords in keyword_weights:
        if not words.isdisjoint(keywords):
            score += weight

    # HIGH INTELLECTUAL EFFICIENCY (67) - prefer medium-length clips.
    # `or 0` guards against video_duration being present but None.
    duration = getattr(reel, 'video_duration', 0) or 0
    if 10 <= duration <= 45:
        score += 1

    return score
# --- SCORE, RANK AND SAVE EXPLORE REELS ---
# Relies on names defined earlier in this notebook: `cl` (a logged-in
# instagrapi Client), the USER SETTINGS constants, `personality_profile`,
# the `saved_reels` / `candidate_reels` / `processed_reels` accumulators and
# personality_save_decision().
print(f"Processing up to {TARGET_REELS_COUNT} reels and applying personality-based selection...")

# Fetch explore reels. Without an explicit amount the call returned only 10
# reels in the recorded run, so TARGET_REELS_COUNT was never honoured.
try:
    explore_reels = cl.explore_reels(amount=TARGET_REELS_COUNT)
    print(f"Fetched {len(explore_reels)} explore reels.")
except Exception as e:
    print(f"Error fetching explore reels: {e}")
    explore_reels = []  # Keep the rest of the cell runnable on failure

# First pass: score every fetched reel (capped at TARGET_REELS_COUNT).
reels_to_score = explore_reels[:TARGET_REELS_COUNT]
for reel in reels_to_score:
    processed_reels += 1
    # Report progress against the real batch size, not the requested target.
    print(f"Analyzing Reel {processed_reels}/{len(reels_to_score)} (ID: {reel.id})...")

    score = personality_save_decision(reel, personality_profile)
    candidate_reels.append((reel, score))
    print(f"Score: {score}")

# Highest score first; list.sort is stable, so ties keep their feed order.
candidate_reels.sort(key=lambda item: item[1], reverse=True)

print(f"\nSelecting reels to save (minimum {MIN_SAVES}, maximum {MAX_SAVES})...")

# Save every positively scored reel, but never fewer than MIN_SAVES (topping up
# with the best non-positive ones) and never more than MAX_SAVES. The slice
# below naturally shrinks if fewer reels were fetched.
positive_count = sum(1 for _, reel_score in candidate_reels if reel_score > 0)
reels_to_save = min(MAX_SAVES, max(MIN_SAVES, positive_count))
selected_reels = candidate_reels[:reels_to_save]

print(f"Selected {len(selected_reels)} reels to save based on personality match.")

# --- SAVE SELECTED REELS TO COLLECTION ---

print(f"\nAttempting to save selected reels to the '{COLLECTION_NAME}' collection...")

try:
    print(f"Looking for existing collection: '{COLLECTION_NAME}'...")
    collections = cl.collections()
    collab_collection = next((c for c in collections if c.name == COLLECTION_NAME), None)

    if not collab_collection:
        print(f"Collection '{COLLECTION_NAME}' not found. Please create it manually on Instagram.")
        print("Skipping adding media to collection.")
    else:
        print(f"Using existing collection: {COLLECTION_NAME} (ID: {collab_collection.id})")

        for i, (reel, score) in enumerate(selected_reels, 1):
            try:
                # media_save (not collection_add_media) files the reel under the collection
                cl.media_save(reel.id, collab_collection.id)
                print(f"Saved reel {i}/{len(selected_reels)} (ID: {reel.id}, Score: {score}) to '{COLLECTION_NAME}'.")
                saved_reels.append(reel.id)
                time.sleep(random.uniform(1, 2))  # Human-like delay
            except Exception as save_error:
                # One failed save must not abort the remaining ones.
                print(f"Could not save reel {reel.id} to collection: {save_error}")

except Exception as e:
    print(f"An error occurred during collection management: {e}")

print("\nProcessing complete!")
print(f"Total reels analyzed: {processed_reels}")
print(f"Total reels saved to collection: {len(saved_reels)}")
print(f"Saved reels: {saved_reels}")
3662134177216259058_73901491628)...\n", "Score: 0\n", "Analyzing Reel 2/100 (ID: 3649275160120309436_64703355772)...\n", "Score: 1\n", "Analyzing Reel 3/100 (ID: 3647708429135219905_74996099567)...\n", "Score: 1\n", "Analyzing Reel 4/100 (ID: 3661305151730419580_74829121084)...\n", "Score: 1\n", "Analyzing Reel 5/100 (ID: 3661562890869692956_70339477089)...\n", "Score: 4\n", "Analyzing Reel 6/100 (ID: 3596378784200721531_61221585315)...\n", "Score: 1\n", "Analyzing Reel 7/100 (ID: 3656188996830676738_71478687552)...\n", "Score: 0\n", "Analyzing Reel 8/100 (ID: 3660403760326938347_53789360284)...\n", "Score: 1\n", "Analyzing Reel 9/100 (ID: 3587235477495027432_57311022889)...\n", "Score: 6\n", "Analyzing Reel 10/100 (ID: 3662134540459833537_73901491628)...\n", "Score: 0\n", "\n", "Selecting reels to save (minimum 7, maximum 15)...\n", "Selected 7 reels to save based on personality match.\n", "\n", "Attempting to save selected reels to the 'Collab Reels' collection...\n", "Looking for existing collection: 'Collab Reels'...\n", "Using existing collection: Collab Reels (ID: 17886194994184734)\n", "Saved reel 1/7 (ID: 3587235477495027432_57311022889, Score: 6) to 'Collab Reels'.\n", "Saved reel 2/7 (ID: 3661562890869692956_70339477089, Score: 4) to 'Collab Reels'.\n", "Saved reel 3/7 (ID: 3649275160120309436_64703355772, Score: 1) to 'Collab Reels'.\n", "Saved reel 4/7 (ID: 3647708429135219905_74996099567, Score: 1) to 'Collab Reels'.\n", "Saved reel 5/7 (ID: 3661305151730419580_74829121084, Score: 1) to 'Collab Reels'.\n", "Saved reel 6/7 (ID: 3596378784200721531_61221585315, Score: 1) to 'Collab Reels'.\n", "Saved reel 7/7 (ID: 3660403760326938347_53789360284, Score: 1) to 'Collab Reels'.\n", "\n", "Processing complete!\n", "Total reels analyzed: 10\n", "Total reels saved to collection: 7\n", "Saved reels: ['3587235477495027432_57311022889', '3661562890869692956_70339477089', '3649275160120309436_64703355772', '3647708429135219905_74996099567', 
# Reel-fetching step: ask the logged-in client for the current Explore reels
# (the run recorded in this notebook returned 10 of them).
explore_reels = []  # fallback value so later cells always find the name
try:
    explore_reels = cl.explore_reels()
    print(f"Fetched {len(explore_reels)} explore reels.")
except Exception as fetch_error:
    print(f"Error fetching explore reels for analysis: {fetch_error}")
    explore_reels = []
def __init__(self):
    """Pick the compute device (CUDA when available) and load all models."""
    self.device = "cuda" if torch.cuda.is_available() else "cpu"
    self._initialize_models()

def _initialize_models(self):
    """Load every tokenizer/model and build the lookup tables used later.

    Sets on ``self``:
        emotion_tokenizer / emotion_model: English emotion classifier.
        sentiment_tokenizer / sentiment_model: English sentiment classifier.
        hindi_tokenizer / hindi_model: IndicBERT with a fresh 3-way head
            (negative / neutral / positive) meant to be fine-tuned.
        hindi_label2id: label-name -> class-id mapping of the Hindi model.
        emotion_map: emotion label -> coarse sentiment.
        neutral_keywords: promotional words (English and Hindi).

    All models are moved to ``self.device``. Loading downloads the
    checkpoints on first use.
    """
    # English models
    emotion_checkpoint = "finiteautomata/bertweet-base-emotion-analysis"
    self.emotion_tokenizer = AutoTokenizer.from_pretrained(emotion_checkpoint)
    self.emotion_model = AutoModelForSequenceClassification.from_pretrained(
        emotion_checkpoint
    ).to(self.device)

    sentiment_checkpoint = "cardiffnlp/twitter-roberta-base-sentiment-latest"
    self.sentiment_tokenizer = AutoTokenizer.from_pretrained(sentiment_checkpoint)
    self.sentiment_model = RobertaForSequenceClassification.from_pretrained(
        sentiment_checkpoint,
        ignore_mismatched_sizes=True
    ).to(self.device)

    # Hindi/English model (we'll fine-tune this).
    # ALBERT tokenizers strip combining marks unless keep_accents=True, which
    # would drop Devanagari signs such as the virama before tokenization.
    hindi_checkpoint = "ai4bharat/indic-bert"
    self.hindi_tokenizer = AutoTokenizer.from_pretrained(hindi_checkpoint, keep_accents=True)
    self.hindi_model = AlbertForSequenceClassification.from_pretrained(
        hindi_checkpoint,
        num_labels=3,
        id2label={0: "negative", 1: "neutral", 2: "positive"},
        label2id={"negative": 0, "neutral": 1, "positive": 2}
    ).to(self.device)
    # Store label2id mapping for easy access
    self.hindi_label2id = self.hindi_model.config.label2id

    # Emotion to sentiment mapping. "disgust" and "others" are labels of the
    # emotion checkpoint above (NOTE(review): confirm against its model
    # card); without them those predictions had no sentiment to map to.
    self.emotion_map = {
        "joy": "positive", "love": "positive", "happy": "positive",
        "anger": "negative", "sadness": "negative", "fear": "negative",
        "disgust": "negative",
        "surprise": "neutral", "neutral": "neutral", "others": "neutral"
    }

    # Neutral keywords
    self.neutral_keywords = {
        "ad", "sponsored", "promo", "sale", "discount", "offer",
        "विज्ञापन", "प्रचार", "ऑफर", "डिस्काउंट", "बिक्री"
    }
def train_hindi_model(self, train_data, eval_data=None):\n", " \"\"\"\n", " Fine-tune the Hindi/English model on labeled data\n", " Args:\n", " train_data: List of dicts [{\"text\": \"...\", \"label\": \"positive/negative/neutral\"}]\n", " eval_data: Optional evaluation data\n", " \"\"\"\n", " # Convert to dataset\n", " train_dataset = Dataset.from_pandas(pd.DataFrame(train_data))\n", "\n", " # Map string labels to integer IDs\n", " def map_labels_to_ids(examples):\n", " examples[\"label\"] = [self.hindi_label2id[label] for label in examples[\"label\"]]\n", " return examples\n", "\n", " train_dataset = train_dataset.map(map_labels_to_ids, batched=True)\n", "\n", " # Explicitly set the label column to integer type\n", " train_dataset = train_dataset.cast_column(\"label\", Value(\"int64\"))\n", "\n", "\n", " def tokenize_function(examples):\n", " return self.hindi_tokenizer(\n", " examples[\"text\"],\n", " padding=\"max_length\",\n", " truncation=True,\n", " max_length=CONFIG[\"max_length\"]\n", " )\n", "\n", " tokenized_train = train_dataset.map(tokenize_function, batched=True)\n", "\n", " # Training arguments - using eval_strategy instead of evaluation_strategy\n", " training_args = TrainingArguments(\n", " output_dir=\"./results\",\n", " eval_strategy=\"epoch\" if eval_data else \"no\",\n", " per_device_train_batch_size=CONFIG[\"batch_size\"],\n", " per_device_eval_batch_size=CONFIG[\"batch_size\"],\n", " learning_rate=CONFIG[\"learning_rate\"],\n", " num_train_epochs=CONFIG[\"num_train_epochs\"],\n", " weight_decay=0.01,\n", " save_strategy=\"no\",\n", " logging_dir='./logs',\n", " logging_steps=10,\n", " report_to=\"none\"\n", " )\n", "\n", " # Compute metrics function\n", " def compute_metrics(p):\n", " predictions, labels = p\n", " predictions = np.argmax(predictions, axis=1)\n", " return {\n", " \"accuracy\": accuracy_score(labels, predictions),\n", " \"f1\": f1_score(labels, predictions, average=\"weighted\")\n", " }\n", "\n", " # Trainer\n", " 
eval_dataset_processed = None\n", " if eval_data:\n", " eval_dataset = Dataset.from_pandas(pd.DataFrame(eval_data))\n", " eval_dataset = eval_dataset.map(map_labels_to_ids, batched=True)\n", " eval_dataset_processed = eval_dataset.cast_column(\"label\", Value(\"int64\")).map(tokenize_function, batched=True)\n", "\n", "\n", " trainer = Trainer(\n", " model=self.hindi_model,\n", " args=training_args,\n", " train_dataset=tokenized_train,\n", " eval_dataset=eval_dataset_processed,\n", " compute_metrics=compute_metrics if eval_data else None,\n", " )\n", "\n", " # Train\n", " trainer.train()\n", "\n", " # Save the fine-tuned model\n", " self.hindi_model.save_pretrained(\"./fine_tuned_hindi_sentiment\")\n", " self.hindi_tokenizer.save_pretrained(\"./fine_tuned_hindi_sentiment\")\n", "\n", " def preprocess_text(self, text):\n", " \"\"\"Enhanced text cleaning with multilingual support\"\"\"\n", " if not text:\n", " return \"\"\n", "\n", " # Convert emojis to text\n", " text = emoji.demojize(text, delimiters=(\" \", \" \"))\n", "\n", " # Remove URLs and mentions\n", " text = re.sub(r\"http\\S+|@\\w+\", \"\", text)\n", "\n", " # Expand common abbreviations\n", " abbrevs = {\n", " r\"\\bomg\\b\": \"oh my god\",\n", " r\"\\btbh\\b\": \"to be honest\",\n", " r\"\\bky\\b\": \"kyun\", # Hindi 'why'\n", " r\"\\bkb\\b\": \"kab\", # Hindi 'when'\n", " }\n", " for pattern, replacement in abbrevs.items():\n", " text = re.sub(pattern, replacement, text, flags=re.IGNORECASE)\n", "\n", " return text.strip()\n", "\n", " def detect_language(self, text):\n", " \"\"\"Improved language detection\"\"\"\n", " if re.search(r\"[\\u0900-\\u097F]\", text): # Devanagari\n", " return \"hi\"\n", " elif any(re.search(rf\"\\b{kw}\\b\", text.lower()) for kw in [\"hai\", \"kyun\", \"nahi\"]): # Hinglish\n", " return \"hi-latin\"\n", " return \"en\"\n", "\n", " def analyze_content(self, text):\n", " \"\"\"Main analysis function with improved confidence handling\"\"\"\n", " processed = 
self.preprocess_text(text)\n", " lang = self.detect_language(processed)\n", "\n", " # Check for neutral keywords first\n", " if any(re.search(rf\"\\b{kw}\\b\", processed.lower()) for kw in self.neutral_keywords):\n", " return \"neutral\", 0.95, {\"reason\": \"neutral_keyword\"}\n", "\n", " try:\n", " if lang in (\"hi\", \"hi-latin\"):\n", " return self._analyze_hindi_content(processed)\n", " else:\n", " return self._analyze_english_content(processed)\n", " except Exception as e:\n", " print(f\"Analysis error: {e}\")\n", " return \"neutral\", 0.5, {\"error\": str(e)}\n", "\n", " def _analyze_hindi_content(self, text):\n", " \"\"\"Analyze Hindi content with fine-tuned model\"\"\"\n", " inputs = self.hindi_tokenizer(\n", " text,\n", " return_tensors=\"pt\",\n", " truncation=True,\n", " padding=True,\n", " max_length=CONFIG[\"max_length\"]\n", " ).to(self.device)\n", "\n", " with torch.no_grad():\n", " outputs = self.hindi_model(**inputs)\n", "\n", " probs = torch.nn.functional.softmax(outputs.logits, dim=-1)\n", " pred_idx = torch.argmax(probs).item()\n", " confidence = probs[0][pred_idx].item()\n", "\n", " label = self.hindi_model.config.id2label[pred_idx]\n", " return label, confidence, {\"model\": \"fine-tuned-indic-bert\"}\n", "\n", " def _analyze_english_content(self, text):\n", " \"\"\"Analyze English content with ensemble approach\"\"\"\n", " # Emotion analysis\n", " emotion_inputs = self.emotion_tokenizer(\n", " text,\n", " return_tensors=\"pt\",\n", " truncation=True,\n", " max_length=CONFIG[\"max_length\"]\n", " ).to(self.device)\n", "\n", " with torch.no_grad():\n", " emotion_outputs = self.emotion_model(**emotion_inputs)\n", "\n", " emotion_probs = torch.nn.functional.softmax(emotion_outputs.logits, dim=-1)\n", " emotion_pred = torch.argmax(emotion_probs).item()\n", " emotion_label = self.emotion_model.config.id2label[emotion_pred]\n", " emotion_score = emotion_probs[0][emotion_pred].item()\n", "\n", " # Sentiment analysis\n", " sentiment_inputs = 
self.sentiment_tokenizer(\n", " text,\n", " return_tensors=\"pt\",\n", " truncation=True,\n", " max_length=CONFIG[\"max_length\"]\n", " ).to(self.device)\n", "\n", " with torch.no_grad():\n", " sentiment_outputs = self.sentiment_model(**sentiment_inputs)\n", "\n", " sentiment_probs = torch.nn.functional.softmax(sentiment_outputs.logits, dim=-1)\n", " sentiment_pred = torch.argmax(sentiment_probs).item()\n", " sentiment_label = self.sentiment_model.config.id2label[sentiment_pred].lower()\n", " sentiment_score = sentiment_probs[0][sentiment_pred].item()\n", "\n", " # Combine results\n", " mapped_emotion = self.emotion_map.get(emotion_label, \"neutral\")\n", "\n", " if sentiment_score > CONFIG[\"confidence_threshold\"]:\n", " final_label = sentiment_label\n", " final_confidence = sentiment_score\n", " elif emotion_score > CONFIG[\"confidence_threshold\"] and mapped_emotion != \"neutral\":\n", " final_label = mapped_emotion\n", " final_confidence = emotion_score\n", " else:\n", " # Weighted average fallback\n", " emotion_weight = 0.6 if mapped_emotion != \"neutral\" else 0.3\n", " sentiment_weight = 0.4 if sentiment_label != \"neutral\" else 0.2\n", " neutral_weight = 0.3\n", "\n", " pos_score = (emotion_weight * (mapped_emotion == \"positive\") +\n", " sentiment_weight * (sentiment_label == \"positive\"))\n", " neg_score = (emotion_weight * (mapped_emotion == \"negative\") +\n", " sentiment_weight * (sentiment_label == \"negative\"))\n", "\n", " if pos_score > neg_score and pos_score > neutral_weight:\n", " final_label = \"positive\"\n", " final_confidence = (pos_score / (pos_score + neg_score + neutral_weight)) * 0.8\n", " elif neg_score > pos_score and neg_score > neutral_weight:\n", " final_label = \"negative\"\n", " final_confidence = (neg_score / (pos_score + neg_score + neutral_weight)) * 0.8\n", " else:\n", " final_label = \"neutral\"\n", " final_confidence = 0.7\n", "\n", " return final_label, final_confidence, {\n", " \"emotion\": emotion_label,\n", " 
\"sentiment\": sentiment_label,\n", " \"model\": \"ensemble\"\n", " }\n", "\n", " def analyze_reels(self, reels, max_to_analyze=100):\n", " \"\"\"Batch analysis with improved neutral handling\"\"\"\n", " results = Counter()\n", " detailed_results = []\n", "\n", " for reel in reels[:max_to_analyze]:\n", " caption = getattr(reel, 'caption_text', '') or getattr(reel, 'caption', '') or ''\n", " label, confidence, details = self.analyze_content(caption)\n", " results[label] += 1\n", " detailed_results.append({\n", " \"text\": caption,\n", " \"label\": label,\n", " \"confidence\": confidence,\n", " \"details\": details\n", " })\n", "\n", " # Post-analysis neutral reduction\n", " if results[\"neutral\"] / sum(results.values()) > CONFIG[\"neutral_reanalysis_threshold\"]:\n", " self._reduce_neutrals(results, detailed_results)\n", "\n", " return results, detailed_results\n", "\n", " def _reduce_neutrals(self, results, detailed_results):\n", " \"\"\"Apply additional techniques to reduce neutral classifications\"\"\"\n", " for item in detailed_results:\n", " if item[\"label\"] == \"neutral\" and item[\"confidence\"] < 0.8:\n", " # Try keyword analysis\n", " text_lower = item[\"text\"].lower()\n", " pos_keywords = {\"great\", \"awesome\", \"love\", \"best\", \"शानदार\", \"अद्भुत\"}\n", " neg_keywords = {\"bad\", \"worst\", \"hate\", \"खराब\", \"बेकार\"}\n", "\n", " pos_count = sum(1 for kw in pos_keywords if kw in text_lower)\n", " neg_count = sum(1 for kw in neg_keywords if kw in text_lower)\n", "\n", " if pos_count > neg_count and pos_count >= 2:\n", " results[\"neutral\"] -= 1\n", " results[\"positive\"] += 1\n", " item.update({\n", " \"label\": \"positive\",\n", " \"confidence\": min(0.9, item[\"confidence\"] + 0.3),\n", " \"reanalyzed\": True\n", " })\n", " elif neg_count > pos_count and neg_count >= 2:\n", " results[\"neutral\"] -= 1\n", " results[\"negative\"] += 1\n", " item.update({\n", " \"label\": \"negative\",\n", " \"confidence\": min(0.9, item[\"confidence\"] + 
0.3),\n", " \"reanalyzed\": True\n", " })\n", "\n", "# Example usage\n", "if __name__ == \"__main__\":\n", " # Initialize analyzer\n", " analyzer = ReelSentimentAnalyzer()\n", "\n", " # Example training (in practice you'd load real labeled data)\n", " train_data = [\n", " {\"text\": \"I love this product!\", \"label\": \"positive\"},\n", " {\"text\": \"This is terrible quality\", \"label\": \"negative\"},\n", " {\"text\": \"Just sharing my order details\", \"label\": \"neutral\"},\n", " {\"text\": \"यह उत्पाद अद्भुत है!\", \"label\": \"positive\"},\n", " {\"text\": \"बहुत खराब गुणवत्ता\", \"label\": \"negative\"},\n", " {\"text\": \"यह एक सामान्य उत्पाद है\", \"label\": \"neutral\"} # Added a neutral Hindi example\n", " ]\n", "\n", " # Fine-tune the Hindi model\n", " print(\"Starting Hindi model training...\")\n", " analyzer.train_hindi_model(train_data)\n", " print(\"Hindi model training complete.\")\n", "\n", "\n", " # Analyze some reels (mock data)\n", " class MockReel:\n", " def __init__(self, caption):\n", " self.caption_text = caption\n", "\n", " mock_reels = [\n", " MockReel(\"This is amazing! Love it so much 😍\"),\n", " MockReel(\"Not happy with the quality at all\"),\n", " MockReel(\"Check out our new sale - 50% off everything\"),\n", " MockReel(\"मुझे यह उत्पाद पसंद नहीं आया\"), # Hindi: I didn't like this product\n", " MockReel(\"यह एक सामान्य रील है\"), # Hindi: This is a normal reel\n", " MockReel(\"Great weather today! Awesome vibes.\"),\n", " MockReel(\"Worst movie ever. 
Hate it.\")\n", " ]\n", "\n", " print(\"\\nAnalyzing mock reels...\")\n", " results, details = analyzer.analyze_reels(mock_reels)\n", " print(\"\\nFinal Results:\")\n", " for label, count in results.items():\n", " print(f\"{label}: {count}\")\n", "\n", " print(\"\\nDetailed Results:\")\n", " for item in details:\n", " print(f\"Text: {item['text'][:50]}...\")\n", " print(f\" Label: {item['label']} (Confidence: {item['confidence']:.2f})\")\n", " print(f\" Details: {item['details']}\")\n", " if 'reanalyzed' in item:\n", " print(\" Reanalyzed: Yes\")\n", " print(\"-\" * 20)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 954, "referenced_widgets": [ "45eec1be3a594149903926a6bc428eea", "9078e76353764dceb7cf93bc33a6463d", "fe9a55d7eb6e4013ad1e781b01edd53d", "c7bd4b1f12d549f5a6b5ebb714bf5946", "60ce45c92536449682c51c670b9a05be", "d45ad91af4f042938294340c44188773", "734caeff9c014c27b374922b82fffdd0", "e76209249d094aaa88af45102271bbc1", "437209703da64cb78ac38c57834d1db0", "230c23d8b2654debb2405d1fae2bd772", "dd62e1e5d5da48e1887be89a74d50a6e", "6ec35d98aa3d453aa35d1f3494ff06b0", "0ebd9010697542aa9862eec60acdd03b", "26b44a397c50412696beb5cecc3b7d05", "2f67bd22b78e4fa193816e7db10b2410", "df02dab90c58468c9350094dd517c7b7", "7a2532f556d0452bafd6aef90d06b66d", "bbf4d362165f433fbd39884e143fbd41", "81ead583e5aa461eb391bd0241bfbefb", "a1e36b507583414e8e052c0bd9023f74", "b3b00fc23c3445cfb24edb9f5cca6a15", "3232892540c24cfab17952583950bbfb" ] }, "outputId": "02e307ca-8c6c-4103-baed-a5c41196164d", "id": "QuJyYbDdWzQ0" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stderr", "text": [ "Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']\n", "- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another 
task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", "- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n", "Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at ai4bharat/indic-bert and are newly initialized: ['classifier.bias', 'classifier.weight']\n", "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n" ] }, { "output_type": "stream", "name": "stdout", "text": [ "Starting Hindi model training...\n" ] }, { "output_type": "display_data", "data": { "text/plain": [ "Map: 0%| | 0/6 [00:00" ], "text/html": [ "\n", "
\n", " \n", " \n", " [3/3 00:16, Epoch 3/3]\n", "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
StepTraining Loss

" ] }, "metadata": {} }, { "output_type": "stream", "name": "stdout", "text": [ "Hindi model training complete.\n", "\n", "Analyzing mock reels...\n", "\n", "Final Results:\n", "positive: 2\n", "negative: 4\n", "neutral: 1\n", "\n", "Detailed Results:\n", "Text: This is amazing! Love it so much 😍...\n", " Label: positive (Confidence: 0.99)\n", " Details: {'emotion': 'joy', 'sentiment': 'positive', 'model': 'ensemble'}\n", "--------------------\n", "Text: Not happy with the quality at all...\n", " Label: negative (Confidence: 0.90)\n", " Details: {'emotion': 'others', 'sentiment': 'negative', 'model': 'ensemble'}\n", "--------------------\n", "Text: Check out our new sale - 50% off everything...\n", " Label: neutral (Confidence: 0.95)\n", " Details: {'reason': 'neutral_keyword'}\n", "--------------------\n", "Text: मुझे यह उत्पाद पसंद नहीं आया...\n", " Label: negative (Confidence: 0.34)\n", " Details: {'model': 'fine-tuned-indic-bert'}\n", "--------------------\n", "Text: यह एक सामान्य रील है...\n", " Label: negative (Confidence: 0.34)\n", " Details: {'model': 'fine-tuned-indic-bert'}\n", "--------------------\n", "Text: Great weather today! Awesome vibes....\n", " Label: positive (Confidence: 0.99)\n", " Details: {'emotion': 'joy', 'sentiment': 'positive', 'model': 'ensemble'}\n", "--------------------\n", "Text: Worst movie ever. 
def plot_sentiment_pie(results, title="Reels Sentiment Analysis"):
    """
    Creates a pie chart from sentiment analysis results

    Args:
        results: Counter object or dict with 'positive', 'neutral', 'negative'
            keys; a missing key counts as 0
        title: Chart title (default: "Reels Sentiment Analysis")

    Returns:
        The matplotlib Figure containing the chart.

    Raises:
        ValueError: if no class has a positive count (nothing to draw).
    """
    # (label, count, colour, explode) per class; positive and negative get a
    # slight highlight.
    slices = [
        ('Positive', results.get('positive', 0), '#4CAF50', 0.05),  # Green
        ('Neutral', results.get('neutral', 0), '#FFC107', 0),       # Yellow
        ('Negative', results.get('negative', 0), '#F44336', 0.05),  # Red
    ]

    # Zero-count wedges only add overlapping "0.0%" labels, and an all-zero
    # input cannot be normalised into a pie at all.
    visible = [entry for entry in slices if entry[1] > 0]
    if not visible:
        raise ValueError("results contains no positive, neutral or negative counts to plot")
    labels, sizes, colors, explode = zip(*visible)

    # Create figure
    fig, ax = plt.subplots(figsize=(8, 6))
    ax.pie(sizes, explode=explode, labels=labels, colors=colors,
           autopct='%1.1f%%', shadow=True, startangle=140,
           textprops={'fontsize': 12})

    # Equal aspect ratio ensures pie is drawn as circle
    ax.axis('equal')

    # Add title and styling on this figure, not on whichever is "current"
    ax.set_title(title, fontsize=16, pad=20)
    fig.tight_layout()

    return fig
"https://localhost:8080/", "height": 607 }, "id": "QQS7wMMO8N7o", "outputId": "4fe8c162-4c7b-4d15-c364-eb8ba4944669" }, "execution_count": 28, "outputs": [ { "output_type": "display_data", "data": { "text/plain": [ "

def preprocess_text(text):
    """Basic text cleaning: lower-case and drop URLs and @mentions.

    Returns "" for falsy input.
    """
    if not text:
        return ""
    return re.sub(r"http\S+|@\w+", "", text.lower()).strip()


def classify_reel(text):
    """Assign a caption to one content category.

    Keyword shortcuts from ``category_keywords`` are tried first, in dict
    order; the first category with a hit wins. Keywords (and the category
    name itself) must match as whole words, optionally with a plural
    "s"/"es", so short keywords such as "ai" do not fire inside "said" or
    "rain". Captions of three or more words with no keyword hit go to the
    zero-shot ``classifier``.

    Returns:
        A category name, or "other" when nothing matched, the caption is too
        short for the model, or the model call failed.
    """
    processed = preprocess_text(text)
    if not processed:
        return "other"

    # Keyword matching
    for category, keywords in category_keywords.items():
        alternatives = "|".join(re.escape(term) for term in {category, *keywords})
        if re.search(rf"\b(?:{alternatives})(?:e?s)?\b", processed):
            return category

    # Model classification
    if len(processed.split()) >= 3:
        try:
            result = classifier(processed[:512], content_categories)
            return result['labels'][0]
        except Exception as exc:
            # Best effort: report the failure and fall through to "other"
            # rather than silently hiding it.
            print(f"Zero-shot classification failed: {exc}")

    return "other"
def plot_category_distribution(counter, title="Reels Content Distribution"):
    """Generate pie chart from category counts.

    Categories below 5% of the total, and the "other" category itself, are
    merged into a single "Other" slice. Nothing is drawn when the counter
    holds no counts.

    Args:
        counter: collections.Counter mapping category name -> count.
        title: chart title.
    """
    total = sum(counter.values())
    if total <= 0:
        # An empty pie is just a blank figure; say why instead.
        print("No categorised reels to plot.")
        return

    labels = []
    sizes = []

    # Separate larger categories from others
    threshold = total * 0.05  # 5% threshold
    other_count = 0

    for category, count in counter.most_common():
        if count >= threshold and category != "other":
            labels.append(category.title())
            sizes.append(count)
        else:
            other_count += count

    if other_count > 0:
        labels.append("Other")
        sizes.append(other_count)

    # Create pie chart on an explicit figure/axes pair
    fig, ax = plt.subplots(figsize=(10, 8))
    ax.pie(
        sizes,
        labels=labels,
        autopct='%1.1f%%',
        startangle=140,
        colors=plt.cm.Pastel1.colors,
        wedgeprops={'edgecolor': 'white', 'linewidth': 1}
    )

    ax.set_title(title, pad=20, fontsize=15)
    ax.axis('equal')  # Equal aspect ratio ensures pie is circular
    fig.tight_layout()
    plt.show()


def analyze_and_visualize(reels, max_to_analyze=100):
    """Complete analysis with visualization.

    Classifies the caption of up to ``max_to_analyze`` reels with
    ``classify_reel``, prints the per-category counts and plots them.

    Args:
        reels: sequence of objects exposing ``caption_text`` or ``caption``;
            may be empty or None.
        max_to_analyze: maximum number of reels to process.

    Returns:
        Counter mapping category name -> number of reels.
    """
    category_counts = Counter()

    print("⏳ Analyzing reels...")
    for reel in (reels or [])[:max_to_analyze]:
        caption = getattr(reel, 'caption_text', '') or getattr(reel, 'caption', '') or ''
        category_counts[classify_reel(caption)] += 1

    print("\n✅ Analysis complete!")
    print("\n📊 Category Counts:")
    for category, count in category_counts.most_common():
        print(f"- {category.title()}: {count}")

    # Generate visualization
    plot_category_distribution(category_counts)

    return category_counts
"0ffb2936-9f88-4c44-cd5f-13759cb64572" }, "execution_count": 17, "outputs": [ { "output_type": "stream", "name": "stderr", "text": [ "Device set to use cpu\n" ] }, { "output_type": "stream", "name": "stdout", "text": [ "⏳ Analyzing reels...\n", "\n", "✅ Analysis complete!\n", "\n", "📊 Category Counts:\n", "- Other: 3\n", "- News: 2\n", "- Technology: 2\n", "- Comedy: 1\n", "- Meme: 1\n", "- Movie: 1\n" ] }, { "output_type": "display_data", "data": { "text/plain": [ "
" ], "image/png": "\n" }, "metadata": {} } ] } ] }