diff --git a/Instagram/Instagram_Get_comments_from_post.ipynb b/Instagram/Instagram_Get_comments_from_post.ipynb new file mode 100644 index 0000000000..811aa92130 --- /dev/null +++ b/Instagram/Instagram_Get_comments_from_post.ipynb @@ -0,0 +1,506 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "rocky-cardiff", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "" + ] + }, + { + "cell_type": "markdown", + "id": "judicial-headline", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "# Instagram - Get comments from post\n", + "Give Feedback | Bug report" + ] + }, + { + "cell_type": "markdown", + "id": "1cef8cab-e783-4589-b2c4-c21ee380c773", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "**Tags:** #instagram #likes #comments #snippet #content" + ] + }, + { + "cell_type": "markdown", + "id": "naas-author", + "metadata": { + "papermill": {}, + "tags": [ + "naas" + ] + }, + "source": [ + "**Author:** [Varsha Kumar](https://www.linkedin.com/in/varsha-kumar-590466305/)" + ] + }, + { + "cell_type": "markdown", + "id": "8edddd04-a2af-47f7-82aa-a9108cdcd3d4", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "**Last update:** 2024-07-10 (Created: 2024-07-10)" + ] + }, + { + "cell_type": "markdown", + "id": "naas-description", + "metadata": { + "papermill": {}, + "tags": [ + "description" + ] + }, + "source": [ + "**Description:** This notebook allows users to extract comments from an Instagram post." + ] + }, + { + "cell_type": "markdown", + "id": "88ed8bb2-2694-4848-a3ef-afc0f4e65e07", + "metadata": {}, + "source": [ + "### How to retrieve your API token from Apify" + ] + }, + { + "cell_type": "markdown", + "id": "0fca1344-877b-417d-94f0-1f024a029523", + "metadata": {}, + "source": [ + "1. Go to https://apify.com.\n", + "2. Click \"Sign up for free\" and use your Google account to sign up.\n", + "3. 
Once your account has been created, navigate to \"Settings\" in the left panel of the screen.\n", + "4. Click the \"Integrations\" tab; the personal API token that was generated automatically at sign-up is shown there.\n", + "5. Copy that token and use it to extract data!" + ] + }, + { + "cell_type": "markdown", + "id": "input_cell", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "## Input" + ] + }, + { + "cell_type": "markdown", + "id": "import_cell", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "### Import libraries" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "d40e70c0-a388-417b-a50f-c50bb82cc0b3", + "metadata": { + "execution": { + "iopub.execute_input": "2024-07-24T09:17:53.144020Z", + "iopub.status.busy": "2024-07-24T09:17:53.143601Z", + "iopub.status.idle": "2024-07-24T09:17:53.884427Z", + "shell.execute_reply": "2024-07-24T09:17:53.883642Z", + "shell.execute_reply.started": "2024-07-24T09:17:53.143947Z" + }, + "papermill": {}, + "tags": [] + }, + "outputs": [], + "source": [ + "import requests\n", + "import pandas as pd\n", + "import json\n", + "import time" + ] + }, + { + "cell_type": "markdown", + "id": "5c3c12ca-5f3e-411a-aa54-c2b4b612a91d", + "metadata": { + "execution": { + "iopub.execute_input": "2022-03-17T10:12:43.371273Z", + "iopub.status.busy": "2022-03-17T10:12:43.371011Z", + "iopub.status.idle": "2022-03-17T10:12:43.374551Z", + "shell.execute_reply": "2022-03-17T10:12:43.373882Z", + "shell.execute_reply.started": "2022-03-17T10:12:43.371208Z" + }, + "papermill": {}, + "tags": [] + }, + "source": [ + "### Setup variables\n", + "- `apify_token`: personal API token generated by Apify, used to authenticate API requests\n", + "- `post_url`: URL of the Instagram post\n", + "- `output_csv`: name of the output CSV file" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "ce903236-60d1-4087-a31e-9321f2df6112", + "metadata": { + "execution": { + "iopub.execute_input": 
# ### Setup variables
# `os` is imported here (not in the shared import cell) so this block is
# self-contained; it is only needed to read the token from the environment.
import os

# SECURITY: never commit an API token. The token that used to be hardcoded on
# this line has been published in version control and must be treated as
# compromised: revoke it in the Apify console and generate a new one.
apify_token = os.environ.get("APIFY_TOKEN", "")
if not apify_token:
    raise ValueError(
        "Apify API token not found: set the APIFY_TOKEN environment variable."
    )

post_url = "https://www.instagram.com/p/Cn0cUc7KelU/"
# For the default URL this yields "p_Cn0cUc7KelU_instagram_post_comments.csv".
output_csv = f"{post_url.split('https://www.instagram.com/')[1].replace('/', '_')}instagram_post_comments.csv"

# ## Model
# ### Scrape post comments

# Tunables for the HTTP calls and the polling loop (all values in seconds).
REQUEST_TIMEOUT = 30      # per-request timeout so a stalled connection cannot hang the kernel
POLL_INTERVAL = 5         # delay between two status checks
MAX_WAIT = 900            # overall budget for the actor run; raise it for very large posts

# Statuses after which an Apify run will not change any more. "TIMED-OUT" was
# missing originally, which made the loop below spin forever on a timed-out run.
TERMINAL_STATUSES = {"SUCCEEDED", "FAILED", "ABORTED", "TIMED-OUT"}

# Send the token in the Authorization header instead of the query string so it
# does not end up in URLs (logs, proxies, tracebacks, notebook outputs).
auth_headers = {"Authorization": f"Bearer {apify_token}"}

# Define the input for the Instagram Comment Scraper actor
input_data = {
    "directUrls": [post_url],
    "resultsType": "comments",
}

# Make a request to start the actor
start_actor_url = "https://api.apify.com/v2/acts/apify~instagram-comment-scraper/runs"
response = requests.post(
    start_actor_url,
    json=input_data,
    headers=auth_headers,
    timeout=REQUEST_TIMEOUT,
)
# Fail with the HTTP error (e.g. 401 for a bad token) rather than with an
# opaque KeyError on the JSON payload below.
response.raise_for_status()
run_details = response.json()

# Extract the run ID
run_id = run_details['data']['id']

# Define the URL to fetch the actor run status
run_status_url = f"https://api.apify.com/v2/acts/apify~instagram-comment-scraper/runs/{run_id}"

# Wait for the actor to finish, but never longer than MAX_WAIT seconds
deadline = time.monotonic() + MAX_WAIT
while True:
    status_response = requests.get(
        run_status_url, headers=auth_headers, timeout=REQUEST_TIMEOUT
    )
    status_response.raise_for_status()
    status_data = status_response.json()
    run_status = status_data['data']['status']
    if run_status in TERMINAL_STATUSES:
        break
    if time.monotonic() >= deadline:
        raise TimeoutError(
            f"Actor run {run_id} still has status {run_status} after {MAX_WAIT} seconds."
        )
    time.sleep(POLL_INTERVAL)  # Wait before checking again

# Stop here on failure: merely printing would leave `comments_data` undefined
# (or stale from an earlier execution) for the cells that follow.
if run_status != 'SUCCEEDED':
    raise RuntimeError(f"Actor run did not succeed. Status: {run_status}")

# Define the URL to fetch the results
dataset_id = status_data['data']['defaultDatasetId']
dataset_url = f"https://api.apify.com/v2/datasets/{dataset_id}/items?format=json"

# Fetch the comments
comments_response = requests.get(
    dataset_url, headers=auth_headers, timeout=REQUEST_TIMEOUT
)
comments_response.raise_for_status()
comments_data = comments_response.json()


# ### Dataframe structure function
def get_comments(
    cid,
    text,
    username,
    profile_picture,
    timestamp,
    likes_count
):
    """Build the flat record for one comment.

    Every argument is stored unchanged under an upper-case key, so a list of
    these records can be handed directly to ``pd.DataFrame``.

    Args:
        cid: Comment identifier (stored under ``"ID"``).
        text: Comment body (``"TEXT"``).
        username: Author of the comment (``"USERNAME"``).
        profile_picture: Author's profile picture reference (``"PROFILE_PICTURE"``).
        timestamp: Comment creation time (``"TIMESTAMP"``).
        likes_count: Number of likes on the comment (``"LIKES_COUNT"``).

    Returns:
        dict: The six values keyed by ``ID``, ``TEXT``, ``USERNAME``,
        ``PROFILE_PICTURE``, ``TIMESTAMP`` and ``LIKES_COUNT``.
    """
    return {
        "ID": cid,
        "TEXT": text,
        "USERNAME": username,
        "PROFILE_PICTURE": profile_picture,
        "TIMESTAMP": timestamp,
        "LIKES_COUNT": likes_count
    }
"output_cell", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "## Output" + ] + }, + { + "cell_type": "markdown", + "id": "display_cell", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "### Display output" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "c7ac65a4-dd93-43c4-8090-c86a2aa28898", + "metadata": { + "execution": { + "iopub.execute_input": "2024-07-24T09:18:11.552842Z", + "iopub.status.busy": "2024-07-24T09:18:11.552615Z", + "iopub.status.idle": "2024-07-24T09:18:11.873573Z", + "shell.execute_reply": "2024-07-24T09:18:11.873017Z", + "shell.execute_reply.started": "2024-07-24T09:18:11.552814Z" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " | ID | \n", + "TEXT | \n", + "USERNAME | \n", + "PROFILE_PICTURE | \n", + "TIMESTAMP | \n", + "LIKES_COUNT | \n", + "
---|---|---|---|---|---|---|
0 | \n", + "17858772584879006 | \n", + "Promote it on @writing._.skill | \n", + "aditya__.7443 | \n", + "https://instagram.fhyw1-1.fna.fbcdn.net/v/t51.... | \n", + "2023-01-25T01:20:12.000Z | \n", + "0 | \n", + "
1 | \n", + "17842757270932646 | \n", + "Promote at @Thewriters_heaven | \n", + "skylarsrwriter | \n", + "https://instagram.fhyw1-1.fna.fbcdn.net/v/t51.... | \n", + "2023-01-25T01:22:15.000Z | \n", + "0 | \n", + "
2 | \n", + "17945160482350602 | \n", + "@Its_chetram_4444 | \n", + "skylarsrwriter | \n", + "https://instagram.fhyw1-1.fna.fbcdn.net/v/t51.... | \n", + "2023-01-25T01:22:22.000Z | \n", + "0 | \n", + "
3 | \n", + "18007951324553277 | \n", + "Promote at @TheAuthors.World 💫 | \n", + "author__mack16 | \n", + "https://instagram.fhyw1-1.fna.fbcdn.net/v/t51.... | \n", + "2023-01-25T01:24:59.000Z | \n", + "0 | \n", + "