From b8fe1b15e40ca59d3bafd3595f82e0777f058f78 Mon Sep 17 00:00:00 2001 From: Varsha Kumar Date: Wed, 24 Jul 2024 11:22:52 +0200 Subject: [PATCH] feat: get comments from instagram post --- .../Instagram_Get_comments_from_post.ipynb | 506 ++++++++++++++++++ 1 file changed, 506 insertions(+) create mode 100644 Instagram/Instagram_Get_comments_from_post.ipynb diff --git a/Instagram/Instagram_Get_comments_from_post.ipynb b/Instagram/Instagram_Get_comments_from_post.ipynb new file mode 100644 index 0000000000..811aa92130 --- /dev/null +++ b/Instagram/Instagram_Get_comments_from_post.ipynb @@ -0,0 +1,506 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "rocky-cardiff", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "\"Instagram.png\"" + ] + }, + { + "cell_type": "markdown", + "id": "judicial-headline", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "# Instagram - Get comments from post\n", + "Give Feedback | Bug report" + ] + }, + { + "cell_type": "markdown", + "id": "1cef8cab-e783-4589-b2c4-c21ee380c773", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "**Tags:** #instagram #likes #comments #snippet #content" + ] + }, + { + "cell_type": "markdown", + "id": "naas-author", + "metadata": { + "papermill": {}, + "tags": [ + "naas" + ] + }, + "source": [ + "**Author:** [Varsha Kumar](https://www.linkedin.com/in/varsha-kumar-590466305/)" + ] + }, + { + "cell_type": "markdown", + "id": "8edddd04-a2af-47f7-82aa-a9108cdcd3d4", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "**Last update:** 2024-07-10 (Created: 2024-07-10)" + ] + }, + { + "cell_type": "markdown", + "id": "naas-description", + "metadata": { + "papermill": {}, + "tags": [ + "description" + ] + }, + "source": [ + "**Description:** This notebook allows users to extract comments from an Instagram post." + ] + }, + { + "cell_type": "markdown", + "id": "88ed8bb2-2694-4848-a3ef-afc0f4e65e07", + "metadata": {}, + "source": [ + "### How to retrive API key with apify" + ] + }, + { + "cell_type": "markdown", + "id": "0fca1344-877b-417d-94f0-1f024a029523", + "metadata": {}, + "source": [ + "1. Go to https://apify.com.\n", + "2. Click \"Sign up for free\" and use your google account to sign up.\n", + "3. Once your account has been created, navigate to \"Settings\" on the left panel of the screen.\n", + "4. Here you will click on the tab labeled \"Integrations\" where your personal API token that was automatically generated with sign up will be.\n", + "5. Copy that token and use it to extract data!" + ] + }, + { + "cell_type": "markdown", + "id": "input_cell", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "## Input" + ] + }, + { + "cell_type": "markdown", + "id": "import_cell", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "### Import libraries" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "d40e70c0-a388-417b-a50f-c50bb82cc0b3", + "metadata": { + "execution": { + "iopub.execute_input": "2024-07-24T09:17:53.144020Z", + "iopub.status.busy": "2024-07-24T09:17:53.143601Z", + "iopub.status.idle": "2024-07-24T09:17:53.884427Z", + "shell.execute_reply": "2024-07-24T09:17:53.883642Z", + "shell.execute_reply.started": "2024-07-24T09:17:53.143947Z" + }, + "papermill": {}, + "tags": [] + }, + "outputs": [], + "source": [ + "import requests\n", + "import pandas as pd\n", + "import json\n", + "import time" + ] + }, + { + "cell_type": "markdown", + "id": "5c3c12ca-5f3e-411a-aa54-c2b4b612a91d", + "metadata": { + "execution": { + "iopub.execute_input": "2022-03-17T10:12:43.371273Z", + "iopub.status.busy": "2022-03-17T10:12:43.371011Z", + "iopub.status.idle": "2022-03-17T10:12:43.374551Z", + "shell.execute_reply": "2022-03-17T10:12:43.373882Z", + "shell.execute_reply.started": "2022-03-17T10:12:43.371208Z" + }, + "papermill": {}, + "tags": [] + }, + "source": [ + "### Setup variables\n", + "- `apify_token`: personal token apify creates to access data\n", + "- `post_url`: link to the instagram post\n", + "- `output_csv`: excel file" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "ce903236-60d1-4087-a31e-9321f2df6112", + "metadata": { + "execution": { + "iopub.execute_input": "2024-07-24T09:17:53.885709Z", + "iopub.status.busy": "2024-07-24T09:17:53.885471Z", + "iopub.status.idle": "2024-07-24T09:17:53.889147Z", + "shell.execute_reply": "2024-07-24T09:17:53.888536Z", + "shell.execute_reply.started": "2024-07-24T09:17:53.885681Z" + }, + "papermill": {}, + "tags": [] + }, + "outputs": [], + "source": [ + "apify_token = \"apify_api_gXWnLEPiE7wC8ALUwQkJ0QcdbuQzU847JwNP\"\n", + "post_url = \"https://www.instagram.com/p/Cn0cUc7KelU/\"\n", + "output_csv = f\"{post_url.split('https://www.instagram.com/')[1].replace('/', '_')}instagram_post_comments.csv\"" + ] + }, + { + "cell_type": "markdown", + "id": "model_cell", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "## Model" + ] + }, + { + "cell_type": "markdown", + "id": "d87a82f5-cb30-4f63-84e0-01ebe1b3fc7e", + "metadata": {}, + "source": [ + "### Scrape post comments" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "aa4ffac8-3c12-483a-a312-0eb7ff17ffa1", + "metadata": { + "execution": { + "iopub.execute_input": "2024-07-24T09:17:53.890462Z", + "iopub.status.busy": "2024-07-24T09:17:53.890084Z", + "iopub.status.idle": "2024-07-24T09:18:11.543588Z", + "shell.execute_reply": "2024-07-24T09:18:11.542798Z", + "shell.execute_reply.started": "2024-07-24T09:17:53.890430Z" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "# Define the input for the Instagram Comment Scraper actor\n", + "input_data = {\n", + " \"directUrls\": [post_url],\n", + " \"resultsType\": \"comments\",\n", + "}\n", + "\n", + "# Make a request to start the actor\n", + "start_actor_url = f\"https://api.apify.com/v2/acts/apify~instagram-comment-scraper/runs?token={apify_token}\"\n", + "response = requests.post(start_actor_url, json=input_data)\n", + "run_details = response.json()\n", + "\n", + "# Extract the run ID\n", + "run_id = run_details['data']['id']\n", + "\n", + "# Define the URL to fetch the actor run status\n", + "run_status_url = f\"https://api.apify.com/v2/acts/apify~instagram-comment-scraper/runs/{run_id}?token={apify_token}\"\n", + "\n", + "# Wait for the actor to finish\n", + "while True:\n", + " status_response = requests.get(run_status_url)\n", + " status_data = status_response.json()\n", + " if status_data['data']['status'] in ['SUCCEEDED', 'FAILED', 'ABORTED']:\n", + " break\n", + " time.sleep(5) # Wait for 5 seconds before checking again\n", + "\n", + "if status_data['data']['status'] == 'SUCCEEDED':\n", + " # Define the URL to fetch the results\n", + " dataset_id = status_data['data']['defaultDatasetId']\n", + " dataset_url = f\"https://api.apify.com/v2/datasets/{dataset_id}/items?token={apify_token}&format=json\"\n", + "\n", + " # Fetch the comments\n", + " comments_response = requests.get(dataset_url)\n", + " comments_data = comments_response.json()\n", + "\n", + "else:\n", + " print(f\"Actor run did not succeed. Status: {status_data['data']['status']}\")" + ] + }, + { + "cell_type": "markdown", + "id": "3d771a70-1245-4702-9014-324ae540d8ec", + "metadata": {}, + "source": [ + "### Dataframe structure function" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "01deb84d-abd7-4975-ab77-973fe84acf0f", + "metadata": { + "execution": { + "iopub.execute_input": "2024-07-24T09:18:11.547048Z", + "iopub.status.busy": "2024-07-24T09:18:11.546861Z", + "iopub.status.idle": "2024-07-24T09:18:11.551670Z", + "shell.execute_reply": "2024-07-24T09:18:11.551119Z", + "shell.execute_reply.started": "2024-07-24T09:18:11.547027Z" + } + }, + "outputs": [], + "source": [ + "def get_comments(\n", + " cid,\n", + " text,\n", + " username,\n", + " profile_picture,\n", + " timestamp,\n", + " likes_count\n", + "):\n", + " return {\n", + " \"ID\": cid,\n", + " \"TEXT\": text,\n", + " \"USERNAME\": username,\n", + " \"PROFILE_PICTURE\": profile_picture,\n", + " \"TIMESTAMP\": timestamp,\n", + " \"LIKES_COUNT\": likes_count\n", + " }" + ] + }, + { + "cell_type": "markdown", + "id": "output_cell", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "## Output" + ] + }, + { + "cell_type": "markdown", + "id": "display_cell", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "### Display output" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "c7ac65a4-dd93-43c4-8090-c86a2aa28898", + "metadata": { + "execution": { + "iopub.execute_input": "2024-07-24T09:18:11.552842Z", + "iopub.status.busy": "2024-07-24T09:18:11.552615Z", + "iopub.status.idle": "2024-07-24T09:18:11.873573Z", + "shell.execute_reply": "2024-07-24T09:18:11.873017Z", + "shell.execute_reply.started": "2024-07-24T09:18:11.552814Z" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
IDTEXTUSERNAMEPROFILE_PICTURETIMESTAMPLIKES_COUNT
017858772584879006Promote it on @writing._.skilladitya__.7443https://instagram.fhyw1-1.fna.fbcdn.net/v/t51....2023-01-25T01:20:12.000Z0
117842757270932646Promote at @Thewriters_heavenskylarsrwriterhttps://instagram.fhyw1-1.fna.fbcdn.net/v/t51....2023-01-25T01:22:15.000Z0
217945160482350602@Its_chetram_4444skylarsrwriterhttps://instagram.fhyw1-1.fna.fbcdn.net/v/t51....2023-01-25T01:22:22.000Z0
318007951324553277Promote at @TheAuthors.World 💫author__mack16https://instagram.fhyw1-1.fna.fbcdn.net/v/t51....2023-01-25T01:24:59.000Z0
\n", + "
" + ], + "text/plain": [ + " ID TEXT USERNAME \\\n", + "0 17858772584879006 Promote it on @writing._.skill aditya__.7443 \n", + "1 17842757270932646 Promote at @Thewriters_heaven skylarsrwriter \n", + "2 17945160482350602 @Its_chetram_4444 skylarsrwriter \n", + "3 18007951324553277 Promote at @TheAuthors.World 💫 author__mack16 \n", + "\n", + " PROFILE_PICTURE \\\n", + "0 https://instagram.fhyw1-1.fna.fbcdn.net/v/t51.... \n", + "1 https://instagram.fhyw1-1.fna.fbcdn.net/v/t51.... \n", + "2 https://instagram.fhyw1-1.fna.fbcdn.net/v/t51.... \n", + "3 https://instagram.fhyw1-1.fna.fbcdn.net/v/t51.... \n", + "\n", + " TIMESTAMP LIKES_COUNT \n", + "0 2023-01-25T01:20:12.000Z 0 \n", + "1 2023-01-25T01:22:15.000Z 0 \n", + "2 2023-01-25T01:22:22.000Z 0 \n", + "3 2023-01-25T01:24:59.000Z 0 " + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data = []\n", + "\n", + "for comment in comments_data:\n", + " data_comment = get_comments(\n", + " comment[\"id\"],\n", + " comment[\"text\"],\n", + " comment[\"ownerUsername\"],\n", + " comment[\"ownerProfilePicUrl\"],\n", + " comment[\"timestamp\"],\n", + " comment[\"likesCount\"]\n", + " )\n", + " data.append(data_comment)\n", + " \n", + "df = pd.DataFrame(data)\n", + "df" + ] + }, + { + "cell_type": "markdown", + "id": "636d22d3-beac-4de3-9ea8-6232e9cdcc6c", + "metadata": {}, + "source": [ + "### Save dataframe to csv" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "be26e796-6c38-4152-a0b8-49f51b617a6f", + "metadata": { + "execution": { + "iopub.execute_input": "2024-07-24T09:18:11.875551Z", + "iopub.status.busy": "2024-07-24T09:18:11.875052Z", + "iopub.status.idle": "2024-07-24T09:18:12.151858Z", + "shell.execute_reply": "2024-07-24T09:18:12.151261Z", + "shell.execute_reply.started": "2024-07-24T09:18:11.875517Z" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "df.to_csv(output_csv, index=False)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.6" + }, + "naas": { + "notebook_id": "38c44121d518d242dcfd1209fca1b300a11475f5836b8ae8f214c0b4524816a9", + "notebook_path": "Instagram/Instagram_Post_image_and_caption.ipynb" + }, + "papermill": { + "default_parameters": {}, + "environment_variables": {}, + "parameters": {}, + "version": "2.3.3" + }, + "toc-autonumbering": false + }, + "nbformat": 4, + "nbformat_minor": 5 +}