diff --git a/Reddit/Reddit_Search.ipynb b/Reddit/Reddit_Search.ipynb new file mode 100644 index 0000000000..3265cc9bae --- /dev/null +++ b/Reddit/Reddit_Search.ipynb @@ -0,0 +1,727 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "42fce9ae-03b7-467b-b5ba-4fd66ffc50ec", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "\"Naas\"" + ] + }, + { + "cell_type": "markdown", + "id": "9861bc61-3c92-41a8-8489-aba0d2196fb6", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "# Reddit - Search" + ] + }, + { + "cell_type": "markdown", + "id": "89c7636e-14c1-45ab-8cbc-d6826f469f6d", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "**Tags:** #reddit #search #api #dev #get #python" + ] + }, + { + "cell_type": "markdown", + "id": "1138db6c-791e-4ae3-9d34-7038d3a97482", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "**Author:** [Maxime Jublou](https://www.linkedin.com/in/maximejublou)" + ] + }, + { + "cell_type": "markdown", + "id": "5fd2e42a-b7ea-4a4e-9a8a-f44dd737117f", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "**Last update:** 2023-10-10 (Created: 2023-10-10)" + ] + }, + { + "cell_type": "markdown", + "id": "ea7651e0-9bbd-4c37-84a0-f5bcf650259e", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "**Description:** This notebook searches Reddit for posts and comments using the Reddit API. It is useful for organizations to quickly find relevant content on Reddit." 
+ ] + }, + { + "cell_type": "markdown", + "id": "a45bc61b-a313-4b01-a698-30f9c16cddbd", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "**References:**\n", + "- [Reddit API](https://www.reddit.com/dev/api)\n", + "- [GET search](https://www.reddit.com/dev/api#GET_search)" + ] + }, + { + "cell_type": "markdown", + "id": "ae265cbb-ea8b-48c1-98de-6ec27b1195a1", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "## Input" + ] + }, + { + "cell_type": "markdown", + "id": "1720a16f-9a03-4955-a120-138d615204ca", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "### Import libraries" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "4415467d-e9bf-4d3a-82c1-b6c72f60fded", + "metadata": { + "execution": { + "iopub.execute_input": "2023-10-10T14:13:21.138940Z", + "iopub.status.busy": "2023-10-10T14:13:21.138696Z", + "iopub.status.idle": "2023-10-10T14:13:21.141732Z", + "shell.execute_reply": "2023-10-10T14:13:21.141083Z", + "shell.execute_reply.started": "2023-10-10T14:13:21.138916Z" + }, + "papermill": {}, + "tags": [] + }, + "outputs": [], + "source": [ + "import requests\n", + "import pandas as pd\n", + "import pydash as _" + ] + }, + { + "cell_type": "markdown", + "id": "cc11f4e7-1182-42bd-9948-e35f49e7b5c1", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "### Setup variables\n", + "- **query**: The search query\n", + "- **sort**: The type of sorting (relevance, hot, top, new, comments)\n", + "- **time**: The time period of the results (hour, day, week, month, year, all)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "63531d1e-521a-46f1-937f-d5d74254f99d", + "metadata": { + "execution": { + "iopub.execute_input": "2023-10-10T14:10:34.970097Z", + "iopub.status.busy": "2023-10-10T14:10:34.969863Z", + "iopub.status.idle": "2023-10-10T14:10:34.973099Z", + "shell.execute_reply": "2023-10-10T14:10:34.972406Z", + "shell.execute_reply.started": 
"2023-10-10T14:10:34.970073Z" + }, + "papermill": {}, + "tags": [] + }, + "outputs": [], + "source": [ + "query = \"data science\"\n", + "sort = \"relevance\"\n", + "time = \"all\"" + ] + }, + { + "cell_type": "markdown", + "id": "3a978e62-60f9-4084-92fa-89c9f64f59aa", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "## Model" + ] + }, + { + "cell_type": "markdown", + "id": "f92542d4-ae07-4d1f-babf-ef544cbd94be", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "### Search Reddit" + ] + }, + { + "cell_type": "markdown", + "id": "00e5829f-517f-4684-bff5-499a538d011a", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "This cell searches Reddit by sending a GET request to the public `search.json` endpoint with the query, sort order and time period defined above." + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "941efb8e-f4d9-46c8-a101-b06c20e15b44", + "metadata": { + "execution": { + "iopub.execute_input": "2023-10-10T14:11:19.868086Z", + "iopub.status.busy": "2023-10-10T14:11:19.867851Z", + "iopub.status.idle": "2023-10-10T14:11:20.683324Z", + "shell.execute_reply": "2023-10-10T14:11:20.682638Z", + "shell.execute_reply.started": "2023-10-10T14:11:19.868060Z" + }, + "papermill": {}, + "tags": [] + }, + "outputs": [], + "source": [ + "url = \"https://www.reddit.com/search.json\"\n", + "params = {\"q\": query, \"sort\": sort, \"t\": time}\n", + "# Reddit heavily rate-limits default client User-Agents, so identify this notebook explicitly\n", + "headers = {\"User-Agent\": \"naas-reddit-search-notebook/1.0\"}\n", + "# requests has no default timeout; set one so the cell cannot hang forever\n", + "response = requests.get(url, params=params, headers=headers, timeout=30)\n", + "# Stop here on HTTP errors (e.g. 429) rather than failing later while parsing the body\n", + "response.raise_for_status()" + ] + }, + { + "cell_type": "markdown", + "id": "da53ec13-094b-41c8-9917-35d0f0ccd8df", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "## Output" + ] + }, + { + "cell_type": "markdown", + "id": "f15c8b96-7ed7-48f2-85b9-f438fae9dc29", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "### Display result" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "d8052e5f-22c9-4043-985f-ddc80b08d081", + "metadata": { + "execution": { + "iopub.execute_input": "2023-10-10T14:11:26.008694Z", + 
"iopub.status.busy": "2023-10-10T14:11:26.007426Z", + "iopub.status.idle": "2023-10-10T14:11:26.013652Z", + "shell.execute_reply": "2023-10-10T14:11:26.013028Z", + "shell.execute_reply.started": "2023-10-10T14:11:26.008657Z" + }, + "papermill": {}, + "tags": [] + }, + "outputs": [], + "source": [ + "# Decode the JSON body of the search response\n", + "r = response.json()" + ] + }, + { + "cell_type": "markdown", + "id": "b2226f4d-6c96-497f-8310-ca6f87b84e5b", + "metadata": {}, + "source": [ + "### Extract meaningful data and sort by most successful content" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "id": "3f94eff6-4b20-4d93-aed9-57a3ab1903bd", + "metadata": { + "execution": { + "iopub.execute_input": "2023-10-10T14:26:18.225302Z", + "iopub.status.busy": "2023-10-10T14:26:18.225069Z", + "iopub.status.idle": "2023-10-10T14:26:18.247667Z", + "shell.execute_reply": "2023-10-10T14:26:18.247059Z", + "shell.execute_reply.started": "2023-10-10T14:26:18.225279Z" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
subredditsubreddit_name_prefixedtitleselftexturldownsups
13dataisbeautifulr/dataisbeautiful[OC] The debate drinking game, according to da...https://i.redd.it/xiocduiq6nu51.png041728
15ProgrammerHumorr/ProgrammerHumorThis almost feels like a test (for data scienc...https://i.redd.it/3atursqpf9ha1.jpg02852
11TheLastAirbenderr/TheLastAirbenderData Science Project: Relevant Words for ATLA ...I scraped the transcripts of all 61 episodes o...https://www.reddit.com/gallery/13x7o5t02430
12datasciencer/datascienceData Science in 2022https://i.imgur.com/60Vnj0X.png02339
18soccercirclejerkr/soccercirclejerkData science is his passionhttps://i.redd.it/ece14yb76n7b1.png01529
22ChoosingBeggarsr/ChoosingBeggarsData science internship adhttps://www.reddit.com/gallery/ta0j5u01459
17196r/196Data science nightmare rulehttps://i.redd.it/2izpy2bb9grb1.jpg01230
6datasciencer/datasciencehot take: forget data science, we need more an...People are obsessed with pursuing data science...https://www.reddit.com/r/datascience/comments/...01131
19datasciencer/datascienceDescribe Data Science in Three Wordshttps://i.redd.it/3r36uflzp3b91.jpg01051
10datasciencer/datascienceLLMs hype has killed data scienceThat's it.\\n\\nAt my work in a huge company alm...https://www.reddit.com/r/datascience/comments/...0834
21datasciencer/datascienceData Science Salary Progressionhttps://i.redd.it/zdysrc2jttw81.jpg0649
14datasciencer/datascience[AMA] I'm a data science manager in FAANGI've worked at 3 different FAANGs as a data sc...https://www.reddit.com/r/datascience/comments/...0583
24datasciencer/datascienceWhats Your Data Science Hot Take?Mastering excel is necessary for 99% of data s...https://www.reddit.com/r/datascience/comments/...0571
23datasciencer/datascienceThe Data Science TrapIt is no longer open to question that data sci...https://www.reddit.com/r/datascience/comments/...0526
4cscareerquestionsr/cscareerquestionsData Science feels like a dead end. Any opinions?Hi all,\\n\\nObviously a throwaway account. To a...https://www.reddit.com/r/cscareerquestions/com...0390
0datasciencer/datascienceWhat are the brutal truths about working in Da...What are the brutal truths about working in Da...https://www.reddit.com/r/datascience/comments/...0372
7datasciencer/datascienceIs data science a bad career long-term?I work closely with professional engineers (el...https://www.reddit.com/r/datascience/comments/...0259
16datasciencer/datascienceIs data science/data engineering over saturated?On LinkedIn I always see 100+ applicants for e...https://www.reddit.com/r/datascience/comments/...0216
20datasciencer/datascienceUnpopular Opinion: you don't need a passion in...Obviously don't walk into interviews telling y...https://www.reddit.com/r/datascience/comments/...0208
5developersIndiar/developersIndiaIs \"data science\" a bubble about to burst?I spoke to an experienced guy in the IT sector...https://www.reddit.com/r/developersIndia/comme...0172
2phcareersr/phcareersFailed Data Analyst/Data Science ShifterI thought I'd share my personal journey with y...https://www.reddit.com/r/phcareers/comments/16...0149
3datasciencer/datascienceIs data science oversaturated now? | Job MarketWhenever I've scrolled through Linkdin, I'm se...https://www.reddit.com/r/datascience/comments/...0105
9datasciencer/datascienceDo you like being a data scientist?I have the option to study two different cours...https://www.reddit.com/r/datascience/comments/...072
8cscareerquestionsr/cscareerquestionsShould I stop studying web development and mov...Yesterday I had a talk with a senior in this i...https://www.reddit.com/r/cscareerquestions/com...04
1datasciencer/datascienceIs data science still worth it in 2023Hello\\nI have a degree in physics and I want t...https://www.reddit.com/r/datascience/comments/...02
\n", + "
" + ], + "text/plain": [ + " subreddit subreddit_name_prefixed \\\n", + "13 dataisbeautiful r/dataisbeautiful \n", + "15 ProgrammerHumor r/ProgrammerHumor \n", + "11 TheLastAirbender r/TheLastAirbender \n", + "12 datascience r/datascience \n", + "18 soccercirclejerk r/soccercirclejerk \n", + "22 ChoosingBeggars r/ChoosingBeggars \n", + "17 196 r/196 \n", + "6 datascience r/datascience \n", + "19 datascience r/datascience \n", + "10 datascience r/datascience \n", + "21 datascience r/datascience \n", + "14 datascience r/datascience \n", + "24 datascience r/datascience \n", + "23 datascience r/datascience \n", + "4 cscareerquestions r/cscareerquestions \n", + "0 datascience r/datascience \n", + "7 datascience r/datascience \n", + "16 datascience r/datascience \n", + "20 datascience r/datascience \n", + "5 developersIndia r/developersIndia \n", + "2 phcareers r/phcareers \n", + "3 datascience r/datascience \n", + "9 datascience r/datascience \n", + "8 cscareerquestions r/cscareerquestions \n", + "1 datascience r/datascience \n", + "\n", + " title \\\n", + "13 [OC] The debate drinking game, according to da... \n", + "15 This almost feels like a test (for data scienc... \n", + "11 Data Science Project: Relevant Words for ATLA ... \n", + "12 Data Science in 2022 \n", + "18 Data science is his passion \n", + "22 Data science internship ad \n", + "17 Data science nightmare rule \n", + "6 hot take: forget data science, we need more an... \n", + "19 Describe Data Science in Three Words \n", + "10 LLMs hype has killed data science \n", + "21 Data Science Salary Progression \n", + "14 [AMA] I'm a data science manager in FAANG \n", + "24 Whats Your Data Science Hot Take? \n", + "23 The Data Science Trap \n", + "4 Data Science feels like a dead end. Any opinions? \n", + "0 What are the brutal truths about working in Da... \n", + "7 Is data science a bad career long-term? \n", + "16 Is data science/data engineering over saturated? 
\n", + "20 Unpopular Opinion: you don't need a passion in... \n", + "5 Is \"data science\" a bubble about to burst? \n", + "2 Failed Data Analyst/Data Science Shifter \n", + "3 Is data science oversaturated now? | Job Market \n", + "9 Do you like being a data scientist? \n", + "8 Should I stop studying web development and mov... \n", + "1 Is data science still worth it in 2023 \n", + "\n", + " selftext \\\n", + "13 \n", + "15 \n", + "11 I scraped the transcripts of all 61 episodes o... \n", + "12 \n", + "18 \n", + "22 \n", + "17 \n", + "6 People are obsessed with pursuing data science... \n", + "19 \n", + "10 That's it.\\n\\nAt my work in a huge company alm... \n", + "21 \n", + "14 I've worked at 3 different FAANGs as a data sc... \n", + "24 Mastering excel is necessary for 99% of data s... \n", + "23 It is no longer open to question that data sci... \n", + "4 Hi all,\\n\\nObviously a throwaway account. To a... \n", + "0 What are the brutal truths about working in Da... \n", + "7 I work closely with professional engineers (el... \n", + "16 On LinkedIn I always see 100+ applicants for e... \n", + "20 Obviously don't walk into interviews telling y... \n", + "5 I spoke to an experienced guy in the IT sector... \n", + "2 I thought I'd share my personal journey with y... \n", + "3 Whenever I've scrolled through Linkdin, I'm se... \n", + "9 I have the option to study two different cours... \n", + "8 Yesterday I had a talk with a senior in this i... \n", + "1 Hello\\nI have a degree in physics and I want t... \n", + "\n", + " url downs ups \n", + "13 https://i.redd.it/xiocduiq6nu51.png 0 41728 \n", + "15 https://i.redd.it/3atursqpf9ha1.jpg 0 2852 \n", + "11 https://www.reddit.com/gallery/13x7o5t 0 2430 \n", + "12 https://i.imgur.com/60Vnj0X.png 0 2339 \n", + "18 https://i.redd.it/ece14yb76n7b1.png 0 1529 \n", + "22 https://www.reddit.com/gallery/ta0j5u 0 1459 \n", + "17 https://i.redd.it/2izpy2bb9grb1.jpg 0 1230 \n", + "6 https://www.reddit.com/r/datascience/comments/... 
0 1131 \n", + "19 https://i.redd.it/3r36uflzp3b91.jpg 0 1051 \n", + "10 https://www.reddit.com/r/datascience/comments/... 0 834 \n", + "21 https://i.redd.it/zdysrc2jttw81.jpg 0 649 \n", + "14 https://www.reddit.com/r/datascience/comments/... 0 583 \n", + "24 https://www.reddit.com/r/datascience/comments/... 0 571 \n", + "23 https://www.reddit.com/r/datascience/comments/... 0 526 \n", + "4 https://www.reddit.com/r/cscareerquestions/com... 0 390 \n", + "0 https://www.reddit.com/r/datascience/comments/... 0 372 \n", + "7 https://www.reddit.com/r/datascience/comments/... 0 259 \n", + "16 https://www.reddit.com/r/datascience/comments/... 0 216 \n", + "20 https://www.reddit.com/r/datascience/comments/... 0 208 \n", + "5 https://www.reddit.com/r/developersIndia/comme... 0 172 \n", + "2 https://www.reddit.com/r/phcareers/comments/16... 0 149 \n", + "3 https://www.reddit.com/r/datascience/comments/... 0 105 \n", + "9 https://www.reddit.com/r/datascience/comments/... 0 72 \n", + "8 https://www.reddit.com/r/cscareerquestions/com... 0 4 \n", + "1 https://www.reddit.com/r/datascience/comments/... 
0 2 " + ] + }, + "execution_count": 37, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Fields copied from each post's `data` object; the list also fixes the column order\n", + "post_fields = ['subreddit', 'subreddit_name_prefixed', 'title', 'selftext', 'url', 'downs', 'ups']\n", + "df = pd.DataFrame(\n", + "    [\n", + "        {field: _.get(c, f'data.{field}') for field in post_fields}\n", + "        for c in _.get(r, 'data.children')\n", + "    ],\n", + "    # Explicit columns keep the frame (and the sort below) valid when the search returns no posts\n", + "    columns=post_fields,\n", + ")\n", + "df.sort_values('ups', ascending=False)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.6" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}