From 316c58500c8695bae815a400e86c3df30760e3fe Mon Sep 17 00:00:00 2001 From: sohaib anwaar Date: Wed, 21 Feb 2024 14:34:06 +0100 Subject: [PATCH 1/4] add: Adding hubspot search notebook --- HubSpot/HubSpot_Search.ipynb | 305 +++++++++++++++++++++++++++++++++-- 1 file changed, 293 insertions(+), 12 deletions(-) diff --git a/HubSpot/HubSpot_Search.ipynb b/HubSpot/HubSpot_Search.ipynb index 3e2058da5b..8c3d909d7e 100644 --- a/HubSpot/HubSpot_Search.ipynb +++ b/HubSpot/HubSpot_Search.ipynb @@ -103,16 +103,30 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "34197898-5462-40a5-8bbc-38c91e3cf0fd", "metadata": { + "execution": { + "iopub.execute_input": "2024-02-21T13:10:13.605231Z", + "iopub.status.busy": "2024-02-21T13:10:13.604952Z", + "iopub.status.idle": "2024-02-21T13:10:16.077579Z", + "shell.execute_reply": "2024-02-21T13:10:16.076859Z", + "shell.execute_reply.started": "2024-02-21T13:10:13.605164Z" + }, "papermill": {}, "tags": [] }, "outputs": [], "source": [ + "import json\n", + "import naas\n", "import requests\n", - "import json" + "from pprint import pprint\n", + "try:\n", + " from hubspot import HubSpot\n", + "except:\n", + " !pip install hubspot simplejson delorean\n", + " from hubspot import HubSpot" ] }, { @@ -130,16 +144,25 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "id": "b8e252fb-6dd2-42e4-88a5-6ccf2c40072f", "metadata": { + "execution": { + "iopub.execute_input": "2024-02-21T13:26:10.755790Z", + "iopub.status.busy": "2024-02-21T13:26:10.755543Z", + "iopub.status.idle": "2024-02-21T13:26:10.904253Z", + "shell.execute_reply": "2024-02-21T13:26:10.903625Z", + "shell.execute_reply.started": "2024-02-21T13:26:10.755766Z" + }, "papermill": {}, "tags": [] }, "outputs": [], "source": [ - "API_KEY = \"your-api-key\"\n", - "ENDPOINT_URL = \"https://api.hubapi.com/crm/v3/objects/contacts/search\"" + "API_KEY = naas.secret.get(\"HS_ACCESS_TOKEN\")\n", + "ENDPOINT_URL_SEARCH = \"https://api.hubapi.com/crm/v3/objects/contacts/search\"\n", + "ENDPOINT_URL_DEALS = \"https://api.hubapi.com/crm/v3/objects/deals/search\"\n", + "ENDPOINT_URL_COMPANY = \"https://api.hubapi.com/crm/v3/objects/companies/search\"" ] }, { @@ -177,24 +200,162 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 19, "id": "e8d4b220-4a53-4601-9e78-4b40f53c9060", "metadata": { + "execution": { + "iopub.execute_input": "2024-02-21T13:26:40.758865Z", + "iopub.status.busy": "2024-02-21T13:26:40.758598Z", + "iopub.status.idle": "2024-02-21T13:26:40.763172Z", + "shell.execute_reply": "2024-02-21T13:26:40.762527Z", + "shell.execute_reply.started": "2024-02-21T13:26:40.758838Z" + }, "papermill": {}, "tags": [] }, "outputs": [], "source": [ "def search_contacts(api_key, endpoint_url, query):\n", + " \"\"\"\n", + " Search contacts based on the provided query.\n", + "\n", + " Args:\n", + " api_key (str): The API key for authentication.\n", + " endpoint_url (str): The URL of the endpoint to send the request to.\n", + " query (str): The query string to search for.\n", + "\n", + " Returns:\n", + " dict: A dictionary containing the response data in JSON format.\n", + " \"\"\"\n", + " # Prepare headers for the HTTP request\n", " headers = {\"Content-Type\": \"application/json\", \"Authorization\": f\"Bearer {api_key}\"}\n", + " # Prepare the payload for the POST request\n", " payload = {\n", - " \"filterGroups\": [\n", - " {\"filters\": [{\"fieldName\": \"firstname\", \"operator\": \"EQ\", \"value\": query}]}\n", - " ],\n", + " \"filterGroups\":[\n", + " {\n", + " \"filters\":[\n", + " {\n", + " \"propertyName\": \"firstname\",\n", + " \"operator\": \"EQ\",\n", + " \"value\": query\n", + " }\n", + " ]\n", + " }\n", + " ],\n", " \"sort\": [{\"propertyName\": \"createdate\", \"direction\": \"DESCENDING\"}],\n", " \"properties\": [\"firstname\", \"lastname\", \"email\"],\n", + " 'limit':5,\n", + " }\n", + " # Send the POST request to the endpoint URL\n", + " response = requests.post(endpoint_url, headers=headers, data=json.dumps(payload))\n", + " # Parse the JSON response and return it\n", + " return response.json()" + ] + }, + { + "cell_type": "markdown", + "id": "21a4adf3-a53a-4e2a-8a96-9acee9c6ef5a", + "metadata": {}, + "source": [ + "### Search Deals function" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "017c0fee-251d-4f3f-b3f6-138dc12ffc0e", + "metadata": { + "execution": { + "iopub.execute_input": "2024-02-21T13:26:41.095015Z", + "iopub.status.busy": "2024-02-21T13:26:41.094781Z", + "iopub.status.idle": "2024-02-21T13:26:41.099200Z", + "shell.execute_reply": "2024-02-21T13:26:41.098494Z", + "shell.execute_reply.started": "2024-02-21T13:26:41.094992Z" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "def search_deals(api_key, endpoint_url):\n", + " \"\"\"\n", + " Search deals based on the provided query.\n", + "\n", + " Args:\n", + " api_key (str): The API key for authentication.\n", + " endpoint_url (str): The URL of the endpoint to send the request to.\n", + " query (str): The query string to search for.\n", + "\n", + " Returns:\n", + " dict: A dictionary containing the response data in JSON format.\n", + " \"\"\"\n", + " # Prepare headers for the HTTP request\n", + " headers = {\"Content-Type\": \"application/json\", \"Authorization\": f\"Bearer {api_key}\"}\n", + " # Prepare the payload for the POST request\n", + " payload = {\n", + " \"filterGroups\":[{\n", + " \"filters\":[\n", + " {\n", + " \"propertyName\":\"dealstage\",\n", + " \"operator\":\"IN\",\n", + " \"values\": [\"appointmentscheduled\", \"contractsent\", \"qualifiedtobuy\"]\n", + " }\n", + " ]\n", + " }\n", + " ]\n", + " }\n", + " # Send the POST request to the endpoint URL\n", + " response = requests.post(endpoint_url, headers=headers, data=json.dumps(payload))\n", + " # Parse the JSON response and return it\n", + " return response.json()" + ] + }, + { + "cell_type": "markdown", + "id": "afd55cc9-0278-4d24-9691-3b88c26d0b7b", + "metadata": {}, + "source": [ + "### Search companies function" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "b6873422-eb72-49ea-b723-be1e731f5b60", + "metadata": { + "execution": { + "iopub.execute_input": "2024-02-21T13:26:41.370409Z", + "iopub.status.busy": "2024-02-21T13:26:41.370159Z", + "iopub.status.idle": "2024-02-21T13:26:41.374477Z", + "shell.execute_reply": "2024-02-21T13:26:41.373778Z", + "shell.execute_reply.started": "2024-02-21T13:26:41.370384Z" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "def search_companies(api_key, endpoint_url):\n", + " \"\"\"\n", + " Search deals based on the provided query.\n", + "\n", + " Args:\n", + " api_key (str): The API key for authentication.\n", + " endpoint_url (str): The URL of the endpoint to send the request to.\n", + " query (str): The query string to search for.\n", + "\n", + " Returns:\n", + " dict: A dictionary containing the response data in JSON format.\n", + " \"\"\"\n", + " # Prepare headers for the HTTP request\n", + " headers = {\"Content-Type\": \"application/json\", \"Authorization\": f\"Bearer {api_key}\"}\n", + " # Prepare the payload for the POST request\n", + " payload = {\n", + " 'limit':5\n", " }\n", + " \n", + " \n", + " # Send the POST request to the endpoint URL\n", " response = requests.post(endpoint_url, headers=headers, data=json.dumps(payload))\n", + " # Parse the JSON response and return it\n", " return response.json()" ] }, @@ -220,19 +381,137 @@ "### Display result" ] }, + { + "cell_type": "markdown", + "id": "8f7b42b2-8ef6-4f14-81a4-7f99d6751301", + "metadata": {}, + "source": [ + "#### search contacts" + ] + }, { "cell_type": "code", - "execution_count": null, + "execution_count": 22, "id": "f1ba4b45-75f7-4072-b887-62031393cc98", "metadata": { + "execution": { + "iopub.execute_input": "2024-02-21T13:26:41.918432Z", + "iopub.status.busy": "2024-02-21T13:26:41.918198Z", + "iopub.status.idle": "2024-02-21T13:26:42.083053Z", + "shell.execute_reply": "2024-02-21T13:26:42.082333Z", + "shell.execute_reply.started": "2024-02-21T13:26:41.918409Z" + }, "papermill": {}, "tags": [] }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"status\": \"error\",\n", + " \"message\": \"Authentication credentials not found. This API supports OAuth 2.0 authentication and you can find more details at https://developers.hubspot.com/docs/methods/auth/oauth-overview\",\n", + " \"correlationId\": \"57f1f141-e6de-4272-a46a-67ed35a9bd0d\",\n", + " \"category\": \"INVALID_AUTHENTICATION\"\n", + "}\n" + ] + } + ], + "source": [ + "result = search_contacts(API_KEY, ENDPOINT_URL_SEARCH, \"John\")\n", + "print(json.dumps(result, indent=4))" + ] + }, + { + "cell_type": "markdown", + "id": "deb28419-f669-40a9-9781-fa2c276079a7", + "metadata": {}, + "source": [ + "#### search deals" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "ba2154f9-b3e1-4390-ace8-626d2d773cbf", + "metadata": { + "execution": { + "iopub.execute_input": "2024-02-21T13:10:16.822502Z", + "iopub.status.busy": "2024-02-21T13:10:16.822241Z", + "iopub.status.idle": "2024-02-21T13:10:17.025872Z", + "shell.execute_reply": "2024-02-21T13:10:17.025256Z", + "shell.execute_reply.started": "2024-02-21T13:10:16.822475Z" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"status\": \"error\",\n", + " \"message\": \"Authentication credentials not found. This API supports OAuth 2.0 authentication and you can find more details at https://developers.hubspot.com/docs/methods/auth/oauth-overview\",\n", + " \"correlationId\": \"8ee5e6f6-4de6-46dc-b5c6-d26f856c6e63\",\n", + " \"category\": \"INVALID_AUTHENTICATION\"\n", + "}\n" + ] + } + ], + "source": [ + "result = search_deals(API_KEY, ENDPOINT_URL_DEALS)\n", + "print(json.dumps(result, indent=4))" + ] + }, + { + "cell_type": "markdown", + "id": "f58f65a0-11ad-4dc1-afe5-002289c226e9", + "metadata": {}, + "source": [ + "#### search companies" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "7c460c13-d666-42c9-b621-bd6dc5d398d5", + "metadata": { + "execution": { + "iopub.execute_input": "2024-02-21T13:10:17.028184Z", + "iopub.status.busy": "2024-02-21T13:10:17.027739Z", + "iopub.status.idle": "2024-02-21T13:10:17.197194Z", + "shell.execute_reply": "2024-02-21T13:10:17.196533Z", + "shell.execute_reply.started": "2024-02-21T13:10:17.028148Z" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"status\": \"error\",\n", + " \"message\": \"Authentication credentials not found. This API supports OAuth 2.0 authentication and you can find more details at https://developers.hubspot.com/docs/methods/auth/oauth-overview\",\n", + " \"correlationId\": \"23dda337-dc16-47c3-9733-72bdf7aab24e\",\n", + " \"category\": \"INVALID_AUTHENTICATION\"\n", + "}\n" + ] + } + ], "source": [ - "result = search_contacts(API_KEY, ENDPOINT_URL, \"John\")\n", + "result = search_companies(API_KEY, ENDPOINT_URL_COMPANY)\n", "print(json.dumps(result, indent=4))" ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "50a0e23f-7abc-4227-8fc3-8f6ff541a21b", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { @@ -253,6 +532,8 @@ "pygments_lexer": "ipython3", "version": "3.9.6" }, + "toc-showcode": false, + "toc-showmarkdowntxt": false, "widgets": { "application/vnd.jupyter.widget-state+json": { "state": {}, From a4ae3afbe82ee385a0d0ee15f3740ece0d4e13aa Mon Sep 17 00:00:00 2001 From: sohaib anwaar Date: Fri, 23 Feb 2024 10:43:25 +0100 Subject: [PATCH 2/4] this notebook get saved post and articles --- LinkedIn/LinkedIn_Setup_connection.ipynb | 169 ++++++++++++++++++----- 1 file changed, 137 insertions(+), 32 deletions(-) diff --git a/LinkedIn/LinkedIn_Setup_connection.ipynb b/LinkedIn/LinkedIn_Setup_connection.ipynb index 610948dc6c..935c606104 100644 --- a/LinkedIn/LinkedIn_Setup_connection.ipynb +++ b/LinkedIn/LinkedIn_Setup_connection.ipynb @@ -104,15 +104,26 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 29, "id": "cc038b10-2679-42bc-909e-09a298339df4", "metadata": { + "execution": { + "iopub.execute_input": "2024-02-23T09:00:40.781448Z", + "iopub.status.busy": "2024-02-23T09:00:40.781212Z", + "iopub.status.idle": "2024-02-23T09:00:40.784559Z", + "shell.execute_reply": "2024-02-23T09:00:40.783873Z", + "shell.execute_reply.started": "2024-02-23T09:00:40.781425Z" + }, "papermill": {}, "tags": [] }, "outputs": [], "source": [ - "import naas" + "import naas\n", + "import json\n", + "import requests\n", + "from naas_drivers import linkedin\n", + "from bs4 import BeautifulSoup" ] }, { @@ -130,10 +141,44 @@ "- `li_at`: Cookie used to authenticate Members and API clients. This value will be stored under the secret 'LINKEDIN_LI_AT'.\n", "- `JSESSIONID`: Cookie used for Cross Site Request Forgery (CSRF) protection and URL signature validation. This value will be stored under the secret 'LINKEDIN_JSESSIONID'.\n", "\n", + "\n", "**Optional**\n", "- `secrets`: Dict to add secrets to naas" ] }, + { + "cell_type": "code", + "execution_count": 30, + "id": "916a9fe8-c806-4542-94d5-2a7a7e5c8028", + "metadata": { + "execution": { + "iopub.execute_input": "2024-02-23T09:00:41.145098Z", + "iopub.status.busy": "2024-02-23T09:00:41.144865Z", + "iopub.status.idle": "2024-02-23T09:00:41.635713Z", + "shell.execute_reply": "2024-02-23T09:00:41.634866Z", + "shell.execute_reply.started": "2024-02-23T09:00:41.145073Z" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "👌 Well done! Your Secret has been sent to production. \n", + "\n", + "PS: to remove the \"Secret\" feature, just replace .add by .delete\n", + "👌 Well done! Your Secret has been sent to production. \n", + "\n", + "PS: to remove the \"Secret\" feature, just replace .add by .delete\n" + ] + } + ], + "source": [ + "li_at = 'Put you li_at token here'\n", + "JSESSIONID = 'Put your Jsession Id here'" + ] + }, { "cell_type": "code", "execution_count": null, @@ -146,15 +191,18 @@ }, "outputs": [], "source": [ - "# Mandatory\n", - "li_at = None\n", - "JSESSIONID = None\n", - "\n", - "# Optional\n", - "secrets = {\n", - " \"LINKEDIN_LI_AT\": li_at,\n", - " \"LINKEDIN_JSESSIONID\": JSESSIONID,\n", - "}" + "# Define headers and cookies\n", + "headers = {\n", + " \"X-Li-Lang\": \"en_US\",\n", + " \"Accept\": \"application/vnd.linkedin.normalized+json+2.1\",\n", + " \"Cache-Control\": \"no-cache\",\n", + " \"X-Requested-With\": \"XMLHttpRequest\",\n", + " \"X-Restli-Protocol-Version\": \"2.0.0\",\n", + " \"Csrf-Token\":JSESSIONID\n", + "}\n", + "cookies = {\"li_at\": li_at, \"JSESSIONID\": JSESSIONID}\n", + "# replace this with your url \n", + "url = \"https://www.linkedin.com//voyager/api/graphql?variables=(start:0,query:(flagshipSearchIntent:SEARCH_MY_ITEMS_SAVED_POSTS))&queryId=voyagerSearchDashClusters.a6589bc963659630adee73df22e9384c\"\n" ] }, { @@ -170,29 +218,64 @@ }, { "cell_type": "markdown", - "id": "3aadae7f-fbe5-4f97-a6a9-6efe9dd1a42f", - "metadata": { - "papermill": {}, - "tags": [] - }, + "id": "98ed1327-8787-4e28-99e2-2c15fb447298", + "metadata": {}, "source": [ - "### Add secret" + "**Get Saved Posts:** This block get all of the saved posts of your linkedIn profile " ] }, { "cell_type": "code", - "execution_count": null, - "id": "e235794a-6907-4f0e-ab2b-5aa6668a8d3b", + "execution_count": 34, + "id": "9f05e02c-f9ae-440a-bcc8-047ff0bd5e99", "metadata": { - "papermill": {}, + "execution": { + "iopub.execute_input": "2024-02-23T09:02:37.551212Z", + "iopub.status.busy": "2024-02-23T09:02:37.550974Z", + "iopub.status.idle": "2024-02-23T09:02:37.558896Z", + "shell.execute_reply": "2024-02-23T09:02:37.558314Z", + "shell.execute_reply.started": "2024-02-23T09:02:37.551187Z" + }, "tags": [] }, "outputs": [], "source": [ - "for secret in secrets:\n", - " new_value = secrets.get(secret)\n", - " if new_value:\n", - " naas.secret.add(secret, secrets.get(secret))" + "\n", + "def get_saved_posts_and_articles(url, headers, cookies):\n", + " \"\"\"\n", + " Make a request to LinkedIn API using the provided URL, headers, and cookies.\n", + "\n", + " Args:\n", + " - url (str): The URL to make the request to.\n", + " - headers (dict): Headers to include in the request.\n", + " - cookies (dict): Cookies to include in the request.\n", + "\n", + " Returns:\n", + " - urls list of all saved post and articles: Response content if the request is successful, None otherwise.\n", + " \"\"\"\n", + "\n", + " try:\n", + " all_save_urls = []\n", + " # Make the request\n", + " response = requests.get(url, headers=headers, cookies=cookies)\n", + " response.raise_for_status() # Raise an exception for 4xx or 5xx status codes\n", + "\n", + " # Check the response status code\n", + " if response.status_code == 200:\n", + " all_data = response.json()\n", + " for each in all_data.get('included'):\n", + " if each.get('navigationUrl'):\n", + " all_save_urls.append(each.get('navigationUrl'))\n", + " # Return the urls\n", + " return all_save_urls \n", + " else:\n", + " print(\"Error:\", response.status_code)\n", + " return None\n", + "\n", + " except requests.exceptions.RequestException as e:\n", + " print(\"Error:\", e)\n", + " print(\"Please provide a new li_at or JSESSIONID as the previous one has expired.\")\n", + " return None" ] }, { @@ -219,18 +302,40 @@ }, { "cell_type": "code", - "execution_count": null, - "id": "4005faa2-a781-4805-9c08-74951e900d12", + "execution_count": 35, + "id": "18edeb44-7e6c-4ea9-97ea-7581eff0ae58", "metadata": { - "papermill": {}, + "execution": { + "iopub.execute_input": "2024-02-23T09:02:38.231733Z", + "iopub.status.busy": "2024-02-23T09:02:38.231505Z", + "iopub.status.idle": "2024-02-23T09:02:43.749047Z", + "shell.execute_reply": "2024-02-23T09:02:43.747061Z", + "shell.execute_reply.started": "2024-02-23T09:02:38.231709Z" + }, "tags": [] }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Error: Exceeded 30 redirects.\n", + "Please put new li_at or JSESSIONID. Previous one is expired\n", + "None\n" + ] + } + ], "source": [ - "df = naas.secret.list()\n", - "df = df[df[\"name\"].isin(secrets.keys())]\n", - "df" + "print(get_saved_posts_and_articles(url, headers, cookies))" ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "66803641-1822-4894-a45b-83cae9b938a4", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { @@ -271,4 +376,4 @@ }, "nbformat": 4, "nbformat_minor": 5 -} \ No newline at end of file +} From 0efa1fd568038e1f6da564b62a032e834cdb4f7d Mon Sep 17 00:00:00 2001 From: Florent Ravenel Date: Fri, 23 Feb 2024 15:06:42 +0100 Subject: [PATCH 3/4] feat: get saved posts URL --- LinkedIn/LinkedIn_Get_saved_posts_URL.ipynb | 306 ++++++++++++++++++++ 1 file changed, 306 insertions(+) create mode 100644 LinkedIn/LinkedIn_Get_saved_posts_URL.ipynb diff --git a/LinkedIn/LinkedIn_Get_saved_posts_URL.ipynb b/LinkedIn/LinkedIn_Get_saved_posts_URL.ipynb new file mode 100644 index 0000000000..fe92949908 --- /dev/null +++ b/LinkedIn/LinkedIn_Get_saved_posts_URL.ipynb @@ -0,0 +1,306 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "9a08b929-eb2d-4382-940b-dcc7cc8cf223", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "\"LinkedIn.png\"" + ] + }, + { + "cell_type": "markdown", + "id": "a5b2c509-2c29-49e8-af91-4f3f1e386da3", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "# LinkedIn - Get saved posts URL\n", + "

Give Feedback | Bug report" + ] + }, + { + "cell_type": "markdown", + "id": "d77fe283-4edd-42d3-a909-8e207d4b842f", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "**Tags:** #linkedin #naas #posts #saved" + ] + }, + { + "cell_type": "markdown", + "id": "b90f2d91-c886-4e36-8265-b09d06bb1c7f", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "**Author:** [Sohaib Anwaar](https://www.linkedin.com/in/sohaibanwaar/)" + ] + }, + { + "cell_type": "markdown", + "id": "6bbf7807-dda4-4b8a-b016-fe258a0fa33f", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "**Last update:** 2024-02-23 (Created: 2024-02-23)" + ] + }, + { + "cell_type": "markdown", + "id": "214749fe-7f0b-4755-b7ea-1d200c234cc6", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "**Description:** This notebook extracts saved posts and articles URL from your LinkedIn." + ] + }, + { + "cell_type": "markdown", + "id": "a1329bea-ad80-4981-874c-8776b11f89a8", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "**References:**\n", + "- [Learn how to get your cookies on LinkedIn](https://www.notion.so/LinkedIn-driver-Get-your-cookies-d20a8e7e508e42af8a5b52e33f3dba75)\n", + "- [Naas Secret Documentation](https://site.naas.ai/docs/developers/jobs/secret)" + ] + }, + { + "cell_type": "markdown", + "id": "cff349c8-2816-4ae4-9229-027c068eeb51", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "## Input" + ] + }, + { + "cell_type": "markdown", + "id": "80266a9e-fe54-4f3e-aeb2-01483bbc53f9", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "### Import libraries" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cc038b10-2679-42bc-909e-09a298339df4", + "metadata": { + "papermill": {}, + "tags": [] + }, + "outputs": [], + "source": [ + "import naas\n", + "import json\n", + "import requests" + ] + }, + { + "cell_type": "markdown", + "id": "30127040-e487-4115-8317-5084823b3a6f", + "metadata": { + "papermill": {}, + "tags": [ + "variables" + ] + }, + "source": [ + "### Setup variables\n", + "**Mandatory**\n", + "- `li_at`: Cookie used to authenticate Members and API clients. This value will be stored under the secret 'LINKEDIN_LI_AT'.\n", + "- `JSESSIONID`: Cookie used for Cross Site Request Forgery (CSRF) protection and URL signature validation. This value will be stored under the secret 'LINKEDIN_JSESSIONID'." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "916a9fe8-c806-4542-94d5-2a7a7e5c8028", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# Mandatory\n", + "li_at = naas.secret.get(\"LINKEDIN_LI_AT\") or \"YOUR_LINKEDIN_LI_AT\"\n", + "JSESSIONID = naas.secret.get(\"LINKEDIN_JSESSIONID\") or \"YOUR_LINKEDIN_JSESSIONID\"" + ] + }, + { + "cell_type": "markdown", + "id": "1b0ae044-cac2-479f-b1fc-a3318696adf5", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "## Model" + ] + }, + { + "cell_type": "markdown", + "id": "98ed1327-8787-4e28-99e2-2c15fb447298", + "metadata": {}, + "source": [ + "### Get Saved Posts URL" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9f05e02c-f9ae-440a-bcc8-047ff0bd5e99", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "def get_saved_posts_and_articles(\n", + " li_at,\n", + " JSESSIONID\n", + "):\n", + " # Define headers and cookies\n", + " headers = {\n", + " \"X-Li-Lang\": \"en_US\",\n", + " \"Accept\": \"application/vnd.linkedin.normalized+json+2.1\",\n", + " \"Cache-Control\": \"no-cache\",\n", + " \"X-Requested-With\": \"XMLHttpRequest\",\n", + " \"X-Restli-Protocol-Version\": \"2.0.0\",\n", + " \"Csrf-Token\": JSESSIONID\n", + " }\n", + " cookies = {\n", + " \"li_at\": li_at,\n", + " \"JSESSIONID\": JSESSIONID\n", + " }\n", + " # replace this with your url \n", + " url = \"https://www.linkedin.com//voyager/api/graphql?variables=(start:0,query:(flagshipSearchIntent:SEARCH_MY_ITEMS_SAVED_POSTS))&queryId=voyagerSearchDashClusters.a6589bc963659630adee73df22e9384c\"\n", + " \n", + " try:\n", + " all_save_urls = []\n", + " # Make the request\n", + " response = requests.get(url, headers=headers, cookies=cookies)\n", + " response.raise_for_status() # Raise an exception for 4xx or 5xx status codes\n", + "\n", + " # Check the response status code\n", + " if response.status_code == 200:\n", + " all_data = response.json()\n", + " for each in all_data.get('included'):\n", + " if each.get('navigationUrl'):\n", + " all_save_urls.append(each.get('navigationUrl'))\n", + " # Return the urls\n", + " return all_save_urls \n", + " else:\n", + " print(\"Error:\", response.status_code)\n", + " return None\n", + "\n", + " except requests.exceptions.RequestException as e:\n", + " print(\"Error:\", e)\n", + " print(\"Please provide a new li_at or JSESSIONID as the previous one has expired.\")\n", + " return None\n", + " \n", + "result = get_saved_posts_and_articles(li_at, JSESSIONID)\n", + "result" + ] + }, + { + "cell_type": "markdown", + "id": "7c97931b-4ce8-46f9-a70c-45393c5669ee", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "## Output" + ] + }, + { + "cell_type": "markdown", + "id": "760ec150-d79a-4208-adfe-d0336d443dc8", + "metadata": { + "papermill": {}, + "tags": [] + }, + "source": [ + "### Display result" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "18edeb44-7e6c-4ea9-97ea-7581eff0ae58", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "result" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "66803641-1822-4894-a45b-83cae9b938a4", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.6" + }, + "naas": { + "notebook_id": "2c8d11e1766e5b73434b99d74a5eb94415e244c13a00986210282bbc6117660c", + "notebook_path": "LinkedIn/LinkedIn_Setup_connection.ipynb" + }, + "papermill": { + "default_parameters": {}, + "environment_variables": {}, + "parameters": {}, + "version": "2.4.0" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From c9366e3a5c35a389a696557b53780d83c7027906 Mon Sep 17 00:00:00 2001 From: Florent Ravenel Date: Fri, 23 Feb 2024 15:08:42 +0100 Subject: [PATCH 4/4] fix: push init template setup connection --- LinkedIn/LinkedIn_Setup_connection.ipynb | 167 +++++------------------ 1 file changed, 31 insertions(+), 136 deletions(-) diff --git a/LinkedIn/LinkedIn_Setup_connection.ipynb b/LinkedIn/LinkedIn_Setup_connection.ipynb index 935c606104..90d9880084 100644 --- a/LinkedIn/LinkedIn_Setup_connection.ipynb +++ b/LinkedIn/LinkedIn_Setup_connection.ipynb @@ -104,26 +104,15 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": null, "id": "cc038b10-2679-42bc-909e-09a298339df4", "metadata": { - "execution": { - "iopub.execute_input": "2024-02-23T09:00:40.781448Z", - "iopub.status.busy": "2024-02-23T09:00:40.781212Z", - "iopub.status.idle": "2024-02-23T09:00:40.784559Z", - "shell.execute_reply": "2024-02-23T09:00:40.783873Z", - "shell.execute_reply.started": "2024-02-23T09:00:40.781425Z" - }, "papermill": {}, "tags": [] }, "outputs": [], "source": [ - "import naas\n", - "import json\n", - "import requests\n", - "from naas_drivers import linkedin\n", - "from bs4 import BeautifulSoup" + "import naas" ] }, { @@ -141,44 +130,10 @@ "- `li_at`: Cookie used to authenticate Members and API clients. This value will be stored under the secret 'LINKEDIN_LI_AT'.\n", "- `JSESSIONID`: Cookie used for Cross Site Request Forgery (CSRF) protection and URL signature validation. This value will be stored under the secret 'LINKEDIN_JSESSIONID'.\n", "\n", - "\n", "**Optional**\n", "- `secrets`: Dict to add secrets to naas" ] }, - { - "cell_type": "code", - "execution_count": 30, - "id": "916a9fe8-c806-4542-94d5-2a7a7e5c8028", - "metadata": { - "execution": { - "iopub.execute_input": "2024-02-23T09:00:41.145098Z", - "iopub.status.busy": "2024-02-23T09:00:41.144865Z", - "iopub.status.idle": "2024-02-23T09:00:41.635713Z", - "shell.execute_reply": "2024-02-23T09:00:41.634866Z", - "shell.execute_reply.started": "2024-02-23T09:00:41.145073Z" - }, - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "👌 Well done! Your Secret has been sent to production. \n", - "\n", - "PS: to remove the \"Secret\" feature, just replace .add by .delete\n", - "👌 Well done! Your Secret has been sent to production. \n", - "\n", - "PS: to remove the \"Secret\" feature, just replace .add by .delete\n" - ] - } - ], - "source": [ - "li_at = 'Put you li_at token here'\n", - "JSESSIONID = 'Put your Jsession Id here'" - ] - }, { "cell_type": "code", "execution_count": null, @@ -191,18 +146,15 @@ }, "outputs": [], "source": [ - "# Define headers and cookies\n", - "headers = {\n", - " \"X-Li-Lang\": \"en_US\",\n", - " \"Accept\": \"application/vnd.linkedin.normalized+json+2.1\",\n", - " \"Cache-Control\": \"no-cache\",\n", - " \"X-Requested-With\": \"XMLHttpRequest\",\n", - " \"X-Restli-Protocol-Version\": \"2.0.0\",\n", - " \"Csrf-Token\":JSESSIONID\n", - "}\n", - "cookies = {\"li_at\": li_at, \"JSESSIONID\": JSESSIONID}\n", - "# replace this with your url \n", - "url = \"https://www.linkedin.com//voyager/api/graphql?variables=(start:0,query:(flagshipSearchIntent:SEARCH_MY_ITEMS_SAVED_POSTS))&queryId=voyagerSearchDashClusters.a6589bc963659630adee73df22e9384c\"\n" + "# Mandatory\n", + "li_at = None\n", + "JSESSIONID = None\n", + "\n", + "# Optional\n", + "secrets = {\n", + " \"LINKEDIN_LI_AT\": li_at,\n", + " \"LINKEDIN_JSESSIONID\": JSESSIONID,\n", + "}" ] }, { @@ -218,64 +170,29 @@ }, { "cell_type": "markdown", - "id": "98ed1327-8787-4e28-99e2-2c15fb447298", - "metadata": {}, + "id": "3aadae7f-fbe5-4f97-a6a9-6efe9dd1a42f", + "metadata": { + "papermill": {}, + "tags": [] + }, "source": [ - "**Get Saved Posts:** This block get all of the saved posts of your linkedIn profile " + "### Add secret" ] }, { "cell_type": "code", - "execution_count": 34, - "id": "9f05e02c-f9ae-440a-bcc8-047ff0bd5e99", + "execution_count": null, + "id": "e235794a-6907-4f0e-ab2b-5aa6668a8d3b", "metadata": { - "execution": { - "iopub.execute_input": "2024-02-23T09:02:37.551212Z", - "iopub.status.busy": "2024-02-23T09:02:37.550974Z", - "iopub.status.idle": "2024-02-23T09:02:37.558896Z", - "shell.execute_reply": "2024-02-23T09:02:37.558314Z", - "shell.execute_reply.started": "2024-02-23T09:02:37.551187Z" - }, + "papermill": {}, "tags": [] }, "outputs": [], "source": [ - "\n", - "def get_saved_posts_and_articles(url, headers, cookies):\n", - " \"\"\"\n", - " Make a request to LinkedIn API using the provided URL, headers, and cookies.\n", - "\n", - " Args:\n", - " - url (str): The URL to make the request to.\n", - " - headers (dict): Headers to include in the request.\n", - " - cookies (dict): Cookies to include in the request.\n", - "\n", - " Returns:\n", - " - urls list of all saved post and articles: Response content if the request is successful, None otherwise.\n", - " \"\"\"\n", - "\n", - " try:\n", - " all_save_urls = []\n", - " # Make the request\n", - " response = requests.get(url, headers=headers, cookies=cookies)\n", - " response.raise_for_status() # Raise an exception for 4xx or 5xx status codes\n", - "\n", - " # Check the response status code\n", - " if response.status_code == 200:\n", - " all_data = response.json()\n", - " for each in all_data.get('included'):\n", - " if each.get('navigationUrl'):\n", - " all_save_urls.append(each.get('navigationUrl'))\n", - " # Return the urls\n", - " return all_save_urls \n", - " else:\n", - " print(\"Error:\", response.status_code)\n", - " return None\n", - "\n", - " except requests.exceptions.RequestException as e:\n", - " print(\"Error:\", e)\n", - " print(\"Please provide a new li_at or JSESSIONID as the previous one has expired.\")\n", - " return None" + "for secret in secrets:\n", + " new_value = secrets.get(secret)\n", + " if new_value:\n", + " naas.secret.add(secret, secrets.get(secret))" ] }, { @@ -302,40 +219,18 @@ }, { "cell_type": "code", - "execution_count": 35, - "id": "18edeb44-7e6c-4ea9-97ea-7581eff0ae58", + "execution_count": null, + "id": "4005faa2-a781-4805-9c08-74951e900d12", "metadata": { - "execution": { - "iopub.execute_input": "2024-02-23T09:02:38.231733Z", - "iopub.status.busy": "2024-02-23T09:02:38.231505Z", - "iopub.status.idle": "2024-02-23T09:02:43.749047Z", - "shell.execute_reply": "2024-02-23T09:02:43.747061Z", - "shell.execute_reply.started": "2024-02-23T09:02:38.231709Z" - }, + "papermill": {}, "tags": [] }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Error: Exceeded 30 redirects.\n", - "Please put new li_at or JSESSIONID. Previous one is expired\n", - "None\n" - ] - } - ], + "outputs": [], "source": [ - "print(get_saved_posts_and_articles(url, headers, cookies))" + "df = naas.secret.list()\n", + "df = df[df[\"name\"].isin(secrets.keys())]\n", + "df" ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "66803641-1822-4894-a45b-83cae9b938a4", - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": {