From 2a265465c3777d96531d40a892a8c03ac19d4b66 Mon Sep 17 00:00:00 2001 From: Alton Liew Date: Thu, 16 Nov 2023 06:21:21 +0100 Subject: [PATCH] feat: Get and print images using google cloud API key and CSE ID --- .../Google_Search_Find_Images_link.ipynb | 157 ++++++++++++++++-- 1 file changed, 141 insertions(+), 16 deletions(-) diff --git a/Google Search/Google_Search_Find_Images_link.ipynb b/Google Search/Google_Search_Find_Images_link.ipynb index c0dd198bb2..fc75d5f1fc 100644 --- a/Google Search/Google_Search_Find_Images_link.ipynb +++ b/Google Search/Google_Search_Find_Images_link.ipynb @@ -74,7 +74,9 @@ "tags": [] }, "source": [ - "**References:**\n- [Google Search Documentation](https://developers.google.com/search/docs/guides/intro-structured-data)\n- [Google Search Console](https://search.google.com/search-console/about)" + "**References:**\n", + "- [Google Search Documentation](https://developers.google.com/search/docs/guides/intro-structured-data)\n", + "- [Google Search Console](https://search.google.com/search-console/about)" ] }, { @@ -101,24 +103,66 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "ba3dbe0e-d857-432d-9eef-40e18253653e", "metadata": { + "execution": { + "iopub.execute_input": "2023-11-16T05:18:26.502745Z", + "iopub.status.busy": "2023-11-16T05:18:26.502474Z", + "iopub.status.idle": "2023-11-16T05:18:28.424101Z", + "shell.execute_reply": "2023-11-16T05:18:28.423473Z", + "shell.execute_reply.started": "2023-11-16T05:18:26.502673Z" + }, "papermill": {}, "tags": [] }, - "source": "import requests\nimport json", - "outputs": [] + "outputs": [], + "source": [ + "import requests\n", + "import naas" + ] + }, + { + "cell_type": "markdown", + "id": "4b7a8f50-760f-4abf-912a-f8db642f0f92", + "metadata": {}, + "source": [ + "### Setup variables" + ] }, { "cell_type": "markdown", - "id": "deb87ffa-e40b-492c-bd44-fc352e3c3e68", + "id": "27869816-bf38-4224-8a42-effe3d75713e", + "metadata": {}, + "source": [ + "- 
# Credentials are read from the naas secret store instead of being hard-coded:
# the Google Cloud Platform API key and the Programmable Search Engine ID.
API_KEY, CSE_ID = (
    naas.secret.get(secret_name) for secret_name in ("API_KEY", "CSE_ID")
)

# Search settings: the term to look up, the maximum number of links to print,
# and the index of the first result to request.
query, limit, start_index = 'bottle', 20, 1
def get_google_images(API_KEY, CSE_ID, query, start_index, timeout=10):
    """Fetch one page of image results from the Google Custom Search JSON API.

    Args:
        API_KEY: API key from Google Cloud Platform (sent as ``key``).
        CSE_ID: Programmable Search Engine ID (sent as ``cx``).
        query: Search term (sent as ``q``).
        start_index: Index of the first result to return (sent as ``start``).
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        The decoded JSON body of the response, as produced by
        ``response.json()``.

    Raises:
        requests.exceptions.RequestException: If the request cannot be
            completed (including when ``timeout`` elapses).
    """
    params = {
        'q': query,
        'key': API_KEY,
        'cx': CSE_ID,
        'searchType': 'image',
        'start': start_index,
    }
    # Hand the values to requests via `params` so each one is URL-encoded;
    # interpolating them into the URL by hand breaks on queries that contain
    # characters such as '&', '#' or '+'.
    response = requests.get(
        'https://www.googleapis.com/customsearch/v1',
        params=params,
        timeout=timeout,  # without a timeout the cell can hang forever
    )
    return response.json()


def print_image_links(data, limit):
    """Print the ``link`` of at most ``limit`` entries of ``data['items']``.

    Args:
        data: Decoded API response (a dict), as returned by
            ``get_google_images``.
        limit: Maximum number of links to print. Zero or a negative value
            prints nothing.

    If ``data`` has no ``'items'`` key, a single message is printed instead:
    the API error when the payload carries an ``'error'`` entry, otherwise
    ``'No image results found.'``.
    """
    if 'items' not in data:
        error = data.get('error')
        if error:
            # NOTE(review): assumes the usual Google API error shape
            # {"error": {"message": ...}}; anything else is printed as-is.
            message = error.get('message', error) if isinstance(error, dict) else error
            print(f'API error: {message}')
        else:
            print('No image results found.')
        return

    # Slicing caps the output and, unlike counting down to exactly zero,
    # also behaves for limit <= 0.
    for item in data['items'][:max(limit, 0)]:
        print(item['link'])
+ ] + }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "id": "a63721db-baba-416d-8ef7-8be84327c38b", "metadata": { + "execution": { + "iopub.execute_input": "2023-11-16T05:18:28.627470Z", + "iopub.status.busy": "2023-11-16T05:18:28.627195Z", + "iopub.status.idle": "2023-11-16T05:18:28.856903Z", + "shell.execute_reply": "2023-11-16T05:18:28.856241Z", + "shell.execute_reply.started": "2023-11-16T05:18:28.627439Z" + }, "papermill": {}, "tags": [] }, - "source": "# Set the query string\nquery = \"Google Search\"\n# Find the images link\nimages_link = find_images_link(query)\n# Print the images link\nprint(images_link)", - "outputs": [] + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "No image results found.\n" + ] + } + ], + "source": [ + "while limit > 0:\n", + " data = get_google_images(API_KEY, CSE_ID, query, start_index)\n", + " print_image_links(data, limit)\n", + " start_index += 10\n", + " limit -= min(10, limit)\n", + " \n", + " if 'queries' not in data or 'nextPage' not in data['queries']:\n", + " break" + ] } ], "metadata": { @@ -227,4 +352,4 @@ }, "nbformat": 4, "nbformat_minor": 5 -} \ No newline at end of file +}