[Google] Reverse search with google lens
npc203 committed Aug 7, 2024
1 parent e73ce06 commit d6dab22
Showing 3 changed files with 62 additions and 45 deletions.
105 changes: 60 additions & 45 deletions google/google.py
@@ -1,13 +1,16 @@
import asyncio
import functools
import json
import logging
import re
from datetime import datetime, timezone
from textwrap import shorten
from typing import Optional
from urllib.parse import quote_plus, urlencode

import aiohttp
import discord
import js2py
from bs4 import BeautifulSoup
from html2text import html2text as h2t
from redbot.core import commands
@@ -18,6 +21,8 @@
from .utils import ResultMenu, Source, get_card, get_query, nsfwcheck, s
from .yandex import Yandex

logger = logging.getLogger("red.google")

# TODO Add optional way to use from google search api


@@ -40,8 +45,8 @@ def __init__(self, bot: Red) -> None:
)
self.session = aiohttp.ClientSession()

def cog_unload(self):
asyncio.create_task(self.session.close())
async def cog_unload(self):
await self.session.close()

def format_help_for_context(self, ctx: commands.Context) -> str:
"""Thanks Sinbad!"""
@@ -51,7 +56,7 @@ def format_help_for_context(self, ctx: commands.Context) -> str:

@commands.group(invoke_without_command=True)
@commands.bot_has_permissions(embed_links=True, add_reactions=True)
async def google(self, ctx, *, query: str = None):
async def google(self, ctx, *, query: Optional[str] = None):
"""Google search your query from Discord channel."""
if not query:
return await ctx.send("Please enter something to search")
@@ -234,7 +239,7 @@ async def book(self, ctx, *, query: str):
await ResultMenu(source=Source(pages, per_page=1)).start(ctx)

@google.command()
async def doodle(self, ctx, month: int = None, year: int = None):
async def doodle(self, ctx, month: Optional[int] = None, year: Optional[int] = None):
"""Responds with Google doodles of the current month.
Or doodles of specific month/year if `month` and `year` values are provided.
@@ -275,7 +280,7 @@ async def doodle(self, ctx, month: int = None, year: int = None):
await ResultMenu(source=Source(pages, per_page=1)).start(ctx)

@google.command(aliases=["img"])
async def image(self, ctx, *, query: str = None):
async def image(self, ctx, *, query: Optional[str] = None):
"""Search google images from discord"""
if not query:
await ctx.send("Please enter some image name to search")
@@ -286,15 +291,15 @@ async def image(self, ctx, *, query: str = None):
size = len(response)

class ImgSource(menus.ListPageSource):
async def format_page(self, menu, entry):
async def format_page(self, menu, page):
return (
discord.Embed(
title=f"Pages: {menu.current_page+1}/{size}",
color=await ctx.embed_color(),
description="Some images might not be visible.",
url=kwargs["redir"],
)
.set_image(url=entry)
.set_image(url=page)
.set_footer(text=f"Safe Search: {not isnsfw}")
)

@@ -303,8 +308,8 @@ async def format_page(self, menu, entry):
else:
await ctx.send("No result")

@google.command(aliases=["rev"], enabled=False)
async def reverse(self, ctx, *, url: str = None):
@google.command(aliases=["rev"], enabled=True)
async def reverse(self, ctx, *, url: Optional[str] = None):
"""Attach or paste the url of an image to reverse search, or reply to a message which has the image/embed with the image"""
isnsfw = nsfwcheck(ctx)
if query := get_query(ctx, url):
@@ -313,46 +318,35 @@ async def reverse(self, ctx, *, url: str = None):
return await ctx.send_help()

encoded = {
"image_url": query,
"encoded_image": None,
"image_content": None,
"filename": None,
"hl": "en",
"url": query,
}

final_url = "http://lens.google.com/uploadbyurl?" + urlencode(encoded)
async with ctx.typing():
async with self.session.get(
"https://www.google.com/searchbyimage?" + urlencode(encoded),
final_url,
headers=self.options,
) as resp:
text = await resp.read()
redir_url = resp.url
prep = functools.partial(self.reverse_search, text)
result, (response, kwargs) = await self.bot.loop.run_in_executor(None, prep)
results = await self.bot.loop.run_in_executor(None, prep)
pages = []
if response:
groups = [response[n : n + 3] for n in range(0, len(response), 3)]
for num, group in enumerate(groups, 1):
if results:
for num, res in enumerate(results, 1):
emb = discord.Embed(
title="Google Reverse Image Search",
description="[`"
+ (result or "Nothing significant found")
+ f"`]({redir_url})",
description=f"[`{res['domain_name']}`]({res['orig_url']})",
color=await ctx.embed_color(),
)
for i in group:
desc = (f"[{i.url[:60]}]({i.url})\n" if i.url else "") + f"{i.desc}"[:1024]
emb.add_field(
name=f"{i.title}",
value=desc or "Nothing",
inline=False,
)
emb.set_footer(
text=f"Safe Search: {not isnsfw} | "
+ kwargs["stats"].replace("\n", " ")
+ f"| Page: {num}/{len(groups)}"
# TODO maybe constraint, clip this to 1024
emb.add_field(
name=res["title"],
value=res["orig_url"],
inline=False,
)
emb.set_thumbnail(url=encoded["image_url"])
emb.set_footer(text=f"Page: {num}/{len(results)}")
emb.set_thumbnail(url=res["icon_url"])
emb.set_image(url=res["image_url"])
pages.append(emb)
if pages:
await ResultMenu(source=Source(pages, per_page=1)).start(ctx)
@@ -362,7 +356,7 @@ async def reverse(self, ctx, *, url: str = None):
title="Google Reverse Image Search",
description="[`" + ("Nothing significant found") + f"`]({redir_url})",
color=await ctx.embed_color(),
).set_thumbnail(url=encoded["image_url"])
).set_thumbnail(url=encoded["url"])
)
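Note: the rewritten `reverse` command now sends the image URL to Google Lens's upload-by-url endpoint instead of the old `searchbyimage` endpoint. A minimal standalone sketch of that request follows, under the assumption that a browser-like User-Agent (what the cog passes as `self.options`) is enough to get the result page; `IMAGE_URL` is a hypothetical placeholder.

```python
# Sketch of the Lens request built in `reverse` (not the cog itself).
import asyncio
from urllib.parse import urlencode

import aiohttp

IMAGE_URL = "https://example.com/cat.png"  # hypothetical image URL


async def fetch_lens_page() -> str:
    # Same construction as the diff: a single `url` query parameter.
    final_url = "http://lens.google.com/uploadbyurl?" + urlencode({"url": IMAGE_URL})
    headers = {"User-Agent": "Mozilla/5.0"}  # assumption: stands in for self.options
    async with aiohttp.ClientSession() as session:
        async with session.get(final_url, headers=headers) as resp:
            return await resp.text()


# html = asyncio.run(fetch_lens_page())
```

The page text is then handed to `reverse_search` (below) through `run_in_executor`, since the js2py evaluation it performs is blocking.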

@commands.is_owner()
@@ -381,7 +375,6 @@ async def get_result(self, query, images=False, nsfw=False):

async def get_html(url, encoded):
async with self.session.get(url + encoded, headers=self.options) as resp:
self.cookies = resp.cookies
return await resp.text(), resp.url

if not nsfw:
@@ -402,13 +395,35 @@ async def get_html(url, encoded):

def reverse_search(self, text):
soup = BeautifulSoup(text, features="html.parser")
if check := soup.find("div", class_="card-section"):
if "The URL doesn't refer" in check.text:
return check.text, (None, None)
if res := soup.find("input", class_="gLFyf gsfi"):
return res["value"], (self.parser_text(text, soup=soup, cards=False) or (None, None))

return None, (None, None)
all_scripts = soup.findAll("script", {"nonce": True})
txts = []
for tag in all_scripts:
txt = tag.get_text()
if txt.startswith("AF_initDataCallback("):
txts.append(txt)

fin_data = []
for txt in txts:
if "https://encrypted-tbn" in txt:
txt = txt.replace("AF_initDataCallback", "var result = JSON.stringify", 1)
context = js2py.EvalJs()
context.execute(txt)
data = json.loads(context.result)
for item in data["data"][1][0][1][8][8][0][12]:
try:
fin_data.append(
{
"title": item[3],
"orig_url": item[5],
"domain_name": item[14],
"image_url": item[0][0],
"icon_url": item[15][0],
}
)
except IndexError as e:
# Silently ignore this for now
logger.debug(e)
return fin_data
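Note: the new `reverse_search` pulls its results out of the `AF_initDataCallback(...)` script tags that Lens embeds in the page: the call is rewritten into `JSON.stringify`, evaluated with js2py, and the decoded JSON is indexed for title, URL, domain, image and icon. A toy demonstration of that rewrite trick, using a made-up stand-in for the real (much larger) payload:

```python
# Toy demo of the js2py trick in `reverse_search`; the script body here is a
# fabricated stand-in for Google's real AF_initDataCallback blob.
import json

import js2py

script_text = "AF_initDataCallback({key: 'ds:1', hash: '2', data: [[1, 'cat']]});"

# Same rewrite as the cog: turn the JS call into a JSON string we can load.
script_text = script_text.replace("AF_initDataCallback", "var result = JSON.stringify", 1)
context = js2py.EvalJs()
context.execute(script_text)
payload = json.loads(context.result)
print(payload["data"])  # [[1, 'cat']]
```

The nested indices used on the real payload (`data["data"][1][0][1][8][8][0][12]` and the per-item offsets) are undocumented and can shift whenever Google reshuffles the page, which is why the loop above swallows `IndexError` and only logs it at debug level.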

def parser_text(self, text, soup=None, cards: bool = True):
"""My bad logic for scraping"""
@@ -431,7 +446,7 @@ def parser_text(self, text, soup=None, cards: bool = True):
else:
url = None
title = None
if desc := res.select_one("div.Z26q7c>div.VwiC3b"):
if desc := res.select_one("div.kb0PBd>div.VwiC3b"):
desc = h2t(desc.text)[:500]
else:
desc = "Not found"
1 change: 1 addition & 0 deletions google/info.json
@@ -13,6 +13,7 @@
"requirements": [
"html2text",
"beautifulsoup4",
"js2py",
"git+https://github.com/npc203/redbot-ext-menus-views"
],
"tags": [
1 change: 1 addition & 0 deletions google/utils.py
@@ -35,6 +35,7 @@ def get_url(msg_obj, check=False):
return None if check else msg_obj.content.lstrip("<").rstrip(">")


# TODO improve this?
def check_url(url: str):
# Helper function to check if valid url or not
return url.startswith("http") and " " not in url
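Note: for the `# TODO improve this?` comment, one possible tightening of `check_url` (an illustration only, not part of this commit) is to parse the URL rather than string-match it:

```python
# Hypothetical stricter variant of check_url; the committed helper keeps the
# simpler startswith/space check above.
from urllib.parse import urlparse


def check_url(url: str) -> bool:
    parsed = urlparse(url)
    return parsed.scheme in ("http", "https") and bool(parsed.netloc) and " " not in url
```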
