Skip to content

Commit

Permalink
link suggestions functionality
Browse files Browse the repository at this point in the history
  • Loading branch information
jacobryas4 committed Sep 15, 2023
1 parent e3d055f commit 5fe112e
Show file tree
Hide file tree
Showing 4 changed files with 62 additions and 15 deletions.
26 changes: 13 additions & 13 deletions index/dapps.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# to build the index for dapps, first scrap them from dapplist.com using the scraper
# then run: python -c "from index.dapps import backfill; backfill()"
# to build the index for dapps, first scrape them using the scraper
# then run: python3 -c "from index.dapps import backfill; backfill()"


from langchain.docstore.document import Document
Expand Down Expand Up @@ -39,37 +39,37 @@ def create_schema(delete_first: bool = False) -> None:
}
},
"properties": [
{"name": DAPP_NAME, "dataType": ["string"]},
{"name": DAPP_DESCRIPTION, "dataType": ["string"]},
{"name": DAPP_URL, "dataType": ["string"]},
{"name": DAPP_NAME, "dataType": ["text"]},
{"name": DAPP_DESCRIPTION, "dataType": ["text"]},
{"name": DAPP_URL, "dataType": ["text"]},
{
"name": "twitterHandle",
"dataType": ["string"],
"dataType": ["text"],
"description": "The Twitter handle of the Dapp"
},
{
"name": "blogLinks",
"dataType": ["string[]"],
"dataType": ["text[]"],
"description": "Links to the blog posts related to the Dapp"
},
{
"name": "discord",
"dataType": ["string"],
"dataType": ["text"],
"description": "The Discord server link of the Dapp"
},
{
"name": "facebook",
"dataType": ["string"],
"dataType": ["text"],
"description": "The Facebook page link of the Dapp"
},
{
"name": "instagram",
"dataType": ["string"],
"dataType": ["text"],
"description": "The Instagram profile link of the Dapp"
},
{
"name": "telegram",
"dataType": ["string"],
"dataType": ["text"],
"description": "The Telegram channel link of the Dapp"
}
]
Expand All @@ -85,10 +85,10 @@ def backfill():
try:
from langchain.vectorstores import Weaviate

with open('./knowledge_base/dapp-list.json') as f:
with open('./knowledge_base/dapps_ranked.json') as f:
dapp_list = json.load(f)

# Extract the 'id' field from each dapp and store it in the 'documents' list
# Extract the 'name' field from each dapp and store it in the 'documents' list
documents = [d.pop("name") for d in dapp_list]

# Use the remaining fields in each dapp to populate the 'metadatas' list
Expand Down
31 changes: 31 additions & 0 deletions scrape/dapp_scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,3 +160,34 @@ def clean_payload_data(original_data):
return reduced_data


def load_data_from_json_to_db(session, json_path):
    """Load dapp records from a JSON file and insert them into the database.

    The JSON file must contain a list of objects. Each object needs the
    ``description``, ``name`` and ``url`` keys; the social-link keys
    (``twitterHandle``, ``blogLinks``, ``discord``, ``facebook``,
    ``instagram``, ``telegram``) are optional and stored as NULL when absent,
    matching the nullable columns on the ``Dapp`` model.

    Args:
        session: SQLAlchemy session used to stage and commit the new rows.
        json_path: Path to the JSON file holding the scraped dapp entries.

    Raises:
        KeyError: If an entry lacks ``description``, ``name`` or ``url``.
        Exception: Any error raised by the commit is re-raised after the
            session has been rolled back.
    """
    # The model is declared as ``Dapp`` (not ``DApp``) in scrape/models.py.
    # NOTE(review): imported locally so this function is self-contained;
    # confirm the import path matches how this module is executed.
    from scrape.models import Dapp

    # Table creation is intentionally not performed here; the original
    # author noted that ``Base.metadata.create_all`` is not needed.

    # JSON is UTF-8 by specification, so do not rely on the platform default.
    with open(json_path, "r", encoding="utf-8") as file:
        dapps_data = json.load(file)

    # Build every instance before touching the session so that a malformed
    # entry fails early without leaving half of the batch staged.
    dapp_instances = [
        Dapp(
            description=dapp["description"],
            name=dapp["name"],
            url=dapp["url"],
            twitter_handle=dapp.get("twitterHandle"),
            blog_links=dapp.get("blogLinks"),
            discord=dapp.get("discord"),
            facebook=dapp.get("facebook"),
            instagram=dapp.get("instagram"),
            telegram=dapp.get("telegram"),
        )
        for dapp in dapps_data
    ]
    session.add_all(dapp_instances)

    try:
        session.commit()
    except Exception:
        # Leave the session usable for the caller, then surface the error.
        session.rollback()
        raise

18 changes: 17 additions & 1 deletion scrape/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from sqlalchemy.orm import ( # type: ignore
scoped_session, sessionmaker, relationship,
backref)
from sqlalchemy.dialects.postgresql import UUID, JSONB
from sqlalchemy.dialects.postgresql import UUID, JSONB, ARRAY, TEXT
from sqlalchemy.ext.declarative import declarative_base # type: ignore
from sqlalchemy_utils import ChoiceType, Timestamp # type: ignore

Expand All @@ -35,3 +35,19 @@ class ScrapedUrl(Base, Timestamp): # type: ignore
data = Column(JSONB, nullable=False)

Index('scraped_url_lookup', url, unique=True)

class Dapp(Base):  # type: ignore
    """ORM model for a single dapp stored in the ``dapps`` table.

    ``description``, ``name`` and ``url`` are required; ``name`` is unique.
    The social-link columns are all nullable.
    """

    __tablename__ = 'dapps'

    # An Index built from string column names is only bound to the table when
    # it is passed through ``__table_args__``; a bare ``Index(...)`` statement
    # in the class body with string names is never attached and so is never
    # created in the database.
    __table_args__ = (
        Index('dapp_name_url_index', 'name', 'url', unique=True),
    )

    id = Column(Integer, primary_key=True, autoincrement=True)
    description = Column(TEXT, nullable=False)
    name = Column(String(255), nullable=False, unique=True)
    url = Column(String(255), nullable=False)
    twitter_handle = Column(String(255), nullable=True)
    blog_links = Column(ARRAY(String(255)), nullable=True)
    discord = Column(String(255), nullable=True)
    facebook = Column(String(255), nullable=True)
    instagram = Column(String(255), nullable=True)
    telegram = Column(String(255), nullable=True)
2 changes: 1 addition & 1 deletion tools/index_answer.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

TEMPLATE = '''You are a web3 assistant. You help users with answering web3-related questions. Your responses should sound natural, helpful, cheerful, and engaging, and you should use easy to understand language with explanations for jargon.
Information to help complete your task is below. Only use information below to answer the question, and create a final answer with inline citations linked to the provided source URLs. If you don't know the answer, just say that you don't know. Don't try to make up an answer. ALWAYS return a "SOURCES" part in your answer corresponding to the numbered inline citations.
Information to help complete your task is below. Only use information below to answer the question, and create a final answer with inline citations linked to the provided source URLs. If you don't know the answer, just say that you don't know. Don't try to make up an answer. ALWAYS return a "SOURCES" part in your answer corresponding to the numbered inline citations. ALWAYS provide a link to each citation in SOURCES.
---
{task_info}
---
Expand Down

0 comments on commit 5fe112e

Please sign in to comment.