From 7a7169ae4602d63875c25ebeebf576453119ab67 Mon Sep 17 00:00:00 2001 From: Jacob Bryant Date: Tue, 26 Sep 2023 16:17:57 -0400 Subject: [PATCH 1/4] migration for link suggestion and less verbose alembic-check --- .github/workflows/alembic-check.yaml | 4 +- .../versions/b060d9ab3367_add_bar_table.py | 44 +++++++++++++++++++ 2 files changed, 46 insertions(+), 2 deletions(-) create mode 100644 alembic/versions/b060d9ab3367_add_bar_table.py diff --git a/.github/workflows/alembic-check.yaml b/.github/workflows/alembic-check.yaml index 7688f1a0..95be675b 100644 --- a/.github/workflows/alembic-check.yaml +++ b/.github/workflows/alembic-check.yaml @@ -73,9 +73,9 @@ jobs: run: | GCP_SSH_CMD="gcloud compute ssh cacti-bastion-server --zone us-east1-b --ssh-key-file /tmp/gcp/google_compute_engine --quiet --tunnel-through-iap --ssh-flag" if [[ ${GITHUB_BASE_REF:-${GITHUB_REF#refs/heads/}} == 'master' ]]; then - $GCP_SSH_CMD '-vvv -fN -L 5432:${{ secrets.PROD_CHATDB_INTERNAL_IP }}' + $GCP_SSH_CMD '-fN -L 5432:${{ secrets.PROD_CHATDB_INTERNAL_IP }}' else - $GCP_SSH_CMD '-vvv -fN -L 5432:${{ secrets.DEV_CHATDB_INTERNAL_IP }}' + $GCP_SSH_CMD '-fN -L 5432:${{ secrets.DEV_CHATDB_INTERNAL_IP }}' fi cd backend diff --git a/alembic/versions/b060d9ab3367_add_bar_table.py b/alembic/versions/b060d9ab3367_add_bar_table.py new file mode 100644 index 00000000..96c0cbde --- /dev/null +++ b/alembic/versions/b060d9ab3367_add_bar_table.py @@ -0,0 +1,44 @@ +"""Add Bar table + +Revision ID: b060d9ab3367 +Revises: 7cc8cbfe072d +Create Date: 2023-09-26 15:58:57.060974 + +""" +from alembic import op +import sqlalchemy as sa +import sqlalchemy_utils +from sqlalchemy.dialects import postgresql + +# revision identifiers, used by Alembic. +revision = 'b060d9ab3367' +down_revision = '7cc8cbfe072d' +branch_labels = None +depends_on = None + + +def upgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + op.drop_table('dapp') + # ### end Alembic commands ### + + +def downgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + op.create_table('dapp', + sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=False), + sa.Column('updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=False), + sa.Column('id', postgresql.UUID(), autoincrement=False, nullable=False), + sa.Column('description', sa.TEXT(), autoincrement=False, nullable=False), + sa.Column('name', sa.VARCHAR(length=255), autoincrement=False, nullable=False), + sa.Column('url', sa.VARCHAR(length=255), autoincrement=False, nullable=False), + sa.Column('twitter_handle', sa.VARCHAR(length=255), autoincrement=False, nullable=True), + sa.Column('blog_links', postgresql.ARRAY(sa.VARCHAR(length=255)), autoincrement=False, nullable=True), + sa.Column('discord', sa.VARCHAR(length=255), autoincrement=False, nullable=True), + sa.Column('facebook', sa.VARCHAR(length=255), autoincrement=False, nullable=True), + sa.Column('instagram', sa.VARCHAR(length=255), autoincrement=False, nullable=True), + sa.Column('telegram', sa.VARCHAR(length=255), autoincrement=False, nullable=True), + sa.PrimaryKeyConstraint('id', name='dapp_pkey'), + sa.UniqueConstraint('name', name='dapp_name_key') + ) + # ### end Alembic commands ### From 389c334a064d0fc9b4ea53ef25d0a69b9b7416f4 Mon Sep 17 00:00:00 2001 From: Jacob Bryant Date: Wed, 27 Sep 2023 09:55:30 -0400 Subject: [PATCH 2/4] removing Dapp model and related bits --- scrape/dapp_scraper.py | 43 ------------------------------------------ scrape/models.py | 15 --------------- 2 files changed, 58 deletions(-) diff --git a/scrape/dapp_scraper.py b/scrape/dapp_scraper.py index 2950a3ea..47c41d7a 100644 --- a/scrape/dapp_scraper.py +++ b/scrape/dapp_scraper.py @@ -8,11 +8,6 @@ import json from typing import List -from scrape.models import ( - db_session, - Dapp -) - BROWSERLESS_API_KEY = os.getenv('BROWSERLESS_API_KEY', '') SCRAPE_API_URL = f'https://chrome.browserless.io/scrape?token={BROWSERLESS_API_KEY}' @@ -172,44 +167,6 @@ def clean_payload_data(original_data): return reduced_data - -def load_data_from_json_to_db(session=db_session, json_path=dapps_json_path): - print("Loading data from JSON to DB") - # 1. Setup - # If the table doesn't exist, create it - # Base.metadata.create_all(session.bind) Dont need this - jacob b - - # 2. Data Loading - - # Read the JSON data - with open(json_path, "r") as file: - dapps_data = json.load(file) - - # Loop through the JSON data and insert each entry into the database - for dapp in dapps_data: - print(f'adding {dapp["name"]}') - dapp_instance = Dapp( - description=dapp["description"], - name=dapp["name"], - url=dapp["url"], - twitter_handle=dapp["twitterHandle"], - blog_links=dapp["blogLinks"], - discord=dapp["discord"], - facebook=dapp["facebook"], - instagram=dapp["instagram"], - telegram=dapp["telegram"] - ) - session.add(dapp_instance) - - # 3. Finalization - - # Commit the transactions - session.commit() - - print("Finished loading data from JSON to DB") - - - if __name__ == "__main__": # create an ArgumentParser instance diff --git a/scrape/models.py b/scrape/models.py index 5a504c60..cbcaf080 100644 --- a/scrape/models.py +++ b/scrape/models.py @@ -36,18 +36,3 @@ class ScrapedUrl(Base, Timestamp): # type: ignore Index('scraped_url_lookup', url, unique=True) -class Dapp(Base, Timestamp): - __tablename__ = 'dapp' - - id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4) - description = Column(TEXT, nullable=False) - name = Column(String(255), nullable=False, unique=True) - url = Column(String(255), nullable=False) - twitter_handle = Column(String(255), nullable=True) - blog_links = Column(ARRAY(String(255)), nullable=True) - discord = Column(String(255), nullable=True) - facebook = Column(String(255), nullable=True) - instagram = Column(String(255), nullable=True) - telegram = Column(String(255), nullable=True) - - Index('dapp_by_name', 'name', unique=True) From 9a5c9dbb3a74534e2142fa87dd8d8a82606a39c4 Mon Sep 17 00:00:00 2001 From: Jacob Bryant Date: Wed, 27 Sep 2023 18:21:05 -0400 Subject: [PATCH 3/4] removing Dapp class, alembic file, updated fetch_app_info --- .../versions/b060d9ab3367_add_bar_table.py | 44 ------------------- index/dapps.py | 2 +- knowledge_base/widgets.yaml | 13 +++--- 3 files changed, 9 insertions(+), 50 deletions(-) delete mode 100644 alembic/versions/b060d9ab3367_add_bar_table.py diff --git a/alembic/versions/b060d9ab3367_add_bar_table.py b/alembic/versions/b060d9ab3367_add_bar_table.py deleted file mode 100644 index 96c0cbde..00000000 --- a/alembic/versions/b060d9ab3367_add_bar_table.py +++ /dev/null @@ -1,44 +0,0 @@ -"""Add Bar table - -Revision ID: b060d9ab3367 -Revises: 7cc8cbfe072d -Create Date: 2023-09-26 15:58:57.060974 - -""" -from alembic import op -import sqlalchemy as sa -import sqlalchemy_utils -from sqlalchemy.dialects import postgresql - -# revision identifiers, used by Alembic. -revision = 'b060d9ab3367' -down_revision = '7cc8cbfe072d' -branch_labels = None -depends_on = None - - -def upgrade() -> None: - # ### commands auto generated by Alembic - please adjust! ### - op.drop_table('dapp') - # ### end Alembic commands ### - - -def downgrade() -> None: - # ### commands auto generated by Alembic - please adjust! ### - op.create_table('dapp', - sa.Column('created', postgresql.TIMESTAMP(), autoincrement=False, nullable=False), - sa.Column('updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=False), - sa.Column('id', postgresql.UUID(), autoincrement=False, nullable=False), - sa.Column('description', sa.TEXT(), autoincrement=False, nullable=False), - sa.Column('name', sa.VARCHAR(length=255), autoincrement=False, nullable=False), - sa.Column('url', sa.VARCHAR(length=255), autoincrement=False, nullable=False), - sa.Column('twitter_handle', sa.VARCHAR(length=255), autoincrement=False, nullable=True), - sa.Column('blog_links', postgresql.ARRAY(sa.VARCHAR(length=255)), autoincrement=False, nullable=True), - sa.Column('discord', sa.VARCHAR(length=255), autoincrement=False, nullable=True), - sa.Column('facebook', sa.VARCHAR(length=255), autoincrement=False, nullable=True), - sa.Column('instagram', sa.VARCHAR(length=255), autoincrement=False, nullable=True), - sa.Column('telegram', sa.VARCHAR(length=255), autoincrement=False, nullable=True), - sa.PrimaryKeyConstraint('id', name='dapp_pkey'), - sa.UniqueConstraint('name', name='dapp_name_key') - ) - # ### end Alembic commands ### diff --git a/index/dapps.py b/index/dapps.py index 588e2d7c..3061d79b 100644 --- a/index/dapps.py +++ b/index/dapps.py @@ -68,7 +68,7 @@ def backfill(): with open('./knowledge_base/dapps_ranked_unique.json') as f: dapp_list = json.load(f) - documents = [d.pop("description") for d in dapp_list] + documents = [d.get("description") for d in dapp_list] metadatas = dapp_list diff --git a/knowledge_base/widgets.yaml b/knowledge_base/widgets.yaml index acb20f33..09d94d4b 100644 --- a/knowledge_base/widgets.yaml +++ b/knowledge_base/widgets.yaml @@ -427,11 +427,14 @@ type: object return_value_description: '' - _name_: fetch_app_info - description: Used when we need to handle common questions and answers about the - chat assistant app, what it can do, how to interact with it, at a high-level. - Only useful for questions about the chat app experience. It does not know specific - information about the web3 ecosystem, of tokens or NFTs or contracts, or access - to live data and APIs. + description: This function is invoked exclusively to address questions pertaining to the capabilities, + features, and interactions related to the chat assistant application itself. Users may + inquire about the chat app's high-level functionalities, its operational attributes, or + best practices for engagement. However, this function should NOT be triggered for queries + related to specific details of the web3 ecosystem, such as tokens, NFTs, contracts, or any + live data and API access. Ensure that the function responds solely to inquiries about + the chat assistant app's core capabilities and user experience. + parameters: properties: query: From e7204c36f2e6f2f485e31e70d3e657f8f5ddeb73 Mon Sep 17 00:00:00 2001 From: Jacob Bryant Date: Wed, 27 Sep 2023 19:47:12 -0400 Subject: [PATCH 4/4] improved link suggestion prompt --- tools/index_link_suggestion.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/tools/index_link_suggestion.py b/tools/index_link_suggestion.py index beeaf45f..93913308 100644 --- a/tools/index_link_suggestion.py +++ b/tools/index_link_suggestion.py @@ -12,13 +12,22 @@ import utils.timing as timing -TEMPLATE = '''You are a web3 assistant. You help users with answering web3-related questions. Your responses should sound natural, helpful, cheerful, and engaging, and you should use easy to understand language with explanations for jargon. +TEMPLATE = '''**Immediate Action & Review Needed**: Every time you mention specific platforms, tools, technologies, or any topic deserving of a URL, you **must** incorporate it into the text using markdown-style linking. There are two ways to do this: -Information to help complete your task is below. Only use the information below to answer the question. If you don't know the answer, just say that you don't know. Don't try to make up an answer. +1. Seamlessly embed the URL into descriptive text. +2. If you need to specify the exact URL for clarity, make sure it is still formatted in markdown. -When mentioning specific platforms, tools, or technologies, it's crucial to provide a relevant URL. Ensure this URL is seamlessly integrated into the content of the answer using markdown formatting. The link should feel like a natural part of the sentence. +Here's your blueprint: -For example: One of the leading platforms in the web3 space is [Ethereum](https://www.ethereum.org/), which offers a decentralized platform for building smart contracts and dapps." +**Correct - Embedded**: Learn more about [Ethereum](https://www.ethereum.org/). +**Correct - Explicit**: Visit the Ethereum website at [https://www.ethereum.org/](https://www.ethereum.org/). +**Incorrect**: Learn more at https://www.ethereum.org/ or "Visit the Ethereum website here: https://www.ethereum.org/". + +Being a web3 assistant, aim to deliver answers that are clear, engaging, and most importantly, user-friendly. Web3 topics can be intricate, so your goal is to be the bridge to understanding. Always simplify jargon and ensure URLs are user-friendly and clickable. + +Before finalizing any response, stop and verify: "Did I format all URLs in markdown?" + +If you can't provide an answer, it's perfectly fine to admit it. But regardless of the content of your response, ensure all URLs are **formatted correctly**. --- {task_info} ---