Improve Nomic Visualizations with full conversations & date filtering (#72)

* new PR

* added onResponseCompletion endpoint

* nomic log test push

* nomic log test push 2

* nomic changed to POST

* nomic changed back to GET

* nomic changed to POST

* nomic moved print statements

* adding proper variables to /web-scrape endpoint

* fix formatting in code formatting action (use double quotes)

* added prints for test

* added prints for test

* added print for response body

* modified logging function WIP

* modified logging function WIP

* modified logging function WIP

* added print for response body

* added print for response json

* added print statements

* added print statements

* added print statements

* added print statements

* iterating over response

* extracting relevant data

* extracting relevant data

* printing response data

* printing response data type

* iterating over response

* extracting messages

* extracting messages

* added function for logging single-turn conversation

* completed workflow for multi-turn and map creation

* changed method to POST

* added print statements on endpoint

* added get_json()

* added get_json()

* investigate data

* investigate data

* removed jsonify(response)

* added print statements in create_map()

* added print statements in create_map()

* added print statements in create_map()

* added more print statements in create_map()

* added emoji to metadata

* handled emoji error

* added missing emoji

* replace int id with time.time()

* handled ID type errors

* added timestamps to metadata

* removed test files

* testing for json error

* testing for json error

* testing index errors

* fixed timestamp formats for nomic logs

* checking for JSON serialization errors

* checking for JSON serialization errors

* typecasted previous convo ID to string for deletion

* Yapf format ONLY

* revert all files with no changes except whitespace on newlines

* cleanup prints, looks good to me

* minor cleanup of when logging happens

---------

Co-authored-by: Kastan Day <[email protected]>
Co-authored-by: root <root@ASMITA>
3 people authored Sep 15, 2023
1 parent 09010b5 commit 520444a
Showing 2 changed files with 231 additions and 54 deletions.
30 changes: 24 additions & 6 deletions ai_ta_backend/main.py
@@ -1,4 +1,5 @@
 import gc
+import json
 import os
 import time
 from typing import List
@@ -9,7 +10,7 @@
 from flask_executor import Executor
 from sqlalchemy import JSON
 
-from ai_ta_backend.nomic_logging import get_nomic_map, log_query_to_nomic
+from ai_ta_backend.nomic_logging import get_nomic_map, log_convo_to_nomic
 from ai_ta_backend.vector_database import Ingest
 from ai_ta_backend.web_scrape import main_crawler, mit_course_download
 
@@ -137,16 +138,13 @@ def getTopContexts() -> Response:
     abort(
         400,
         description=
-        f"Missing one or me required parameters: 'search_query' and 'course_name' must be provided. Search query: `{search_query}`, Course name: `{course_name}`"
+        f"Missing one or more required parameters: 'search_query' and 'course_name' must be provided. Search query: `{search_query}`, Course name: `{course_name}`"
     )
 
   ingester = Ingest()
   found_documents = ingester.getTopContexts(search_query, course_name, token_limit)
   del ingester
 
-  # background execution of tasks!!
-  executor.submit(log_query_to_nomic, course_name, search_query)
-
   response = jsonify(found_documents)
   response.headers.add('Access-Control-Allow-Origin', '*')
   return response
@@ -342,6 +340,7 @@ def scrape() -> Response:
   print(f"Max Urls: {max_urls}")
   print(f"Max Depth: {max_depth}")
   print(f"Timeout in Seconds ⏰: {timeout}")
+  print(f"Stay on baseurl: {stay_on_baseurl}")
 
   success_fail_dict = main_crawler(url, course_name, max_urls, max_depth, timeout, stay_on_baseurl)
 
@@ -350,7 +349,6 @@
   gc.collect()  # manually invoke garbage collection, try to reduce memory on Railway $$$
   return response
 
-
 @app.route('/mit-download', methods=['GET'])
 def mit_download_course() -> Response:
   """ Web scraper built for
@@ -392,6 +390,26 @@ def nomic_map():
   response.headers.add('Access-Control-Allow-Origin', '*')
   return response
 
+@app.route('/onResponseCompletion', methods=['POST'])
+def logToNomic():
+  data = request.get_json()
+  course_name = data['course_name']
+  conversation = data['conversation']
+  if course_name == '' or conversation == '':
+    # proper web error "400 Bad request"
+    abort(
+        400,
+        description=
+        f"Missing one or more required parameters: 'course_name' and 'conversation' must be provided. Course name: `{course_name}`, Conversation: `{conversation}`"
+    )
+  print(f"In /onResponseCompletion for course: {course_name}")
+
+  # background execution of tasks!!
+  response = executor.submit(log_convo_to_nomic, course_name, data)
+  response = jsonify({'outcome': 'success'})
+  response.headers.add('Access-Control-Allow-Origin', '*')
+  return response
+
 
 if __name__ == '__main__':
   app.run(debug=True, port=int(os.getenv("PORT", default=8000)))
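
For orientation, a minimal sketch of how a client might call the new endpoint above. The payload keys ('course_name', 'conversation') and the {'outcome': 'success'} reply come straight from the handler; the host, port, and the exact shape of the conversation object are illustrative assumptions.

import requests

# Hypothetical payload: only the top-level keys 'course_name' and
# 'conversation' are required by the handler above. The message schema and
# the stringified time.time() id are assumptions based on the commit notes
# ("replace int id with time.time()", IDs typecast to string).
payload = {
    "course_name": "ece120",
    "conversation": {
        "id": "1694790000.0",
        "messages": [
            {"role": "user", "content": "What is a finite state machine?"},
            {"role": "assistant", "content": "A finite state machine is ..."},
        ],
    },
}

# Assumes the backend runs locally on the default port from app.run() above.
resp = requests.post("http://localhost:8000/onResponseCompletion", json=payload)
print(resp.json())  # {'outcome': 'success'}

Note that the handler returns success immediately: executor.submit hands the Nomic upload to flask_executor, so the HTTP response is never blocked on the logging work.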
[Diff for the second changed file did not load on this page.]
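
Most of this PR's 231 added lines live in that second file (log_convo_to_nomic itself), whose diff is not shown. As a rough, non-authoritative sketch: the commit messages above (string conversation IDs from time.time(), timestamps added to metadata, emoji handling, typecasting the previous convo ID to string for deletion) suggest it does something along these lines. Every name and nomic call below is an assumption, not the repository's actual code.

import time

import nomic
from nomic import atlas

def log_convo_to_nomic_sketch(course_name: str, data: dict) -> None:
  """Hypothetical reconstruction -- not the code from the second file."""
  nomic.login("NOMIC_API_KEY")  # assumed auth; real code would read an env var

  conversation = data["conversation"]
  # Commit notes: int ids were replaced with time.time() and typecast to str,
  # so a growing conversation can be found (and its earlier copy deleted) by
  # the same string id each time it is re-logged.
  convo_id = str(conversation.get("id", time.time()))

  # Flatten the multi-turn history into one text field per datapoint; emoji
  # in message content are kept as-is (a commit above handled an emoji
  # encoding error rather than stripping them).
  text = "\n".join(f"{m['role']}: {m['content']}" for m in conversation["messages"])

  metadata = [{
      "id": convo_id,
      "conversation": text,
      "created_at": time.strftime("%Y-%m-%d %H:%M:%S"),  # format fixed in a commit above
  }]

  # Assumed 2023-era nomic client entry point for building a text map; a real
  # implementation would append to an existing per-course project (and delete
  # the previous copy of this conversation) rather than map fresh data.
  atlas.map_text(data=metadata,
                 indexed_field="conversation",
                 name=f"Conversation Map for {course_name}")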