Skip to content

Commit

Permalink
Merge branch 'main' into @nekxis/interpret_prompt_mode
Browse files Browse the repository at this point in the history
  • Loading branch information
latekvo authored Apr 15, 2024
2 parents fd73113 + 6781d4d commit ed688ef
Show file tree
Hide file tree
Showing 6 changed files with 89 additions and 21 deletions.
34 changes: 34 additions & 0 deletions .github/workflows/devskim.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# This workflow uses actions that are not certified by GitHub.
# They are provided by a third-party and are governed by
# separate terms of service, privacy policy, and support
# documentation.

# Runs the DevSkim static-analysis scanner and publishes its findings
# to the repository's code-scanning (Security) tab.
name: DevSkim

on:
  push:
    # NOTE(review): "**/**/**" only matches branch names containing at least
    # two "/" separators — confirm this is the intended set of branches.
    branches: [ "main", "**/**/**" ]
  pull_request:
    branches: [ "main" ]
  schedule:
    # Weekly run: 14:18 on Saturdays (GitHub evaluates cron schedules in UTC).
    - cron: '18 14 * * 6'

jobs:
  lint:
    name: DevSkim
    runs-on: ubuntu-20.04
    permissions:
      actions: read
      contents: read
      # Write access is needed so the SARIF upload step can publish results.
      security-events: write
    steps:
      - name: Checkout code
        uses: actions/checkout@v3

      - name: Run DevSkim scanner
        uses: microsoft/DevSkim-Action@v1

      # Uploads the report file named below to GitHub code scanning.
      - name: Upload DevSkim scan results to GitHub Security tab
        uses: github/codeql-action/upload-sarif@v2
        with:
          sarif_file: devskim-results.sarif
2 changes: 1 addition & 1 deletion core/tools/dbops.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,4 @@ def create_db_if_not_exists(db_name: str, embeddings: Embeddings):

def get_db_by_name(db_name: str, embeddings: Embeddings) -> FAISS:
    """Return the FAISS index called *db_name* from the ``store/vector`` folder.

    ``create_db_if_not_exists`` is invoked first (presumably so that the index
    files exist before loading — see that helper for the exact behaviour).

    Args:
        db_name: Index name passed to ``FAISS.load_local`` as ``index_name``.
        embeddings: Embedding model handed to both the creation helper and
            the loader.

    Returns:
        The loaded ``FAISS`` vector store.
    """
    create_db_if_not_exists(db_name, embeddings)
    # SECURITY: allow_dangerous_deserialization=True opts in to pickle-based
    # loading of the stored index. This is only acceptable while every file
    # under store/vector is written by this application itself; never point
    # it at index files obtained from an untrusted source.
    return FAISS.load_local(
        folder_path='store/vector',
        embeddings=embeddings,
        index_name=db_name,
        allow_dangerous_deserialization=True,
    )
30 changes: 17 additions & 13 deletions core/tools/scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,31 +44,35 @@ def rag_query_lookup(prompt_text: str) -> str:
pass


def populate_db_with_google_search(database: FAISS, query: WebQuery):
def query_for_urls(query: WebQuery, url_amount=embed_config.article_limit) -> List[str]:
    """Run a web search for *query* and return the resulting URLs.

    Args:
        query: Supplies the search string (``web_query``), the ``tbs`` filter
            (``web_tbs``) and any extra request parameters
            (``web_extra_params``).
        url_amount: Value passed to ``search`` as ``stop``, i.e. the cap on
            how many results are requested. Defaults to
            ``embed_config.article_limit`` (bound once, at definition time).

    Returns:
        Whatever ``search`` returns for the given arguments.
        NOTE(review): depending on the search library this may be a lazy
        iterator rather than a list — confirm against the ``List[str]`` hint.
    """
    print(f"{Fore.CYAN}{Style.BRIGHT}Searching for:{Style.RESET_ALL}", query.web_query)

    url_list = search(
        query=query.web_query,
        stop=url_amount,
        lang='en',
        safe='off',
        tbs=query.web_tbs,
        extra_params=query.web_extra_params)

    print(f"{Fore.CYAN}Web search completed.{Fore.RESET}")
    return url_list

for url in url_list:
url_handle = WebBaseLoader(url)

# try downloading web content
try:
# fixme: certain sites load forever, soft-locking this loop (prompt example: car)
document = url_handle.load()
except requests.exceptions.ConnectionError:
continue
def download_article(url):
    """Download the content found at *url* using ``WebBaseLoader``.

    Args:
        url: Address handed to ``WebBaseLoader``.

    Returns:
        The value produced by ``WebBaseLoader(url).load()``, or ``None`` when
        the request fails. Callers must check for ``None`` before using the
        result.
    """
    url_handle = WebBaseLoader(url)
    try:
        # fixme: certain sites load forever and block the caller, since no
        #        timeout is configured here (prompt example: car)
        document = url_handle.load()
    except requests.exceptions.RequestException:
        # RequestException is the base class of every requests error
        # (connection failures, timeouts, redirect loops, invalid URLs, ...),
        # so one unreachable page never aborts the caller's whole scrape.
        return None
    return document


if document is None:
continue
def populate_db_with_google_search(database: FAISS, query: WebQuery):
url_list = query_for_urls(query)

for url in url_list:
document = download_article(url)

text_splitter = RecursiveCharacterTextSplitter(
separators=embed_config.buffer_stops,
Expand Down
Binary file added environment-win.yml
Binary file not shown.
42 changes: 36 additions & 6 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@ dependencies:
- faiss=1.7.4=py38cuda112h48d0473_0_cuda
- frozenlist=1.4.1=py38h01eb140_0
- greenlet=1.1.2=py38hfa26641_2
- idna=3.6=pyhd8ed1ab_0
- iniconfig=2.0.0=pyhd8ed1ab_0
- jsonpatch=1.33=pyhd8ed1ab_0
- jsonpointer=2.4=py38h578d9bd_3
Expand All @@ -47,9 +46,7 @@ dependencies:
- multidict=6.0.5=py38h01eb140_0
- mypy_extensions=1.0.0=pyha770c72_0
- ncurses=6.4=h6a678d5_0
- numpy=1.22.3=py38h99721a1_2
- openssl=3.2.1=hd590300_0
- packaging=23.2=pyhd8ed1ab_0
- pip=24.0=pyhd8ed1ab_0
- pluggy=1.4.0=pyhd8ed1ab_0
- pydantic=2.6.1=pyhd8ed1ab_0
Expand All @@ -71,16 +68,49 @@ dependencies:
- tiktoken=0.5.2=py38h788434f_0
- tk=8.6.12=h1ccaba5_0
- tomli=2.0.1=pyhd8ed1ab_0
- typing-extensions=4.9.0=hd8ed1ab_0
- typing_extensions=4.9.0=pyha770c72_0
- typing_inspect=0.9.0=pyhd8ed1ab_0
- urllib3=2.2.0=pyhd8ed1ab_0
- wheel=0.41.2=py38h06a4308_0
- xz=5.4.5=h5eee18b_0
- yaml=0.2.5=h7f98852_2
- yarl=1.9.4=py38h01eb140_0
- zlib=1.2.13=h5eee18b_0
- pip:
- backoff==2.2.1
- beautifulsoup4==4.12.3
- chardet==5.2.0
- click==8.1.7
- deepdiff==6.7.1
- diskcache==5.6.3
- emoji==2.11.0
- filelock==3.13.4
- filetype==1.2.0
- fsspec==2024.3.1
- google==3.0.0
- huggingface-hub==0.22.2
- idna==3.7
- jinja2==3.1.3
- joblib==1.3.2
- jsonpath-python==1.0.6
- langdetect==1.0.9
- llama-cpp-python==0.2.61
- lxml==5.1.1
- markupsafe==2.1.5
- nltk==3.8.1
- numpy==1.24.4
- ordered-set==4.1.0
- packaging==24.0
- pypdf==4.1.0
- python-dateutil==2.9.0.post0
- python-iso639==2024.2.7
- python-magic==0.4.27
- rapidfuzz==3.7.0
- six==1.16.0
- soupsieve==2.5
- tabulate==0.9.0
- tinydb==4.8.0
- tqdm==4.66.2
- typing-extensions==4.11.0
- unstructured==0.11.8
- unstructured-client==0.22.0
- urllib3==2.2.1
- wrapt==1.16.0
2 changes: 1 addition & 1 deletion terminal_gui.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ def get_input():
char = stdscr.getch()
if char in [curses.KEY_ENTER, 10, 13]:
break
elif char in [curses.KEY_BACKSPACE, 127]:
elif char in [curses.KEY_BACKSPACE, 8, 127]:
text = text[:-1]
elif 32 <= char <= 126:
text += chr(char)
Expand Down

0 comments on commit ed688ef

Please sign in to comment.