Skip to content

Commit

Permalink
Deliverable/86/remake docs (#92)
Browse files Browse the repository at this point in the history
* Removed deprecated folders

* Renamed to Lettuce to be clear

* Update .gitignore

Removed references to deprecated folders. Updated references to Carrot-Assistant

* renamed doc files folder

* Black formatting

* removed cl args from pipeline

* eot_token from model

* More opt removal

Removed opt from more parts of the llm pipelines

* removed opt from database query

* Update pipeline_routes.py

removed opt from vector_llm_pipeline

* Update pipeline_routes.py

updated imports

* docstrings

* corrected tests

tests to match no opt

* tests

* cli for rag/llm/vector search

* cli with omop query

* Update test_db.py

* Started new docs

* set paths for gh actions build

* Working quickstart

* Modifying auto-generated docs

* evaluation docs

* finish basic docs

* switch to gh actions for doc deployment

* Update _meta.js

* Update deploy.docs.yml

now deploys on pushes to main
  • Loading branch information
kuraisle authored Dec 2, 2024
1 parent 4d0339f commit aebdb17
Show file tree
Hide file tree
Showing 152 changed files with 11,279 additions and 11,615 deletions.
90 changes: 90 additions & 0 deletions .github/workflows/deploy.docs.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
# Build the Next.js documentation site in website/ and deploy it to GitHub Pages.
name: Deploy Documentation Site to GitHub Pages

on:
  # Rebuild only when the docs site sources change on main.
  push:
    branches:
      - main

    paths:
      - "website/**"

  # Allow manual runs from the Actions tab.
  workflow_dispatch:

permissions:
  contents: read
  pages: write
  id-token: write

# One Pages deployment at a time; don't cancel an in-flight deploy.
concurrency:
  group: "pages"
  cancel-in-progress: false

jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      # Pick yarn or npm based on which lockfile/manifest website/ ships.
      - name: Detect package manager
        id: detect-package-manager
        run: |
          if [ -f "website/yarn.lock" ]; then
            echo "manager=yarn" >> $GITHUB_OUTPUT
            echo "command=install" >> $GITHUB_OUTPUT
            echo "runner=yarn" >> $GITHUB_OUTPUT
            exit 0
          elif [ -f "website/package.json" ]; then
            echo "manager=npm" >> $GITHUB_OUTPUT
            echo "command=ci" >> $GITHUB_OUTPUT
            echo "runner=npx --no-install" >> $GITHUB_OUTPUT
            exit 0
          else
            echo "Unable to determine package manager"
            exit 1
          fi

      - name: Setup Node
        uses: actions/setup-node@v4
        with:
          node-version: "20"

      - name: Setup Pages
        uses: actions/configure-pages@v5

      - name: Restore cache
        uses: actions/cache@v4
        with:
          # The build runs with working-directory: website, so the Next.js
          # incremental cache lives at website/.next/cache — a bare
          # .next/cache path would never exist and the cache would never hit.
          path: |
            website/.next/cache
          key: ${{ runner.os }}-nextjs-${{ hashFiles('**/package-lock.json', '**/yarn.lock') }}-${{ hashFiles('**.[jt]s', '**.[jt]sx') }}
          restore-keys: |
            ${{ runner.os }}-nextjs-${{ hashFiles('**/package-lock.json', '**/yarn.lock') }}-

      - name: Install dependencies
        run: ${{ steps.detect-package-manager.outputs.manager }} ${{ steps.detect-package-manager.outputs.command }}
        working-directory: website

      - name: Build with Next.js
        run: ${{ steps.detect-package-manager.outputs.runner }} next build
        working-directory: website

      - name: Copy built files to docs
        run: |
          rm -rf docs/*
          # Copy the *contents* of website/out so the site lands at the Pages
          # root ("cp -r website/out docs/" would nest everything in docs/out/).
          cp -r website/out/. docs/

      - name: Upload artifact
        uses: actions/upload-pages-artifact@v3
        with:
          path: docs

  deploy:
    environment:
      name: github-pages
      url: ${{ steps.deployment.outputs.page_url }}
    runs-on: ubuntu-latest
    needs: build
    steps:
      - name: Deploy to GitHub Pages
        id: deployment
        uses: actions/deploy-pages@v4
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,5 @@
/Lettuce/evaluation/datasets/*
!/Lettuce/evaluation/datasets/example.csv
/Lettuce/log
/website/.next
/website/node_modules
37 changes: 21 additions & 16 deletions Lettuce/assistant.py
Original file line number Diff line number Diff line change
@@ -1,25 +1,27 @@
import argparse
from logging import Logger
import time

from dotenv import load_dotenv

from components.pipeline import llm_pipeline
from utils.logging_utils import Logger
from utils.utils import *
from options.pipeline_options import LLMModel


def run(
opt: argparse.Namespace = None,
informal_names: list[str] = None,
logger: Logger | None = None,
) -> list[dict] | None:
llm_model: LLMModel,
temperature: float,
informal_names: list[str],
logger: Logger,
) -> list[dict]:
"""
Run the LLM assistant to suggest a formal drug name for an informal medicine name
Parameters
----------
opt: argparse.Namespace
The options for the assistant
llm_model: LLMModel
Choice of model to run
temperature: float
Temperature to use for generation
informal_names: list[str]
The informal names of the medications
logger: Logger
Expand All @@ -38,13 +40,10 @@ def run(
"""
run_start = time.time()
load_dotenv()
if logger is None:
logger = Logger().make_logger()

if not informal_names:
return

pipeline = llm_pipeline(opt=opt, logger=logger).get_simple_assistant()
pipeline = llm_pipeline(
llm_model=llm_model, temperature=temperature, logger=logger
).get_simple_assistant()
start = time.time()
pipeline.warm_up()
logger.info(f"Pipeline warmup in {time.time()-start} seconds")
Expand All @@ -71,7 +70,13 @@ def run(

if __name__ == "__main__":
from options.base_options import BaseOptions
from utils.logging_utils import logger

opt = BaseOptions().parse()
informal_names = opt.informal_names
run(opt=opt, informal_names=informal_names)
run(
llm_model=opt.LLMModel,
temperature=opt.temperature,
informal_names=informal_names,
logger=logger,
)
87 changes: 87 additions & 0 deletions Lettuce/cli/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
import time

from components.embeddings import Embeddings, EmbeddingModelName
from components.pipeline import llm_pipeline
from components.result import LettuceResult
from options.base_options import BaseOptions
from options.pipeline_options import LLMModel
import omop.OMOP_match
from utils.logging_utils import logger


def main():
    """
    Run the Lettuce name-matching pipeline from the command line.

    Parses CLI options, wraps each supplied informal drug name in a
    LettuceResult, then depending on the flags:

    - ``--vector_search`` and ``--use_llm``: run the RAG assistant
      (embed query, retrieve candidate concepts, ask the LLM);
    - ``--vector_search`` only: run a plain embeddings search;
    - ``--use_llm`` only: run the simple LLM assistant.

    Finally resolves each result's best query term against the OMOP
    database and prints the accumulated results as a list of dicts.
    """
    opt = BaseOptions()
    opt.initialize()
    args = opt.parse()

    results = [LettuceResult(name) for name in args.informal_names]

    # Logical `and`, not bitwise `&`: short-circuits and states the intent.
    # (With argparse booleans `&` happened to work, but was a latent trap.)
    if args.vector_search and args.use_llm:
        start = time.time()
        pl = llm_pipeline(
            LLMModel[args.llm_model],
            args.temperature,
            logger=logger,
            embeddings_path="concept_embeddings.qdrant",
            embed_vocab=["RxNorm"],
            embedding_model=EmbeddingModelName.BGESMALL,
        ).get_rag_assistant()
        pl.warm_up()
        logger.info(f"Pipeline warmup in {time.time() - start} seconds")

        run_start = time.time()

        for query in results:
            rag = pl.run(
                {
                    "query_embedder": {"text": query.search_term},
                    "prompt": {"informal_name": query.search_term},
                },
                include_outputs_from={"retriever", "llm"},
            )
            query.add_vector_search_results(
                [
                    {"content": doc.content, "score": doc.score}
                    for doc in rag["retriever"]["documents"]
                ]
            )
            # "llm" may be absent from the pipeline output (presumably when
            # the pipeline routes past the LLM — confirm against llm_pipeline);
            # only record an answer when it is present.
            if "llm" in rag:
                query.add_llm_answer(rag["llm"]["replies"][0].strip())
        logger.info(f"Total RAG inference time: {time.time()-run_start}")
    elif args.vector_search:
        # Embeddings-only search against the prebuilt Qdrant index.
        embeddings = Embeddings(
            embeddings_path="concept_embeddings.qdrant",
            force_rebuild=False,
            embed_vocab=["RxNorm"],
            model_name=EmbeddingModelName.BGESMALL,
            search_kwargs={},
        )
        embed_results = embeddings.search(args.informal_names)
        for query, result in zip(results, embed_results):
            query.add_vector_search_results(result)
    elif args.use_llm:
        # LLM-only: ask the simple assistant for a formal name per query.
        run_start = time.time()
        pipeline = llm_pipeline(
            llm_model=LLMModel[args.llm_model],
            temperature=args.temperature,
            logger=logger,
        ).get_simple_assistant()
        pipeline.warm_up()

        for query in results:
            res = pipeline.run({"prompt": {"informal_name": query.search_term}})
            query.add_llm_answer(res["llm"]["replies"][0].strip())

    # Resolve each result's best available term against the OMOP database.
    db_queries = [query.get_query() for query in results]

    db_results = omop.OMOP_match.run(
        search_term=db_queries,
        logger=logger,
        vocabulary_id=args.vocabulary_id,
        search_threshold=args.search_threshold,
    )

    for query, result in zip(results, db_results):
        query.add_matches(result, args.search_threshold)

    print([result.to_dict() for result in results])
8 changes: 0 additions & 8 deletions Lettuce/components/embeddings.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,14 +53,6 @@ class EmbeddingModel(BaseModel):
"""
A class to match the name of an embeddings model with the
details required to download and use it.
Explanation
------------
For detailed information on the model's version, parameters,
description and benefits, refer to the documentation at the
following path:
-> docs/models/embedding_models.rst.txt.
"""

name: EmbeddingModelName
Expand Down
Loading

0 comments on commit aebdb17

Please sign in to comment.