Skip to content

Commit

Permalink
Merge pull request #291 from truefoundry/main
Browse files Browse the repository at this point in the history
Main -> Release
  • Loading branch information
S1LV3RJ1NX authored Jul 30, 2024
2 parents 22cacb0 + 72d8e9d commit 2519c51
Show file tree
Hide file tree
Showing 24 changed files with 313 additions and 654 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ qdrant_storage/
.truefoundry
infinity/
volumes/
user_data/
pgdata/
*.bak
models_config.yaml
15 changes: 6 additions & 9 deletions backend/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -9,22 +9,13 @@ RUN python3 -m venv /virtualenvs/venv/
# Copy requirements.txt
COPY backend/requirements.txt /tmp/requirements.txt
COPY backend/vectordb.requirements.txt /tmp/vectordb.requirements.txt
COPY backend/parsers.requirements.txt /tmp/parsers.requirements.txt

# Install Python packages
RUN python3 -m pip install -U pip setuptools wheel && \
python3 -m pip install --use-pep517 --no-cache-dir -r /tmp/requirements.txt

ENV LD_LIBRARY_PATH=/virtualenvs/venv/lib/python3.11/site-packages/nvidia/cublas/lib:/virtualenvs/venv/lib/python3.11/site-packages/nvidia/cuda_cupti/lib:/virtualenvs/venv/lib/python3.11/site-packages/nvidia/cuda_nvrtc/lib:/virtualenvs/venv/lib/python3.11/site-packages/nvidia/cuda_runtime/lib:/virtualenvs/venv/lib/python3.11/site-packages/nvidia/cudnn/lib:/virtualenvs/venv/lib/python3.11/site-packages/nvidia/cufft/lib:/virtualenvs/venv/lib/python3.11/site-packages/nvidia/curand/lib:/virtualenvs/venv/lib/python3.11/site-packages/nvidia/cusolver/lib:/virtualenvs/venv/lib/python3.11/site-packages/nvidia/cusparse/lib:/virtualenvs/venv/lib/python3.11/site-packages/nvidia/nccl/lib:/virtualenvs/venv/lib/python3.11/site-packages/nvidia/nvtx/lib:/virtualenvs/venv/lib/python3.11/site-packages/torch/lib/:/usr/local/nvidia/lib:/usr/local/nvidia/lib64

ARG ADD_PYTORCH=0
# Install torch
RUN if [ "${ADD_PYTORCH}" = "1" ]; then pip install "torch==2.2.2+cu121" --extra-index-url https://download.pytorch.org/whl/cu121; fi

# Install Parser packages
ARG ADD_PARSER=0
RUN if [ "${ADD_PARSER}" = "1" ]; then python3 -m pip install --use-pep517 --no-cache-dir -r /tmp/parsers.requirements.txt; fi

# Install VectorDB packages
ARG ADD_VECTORDB=0
RUN if [ "${ADD_VECTORDB}" = "1" ]; then python3 -m pip install --use-pep517 --no-cache-dir -r /tmp/vectordb.requirements.txt; fi
Expand All @@ -40,6 +31,12 @@ ENV MODELS_CONFIG_PATH=${MODELS_CONFIG_PATH}
ARG INFINITY_API_KEY
ENV INFINITY_API_KEY=${INFINITY_API_KEY}

ARG UNSTRUCTURED_IO_URL
ENV UNSTRUCTURED_IO_URL=${UNSTRUCTURED_IO_URL}

ARG UNSTRUCTURED_IO_API_KEY
ENV UNSTRUCTURED_IO_API_KEY=${UNSTRUCTURED_IO_API_KEY}

# Copy the project files
COPY . /app

Expand Down
8 changes: 2 additions & 6 deletions backend/indexer/indexer.py
Original file line number Diff line number Diff line change
Expand Up @@ -234,7 +234,6 @@ async def ingest_data_points(
file_extension=loaded_data_point.file_extension,
parsers_map=inputs.parser_config.parser_map,
max_chunk_size=inputs.parser_config.chunk_size,
chunk_overlap=inputs.parser_config.chunk_overlap,
additional_config=inputs.parser_config.additional_config,
)
if parser is None:
Expand Down Expand Up @@ -370,10 +369,8 @@ async def ingest_data(request: IngestDataToCollectionDto):
)
created_data_ingestion_run.status = DataIngestionRunStatus.COMPLETED
else:
if not settings.JOB_FQN or not settings.JOB_COMPONENT_NAME:
logger.error(
"Job FQN and Job Component Name are required to trigger the job"
)
if not settings.JOB_FQN:
logger.error("Job FQN is required to trigger the job")
raise HTTPException(
status_code=500,
detail="Job FQN and Job Component Name are required to trigger the job",
Expand All @@ -391,7 +388,6 @@ async def ingest_data(request: IngestDataToCollectionDto):
)
trigger_job(
application_fqn=settings.JOB_FQN,
component_name=settings.JOB_COMPONENT_NAME,
params={
"collection_name": collection.name,
"data_source_fqn": associated_data_source.data_source_fqn,
Expand Down
2 changes: 1 addition & 1 deletion backend/modules/metadata_store/prismastore.py
Original file line number Diff line number Diff line change
Expand Up @@ -379,7 +379,7 @@ async def adelete_data_source(self, data_source_fqn: str):
data_source_uri = data_source.uri
# data_source_uri is of the form: `/app/user_data/folder_name`
folder_name = data_source_uri.split("/")[-1]
folder_path = os.path.join("/app/user_data", folder_name)
folder_path = os.path.join(settings.LOCAL_DATA_DIRECTORY, folder_name)
logger.info(
f"Deleting folder: {folder_path}, path exists: {os.path.exists(folder_path)}"
)
Expand Down
12 changes: 2 additions & 10 deletions backend/modules/parsers/__init__.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,7 @@
from backend.modules.parsers.codeparser import CodeParser
from backend.modules.parsers.markdownparser import MarkdownParser
from backend.modules.parsers.multimodalparser import MultiModalParser
from backend.modules.parsers.parser import register_parser
from backend.modules.parsers.pdfparser_fast import PdfParserUsingPyMuPDF
from backend.modules.parsers.tablepdfparser import PdfTableParser
from backend.modules.parsers.textparser import TextParser
from backend.modules.parsers.unstructured_io import UnstructuredIoParser

# The order of registry defines the order of precedence
register_parser("MarkdownParser", MarkdownParser)
register_parser("TextParser", TextParser)
register_parser("PdfParserFast", PdfParserUsingPyMuPDF)
register_parser("UnstructuredIoParser", UnstructuredIoParser)
register_parser("MultiModalParser", MultiModalParser)
register_parser("CodeParser", CodeParser)
register_parser("PdfTableParser", PdfTableParser)
105 changes: 0 additions & 105 deletions backend/modules/parsers/codeparser.py

This file was deleted.

134 changes: 0 additions & 134 deletions backend/modules/parsers/markdownparser.py

This file was deleted.

Loading

0 comments on commit 2519c51

Please sign in to comment.