diff --git a/Dockerfile.tokenizer-api b/Dockerfile.tokenizer-api index 23cc0ec..fdea984 100644 --- a/Dockerfile.tokenizer-api +++ b/Dockerfile.tokenizer-api @@ -1,8 +1,9 @@ FROM tensorflow/tensorflow:latest +ENV dataset=sqli_dataset1.csv ENV KMP_AFFINITY=noverbose ENV TF_CPP_MIN_LOG_LEVEL=3 -ENV DATASET_PATH=/app/sqli_dataset.csv +ENV DATASET_PATH=/app/${dataset} ENV WORKERS=4 ENV HOST=0.0.0.0 ENV PORT=8000 @@ -11,7 +12,7 @@ WORKDIR /app COPY api/api.py /app COPY api/pyproject.toml /app COPY api/poetry.lock /app -COPY dataset/sqli_dataset.csv /app +COPY dataset/${dataset} /app RUN pip install --disable-pip-version-check poetry RUN poetry install --no-root diff --git a/api/api.py b/api/api.py index 2da3bc1..531337e 100644 --- a/api/api.py +++ b/api/api.py @@ -7,7 +7,7 @@ MAX_WORDS = 10000 MAX_LEN = 100 -DATASET_PATH = os.getenv("DATASET_PATH", "dataset/sqli_dataset.csv") +DATASET_PATH = os.getenv("DATASET_PATH", "dataset/sqli_dataset1.csv") DATASET = pd.read_csv(DATASET_PATH) TOKENIZER = Tokenizer(num_words=MAX_WORDS, filters="") TOKENIZER.fit_on_texts(DATASET["Query"])