Skip to content

Commit

Permalink
Merge pull request #31 from factly/fix/memory-issue-test-1
Browse files Browse the repository at this point in the history
Fix/memory issue test 1
  • Loading branch information
paul-tharun authored Feb 5, 2024
2 parents 2201ce5 + e094985 commit 8701d60
Show file tree
Hide file tree
Showing 31 changed files with 3,548 additions and 1,947 deletions.
31 changes: 28 additions & 3 deletions .Dockerignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@
.git
.gitignore

# Python
*/.venv/

# CI
.codeclimate.yml
.travis.yml
Expand All @@ -12,7 +15,10 @@
docker-compose.yml
.docker
.dockerignore
Dockerfile?
Dockerfile
Dockerfile.prod
Dockerfile.stag
Dockerfile.dev

# Byte-compiled / optimized / DLL files
__pycache__/
Expand Down Expand Up @@ -78,6 +84,7 @@ target/
# Virtual environment
.venv/
venv/
.vscode/

# PyCharm
.idea
Expand All @@ -101,7 +108,6 @@ venv/
README.md

# Library dependency metadata
poetry.lock

# github workflows
.github/
Expand All @@ -112,4 +118,23 @@ poetry.lock
volumes/

# Task
tasks/
tasks/

# Example
app/example/

# Gitpod
scripts/gitpod*
scripts/codespaces*

# Github workflows
.github
.devcontainer

# Gitpod
scripts/gitpod*
scripts/codespaces*
.gitpod*

# Env Files
.env*
2 changes: 1 addition & 1 deletion .flake8
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[flake8]
; ignore = E266, E501, W503, E203, C901
ignore = E501 app/models/
ignore = E501
exclude = .eggs,*.egg-info,.git,.hg,.tox, __pycache__,.vscode,.venv,__init__.py,.mypy_cache,.pytest_cache
max-line-length = 79
max-complexity = 18
Expand Down
6 changes: 5 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,7 @@ cython_debug/
!.vscode/launch.json
!.vscode/extensions.json
!.vscode/*.code-snippets
.vscode

# Local History for Visual Studio Code
.history/
Expand All @@ -175,4 +176,7 @@ cython_debug/
.ionide

# End of https://www.toptal.com/developers/gitignore/api/visualstudiocode
n
n

# Example
app/example/
5 changes: 3 additions & 2 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
files: app/
repos:
- repo: https://github.com/psf/black
rev: 22.3.0
hooks:
- id: black
language_version: python3
- repo: https://github.com/pycqa/flake8
rev: 4.0.1
rev: 7.0.0
hooks:
- id: flake8
- repo: https://github.com/timothycrosley/isort
rev: 5.9.3
rev: 5.12.0
hooks:
- id: isort
11 changes: 4 additions & 7 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,22 +1,19 @@
FROM tiangolo/uvicorn-gunicorn-fastapi:python3.9
FROM python:3.9.16-buster

WORKDIR /app

ENV POETRY_VERSION=1.2.0

# Install Poetry
RUN curl -sSL https://install.python-poetry.org | POETRY_HOME=/opt/poetry python && \
RUN curl -sSL https://install.python-poetry.org/ | POETRY_HOME=/opt/poetry python && \
cd /usr/local/bin && \
ln -s /opt/poetry/bin/poetry && \
poetry config experimental.new-installer false && \
poetry config virtualenvs.create false

# Copy poetry.lock* in case it doesn't exist in the repo
COPY ./pyproject.toml ./poetry.lock* /
COPY ./pyproject.toml ./poetry.lock* /

# Allow installing dev dependencies to run tests
ARG INSTALL_DEV=false
RUN bash -c "if [ $INSTALL_DEV == 'true' ] ; then poetry install --no-root ; else poetry install --no-root --no-dev ; fi"
RUN bash -c "if [ $INSTALL_DEV == 'true' ] ; then poetry install --no-root ; else poetry install --no-root --only main ; fi"

COPY . .
ENV PYTHONPATH=/app
10 changes: 4 additions & 6 deletions Dockerfile.dev
Original file line number Diff line number Diff line change
@@ -1,22 +1,20 @@
FROM tiangolo/uvicorn-gunicorn-fastapi:python3.9
FROM python:3.9.16-buster

WORKDIR /app

ENV POETRY_VERSION=1.2.0
ENV POETRY_VERSION=1.5.1

# Install Poetry
RUN curl -sSL https://install.python-poetry.org | POETRY_HOME=/opt/poetry python && \
cd /usr/local/bin && \
ln -s /opt/poetry/bin/poetry && \
poetry config experimental.new-installer false && \
export PATH="/opt/poetry/bin:$PATH" && \
poetry config virtualenvs.create false

# Copy poetry.lock* in case it doesn't exist in the repo
COPY ./pyproject.toml ./poetry.lock* /

# Allow installing dev dependencies to run tests
ARG INSTALL_DEV=false
RUN bash -c "if [ $INSTALL_DEV == 'true' ] ; then poetry install --no-root ; else poetry install --no-root --no-dev ; fi"
RUN bash -c "if [ $INSTALL_DEV == 'true' ] ; then /opt/poetry/bin/poetry install --no-root ; else /opt/poetry/bin/poetry install --no-root --no-dev ; fi"

COPY . .
ENV PYTHONPATH=/app
20 changes: 20 additions & 0 deletions Dockerfile.prod
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Stage 1: use Poetry only to export the project's dependencies to a
# requirements.txt file; Poetry itself is not carried into the final image.
FROM python:3.10-slim-buster as requirements-stage

WORKDIR /tmp
# NOTE(review): Poetry is installed without a version pin, so the tool version
# may differ between builds - consider pinning.
RUN pip install poetry
# The trailing * on poetry.lock lets the COPY succeed when no lock file exists.
COPY ./pyproject.toml ./poetry.lock* /tmp/

# Stage the application source here so the final image can copy it from this
# stage rather than from the build context.
RUN mkdir -p /tmp/app
COPY ./app /tmp/app

# Written to /tmp/requirements.txt (the current WORKDIR), without hashes.
RUN poetry export -f requirements.txt --output requirements.txt --without-hashes


# Stage 2: runtime image containing the exported dependencies and the app code.
FROM python:3.10-slim-buster

WORKDIR /code

COPY --from=requirements-stage /tmp/requirements.txt /code/requirements.txt
RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt

COPY --from=requirements-stage /tmp/app /code/app
# NOTE(review): no CMD/ENTRYPOINT is declared in this file; presumably the
# start command is supplied externally (compose/orchestrator) - confirm.
13 changes: 10 additions & 3 deletions app/api/api_v1/routers/prefetch.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from fastapi import APIRouter

from app.models.prefetch import Prefetch
from app.models.prefetch import Prefetch, PrefetchResponse
from app.utils.tasks import prefetch_profiles

prefetch_router = router = APIRouter()
Expand All @@ -24,10 +24,17 @@ async def prefetch_profiles_background(prefetch: Prefetch):
urls = prefetch.urls
minimal = prefetch.minimal
samples_to_fetch = prefetch.samples_to_fetch
trigger_id = prefetch.trigger_id

# Prefetch Profiles as a background job
result = prefetch_profiles.delay(
urls=urls, minimal=minimal, samples_to_fetch=samples_to_fetch
urls=urls,
minimal=minimal,
samples_to_fetch=samples_to_fetch,
trigger_id=trigger_id,
)

return result.id
return PrefetchResponse(
task_id=result.id,
trigger_id=trigger_id,
)
2 changes: 1 addition & 1 deletion app/api/api_v1/routers/profile.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from typing import List

from fastapi import APIRouter, Depends
from pandas_profiling import ProfileReport
from ydata_profiling import ProfileReport

from app.core.config import Settings
from app.models.alerts import Alerts
Expand Down
15 changes: 13 additions & 2 deletions app/core/config.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from typing import List

from pydantic import BaseSettings
from pydantic_settings import BaseSettings


class Settings(BaseSettings):
Expand Down Expand Up @@ -41,10 +41,21 @@ class Settings(BaseSettings):

# MODEL PARAMS
# Constraint for Column names
COLUMN_NAME_REGEX_PATTERN = r"[\w\s]*"
COLUMN_NAME_REGEX_PATTERN: str = r"[\w\s]*"

# PROFILE SEGMENTS
SAMPLE_DATA_RENDERER: List[str] = ["head"]

# LOGGING SETTINGS
LOG_LEVEL: str = "DEBUG"
LOG_FILE_PATH: str = "logs/app.log"
LOG_FILE_SIZE: int = 100_000_000 # 100MB
LOG_FILE_BACKUP_COUNT: int = 5
LOG_FORMAT: str = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"

# PROFILING SETTINGS
PROGRESS_BAR: bool = True

class Config:
env_file = ".env"
extra = "ignore"
56 changes: 56 additions & 0 deletions app/core/logging.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
import logging
import logging.config
import os

from app.core.config import Settings

settings = Settings()

# Create the logs directory if it doesn't exist.
# os.path.dirname() returns "" when LOG_FILE_PATH is a bare file name
# (e.g. "app.log"), and os.makedirs("") raises FileNotFoundError, so a
# directory is only created when the path actually contains one.
# exist_ok=True avoids failing if the directory appears between a check
# and the creation (e.g. another process importing this module).
log_directory = os.path.dirname(settings.LOG_FILE_PATH)
if log_directory:
    os.makedirs(log_directory, exist_ok=True)

# Configuration dictionary for logging, applied below via
# logging.config.dictConfig.
LOGGING_CONFIG = {
    "version": 1,
    # Leave loggers that were created before this module was imported enabled.
    "disable_existing_loggers": False,
    "formatters": {
        # NOTE(review): no handler below references this formatter, so it
        # currently has no effect on the console output - confirm whether it
        # should be attached or removed.
        "default": {
            "format": "%(asctime)s [%(levelname)s] [%(name)s:%(lineno)d] - %(message)s",  # noqa: E501
            "datefmt": "%Y-%m-%d %H:%M:%S",
        },
    },
    "handlers": {
        "console": {
            "class": "rich.logging.RichHandler",
            "level": settings.LOG_LEVEL,
        },
    },
    "loggers": {
        # Root logger: receives records from every logger that propagates.
        "": {
            "level": settings.LOG_LEVEL,
            "handlers": ["console"],
            "propagate": True,
        },
        "celery": {
            "level": settings.LOG_LEVEL,
            "handlers": ["console"],
            # This logger already writes to "console"; propagating to the
            # root logger (which uses the same handler) would emit every
            # celery record twice.
            "propagate": False,
        },
    },
}

# Load the logging configuration
logging.config.dictConfig(LOGGING_CONFIG)


def get_logger(name: str) -> logging.Logger:
    """
    Return the logger registered under the given name.

    This is a thin wrapper around ``logging.getLogger``; repeated calls with
    the same name return the same logger object.

    Args:
        name (str): Name of the logger to look up (or create).
    Returns:
        logging.Logger: The logger associated with ``name``.
    """
    logger = logging.getLogger(name)
    return logger
8 changes: 4 additions & 4 deletions app/models/alerts.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
from __future__ import annotations

from typing import List
from typing import List, Optional

from pydantic import BaseModel
from pydantic import RootModel


class Alerts(BaseModel):
__root__: List[str]
class Alerts(RootModel[Optional[List[str]]]):
pass
10 changes: 5 additions & 5 deletions app/models/analysis.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
from datetime import datetime, timedelta
from datetime import datetime
from typing import Optional

from pydantic.main import BaseModel


class Analysis(BaseModel):
title: str
date_start: datetime
date_end: datetime
duration: timedelta
title: Optional[str]
date_start: Optional[datetime]
date_end: Optional[datetime]

class Config:
underscore_attrs_are_private = True
1 change: 1 addition & 0 deletions app/models/correlations.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ class Correlations(BaseModel):
kendall: Optional[Union[Json, Dict]]
cramers: Optional[Union[Json, Dict]]
phi_k: Optional[Union[Json, Dict]]
# auto: Optional[Union[Json, Dict, Any]]

class Config:
underscore_attrs_are_private = True
8 changes: 4 additions & 4 deletions app/models/duplicates.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from typing import Union
from typing import Any

from pydantic import BaseModel, Json
from pydantic import RootModel


class Duplicates(BaseModel):
__root__: Union[Json, str]
class Duplicates(RootModel[Any]):
pass
Loading

0 comments on commit 8701d60

Please sign in to comment.