Skip to content

Commit

Permalink
Merge pull request #32 from factly/fix/memory-issue-test-1
Browse files Browse the repository at this point in the history
Fix/memory issue test 1
  • Loading branch information
paul-tharun authored Feb 7, 2024
2 parents 8701d60 + d8eaae2 commit ce206ec
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 5 deletions.
6 changes: 5 additions & 1 deletion app/db/mongo.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from motor.motor_asyncio import AsyncIOMotorClient
from pymongo import MongoClient

from app.core.config import Settings

Expand All @@ -8,5 +9,8 @@

# Asynchronous (Motor) client and database handle, built from the same
# `mongo_url` and `settings.MONGODB_DATABASE` as the synchronous pair below.
mongo_engine = AsyncIOMotorClient(mongo_url)
db = mongo_engine[settings.MONGODB_DATABASE]

# Async handle to the "profiles" collection.
profiles_collection = db["profiles"]

# Synchronous (PyMongo) client pointing at the same URL, database and
# "profiles" collection as the async handles above.
# NOTE(review): presumably intended for blocking callers that do not run
# inside an event loop (e.g. worker tasks) -- confirm against the importers.
sync_mongo_engine = MongoClient(mongo_url)
sync_db = sync_mongo_engine[settings.MONGODB_DATABASE]
sync_profiles_collection = sync_db["profiles"]
19 changes: 15 additions & 4 deletions app/utils/tasks.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
import time

from fastapi.encoders import jsonable_encoder
from ydata_profiling import ProfileReport

from app.core.logging import get_logger
from app.db.mongo import profiles_collection
from app.db.mongo import sync_profiles_collection
from app.utils.dataframes import get_dataframe
from app.utils.profile_segments import ProfileSegments
from app.worker import celery
Expand All @@ -26,9 +28,11 @@ def prefetch_profile(
minimal (bool, optional): Mode of Profile that needs to be fetched. Defaults to True. # noqa: E501
samples_to_fetch (int, optional): Samples of Dataset rows to fetch. Defaults to 10. # noqa: E501
"""

start_time = time.perf_counter()
dataframe = get_dataframe(url)
logger.info(f"Prefetching Profile for: {url}")
fetch_time = time.perf_counter() - start_time
logger.info(f"Time taken to fetch the dataset: {fetch_time:0.4f} seconds")

if dataframe.shape[0] < 100:
logger.info(f"Dataset has less than 100 rows: {dataframe.shape[0]}")
Expand All @@ -47,16 +51,23 @@ def prefetch_profile(
profile_segment = ProfileSegments(profile, columns=list(dataframe.columns))

description = profile_segment.description()

profile_time = time.perf_counter() - fetch_time
# Add `url` and `trigger_id` to the description before saving to MongoDB
description["url"] = url
description["trigger_id"] = trigger_id
logger.info(
f"Time taken to generate the profile: {profile_time:0.4f} seconds"
)

# Upsert a json-encoded description into MongoDB
profiles_collection.update_one(
sync_profiles_collection.update_one(
{"url": url}, {"$set": jsonable_encoder(description)}, upsert=True
)
logger.info(f"Profile Prefetched for: {url}")
upsert_time = time.perf_counter() - profile_time
logger.info(
f"Time taken to upsert the profile: {upsert_time:0.4f} seconds"
)
return


Expand Down

0 comments on commit ce206ec

Please sign in to comment.