Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

trending words endpoint #405

Merged
merged 2 commits into from
Mar 10, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 39 additions & 0 deletions backend/alembic/versions/4b97d64fa93d_add_db_for_trending_words.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
"""Add db for trending words

Revision ID: 4b97d64fa93d
Revises: 4e16c97d06e7
Create Date: 2021-03-08 06:17:07.485897

"""
from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = "4b97d64fa93d"
down_revision = "4e16c97d06e7"
branch_labels = None
depends_on = None


def upgrade():
    """Create the ``trending_words`` table, then the index on its ``id`` column."""
    table_name = "trending_words"

    # Column layout: integer primary key, two nullable timestamps, and a
    # nullable JSON payload.
    columns = [
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column("created_at", sa.DateTime(), nullable=True),
        sa.Column("updated_at", sa.DateTime(), nullable=True),
        sa.Column("data", sa.JSON(), nullable=True),
    ]
    op.create_table(table_name, *columns, sa.PrimaryKeyConstraint("id"))

    # Non-unique index on the primary-key column.
    index_name = op.f("ix_trending_words_id")
    op.create_index(index_name, table_name, ["id"], unique=False)


def downgrade():
    """Drop the ``id`` index and then the ``trending_words`` table itself."""
    table_name = "trending_words"
    index_name = op.f("ix_trending_words_id")

    # Reverse order of upgrade(): the index goes first, then the table.
    op.drop_index(index_name, table_name=table_name)
    op.drop_table(table_name)
1 change: 1 addition & 0 deletions backend/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,4 @@ pandas
jenkspy
sigfig
GitPython
nltk
6 changes: 6 additions & 0 deletions backend/router/stories.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,3 +216,9 @@ def explore(
return crud.get_story_feed(
db, current_story.id, current_story.latitude, current_story.longitude
)


@router.get("/trending")
def trending(db: Session = Depends(get_db)):
    # Thin route handler: hand the request-scoped session to the CRUD layer
    # and return whatever it produces, unchanged.
    return crud.get_trending_words(db)
64 changes: 64 additions & 0 deletions backend/stories/crud.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,22 @@
from typing import List
import random
import datetime

from sqlalchemy.orm import Session, joinedload
from sqlalchemy.sql.expression import func, and_
import nltk
from nltk.corpus import stopwords
from nltk.probability import FreqDist
import string
import asyncio

from database import Base
from users.models import User
from . import models, schemas

# Ask NLTK for the "stopwords" corpus used by sample_trending_words().
# NOTE: this call executes every time this module is imported.
nltk.download("stopwords")
# Module-level guard flag: run_sample_task() sets it to True before sampling
# and skips the work while it is already True; it is set back to False once
# sampling finishes.
sampling_trending_words = False


def update(model_id: int, dto: schemas.BaseModel, model: Base, db: Session):
item_as_dict = dict(dto)
Expand Down Expand Up @@ -198,3 +207,58 @@ def rand_per_story(arr: [models.MyStory]):
output.append(ms)

return output


def get_trending_words(db: Session, max_age_days: int = 7):
    """Return the stored trending-words row, refreshing it when missing or stale.

    The first ``models.Trending`` row is loaded.  A refresh is triggered through
    ``run_sample_task`` when there is no row yet, when the row has no
    ``updated_at`` timestamp, or when that timestamp is at least
    ``max_age_days`` days old.

    Args:
        db: SQLAlchemy session used for the lookup and handed to the sampler.
        max_age_days: Age in whole days after which the row counts as stale.

    Returns:
        The ``models.Trending`` row, or ``None`` if no row exists even after
        the refresh attempt (for example when ``run_sample_task`` returned
        early because a sample was already in progress).
    """
    db_trending = db.query(models.Trending).first()

    if db_trending is None:
        stale = True
    elif db_trending.updated_at is None:
        # The column is nullable; without a timestamp the age cannot be
        # computed, so treat the row as stale instead of raising TypeError.
        stale = True
    else:
        age = datetime.datetime.now() - db_trending.updated_at
        stale = age.days >= max_age_days

    if stale:
        run_sample_task(db, db_trending)
        # Re-read so that a row created by this very call is returned to the
        # caller rather than the ``None`` captured before sampling.
        db_trending = db.query(models.Trending).first()

    return db_trending


def run_sample_task(db: Session, to_update):
    """Run ``sample_trending_words`` to completion on a fresh event loop.

    Does nothing when the module-level ``sampling_trending_words`` flag is
    already set.  The flag is a plain boolean (no lock is taken here), so it
    is only a best-effort guard against overlapping runs.

    Args:
        db: SQLAlchemy session forwarded to ``sample_trending_words``.
        to_update: Existing trending row to overwrite, or a falsy value to
            have a new row created.

    Raises:
        Any exception raised by ``sample_trending_words`` propagates to the
        caller after cleanup has run.
    """
    global sampling_trending_words
    if sampling_trending_words:
        return

    sampling_trending_words = True
    loop = asyncio.new_event_loop()
    try:
        asyncio.set_event_loop(loop)
        loop.run_until_complete(sample_trending_words(db, to_update))
    finally:
        # Always release the loop and the guard flag; otherwise a single
        # failed sample would leave the flag set and block every later refresh.
        loop.close()
        sampling_trending_words = False


async def sample_trending_words(db: Session, to_update, k: int = 3):
    """Compute the ``k`` most frequent non-stopword words and persist them.

    Every ``my_story.text`` reachable from every ``models.Story`` row is
    lower-cased, stripped of ASCII punctuation (``string.punctuation``), split
    on whitespace, and filtered against NLTK's English stopword list.  The
    ``k`` most common remaining words are stored as the ``data`` of the
    trending row.

    Args:
        db: SQLAlchemy session used to read stories and write the result.
        to_update: Existing trending row; when truthy its ``id`` is passed to
            ``update`` so the row is overwritten, otherwise a new
            ``models.Trending`` row is added and committed.
        k: Number of top words to keep (defaults to 3).

    The module-level ``sampling_trending_words`` flag is cleared when this
    coroutine finishes, whether it succeeds or raises.
    """
    global sampling_trending_words

    try:
        stop_words = set(stopwords.words("english"))
        # Build the punctuation-stripping table once instead of once per text.
        strip_punctuation = str.maketrans("", "", string.punctuation)

        # Count incrementally so the full word list is never held in memory.
        fdist = FreqDist()
        for story in db.query(models.Story).all():
            for my_story in story.my_stories:
                if not my_story.text:
                    continue

                text = my_story.text.lower().translate(strip_punctuation)
                fdist.update(
                    word for word in text.split() if word not in stop_words
                )

        # List of (word, count) pairs, most frequent first.
        top = fdist.most_common(k)

        if to_update:
            update(to_update.id, {"data": top}, models.Trending, db)
        else:
            db.add(models.Trending(data=top))
            db.commit()
    finally:
        # Clear the guard even on failure so later refreshes are not blocked.
        sampling_trending_words = False
6 changes: 6 additions & 0 deletions backend/stories/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,3 +103,9 @@ class MyStory(Base):

story = relationship("Story", back_populates="my_stories")
comments = relationship("Comment", lazy="select")


class Trending(Base):
    """ORM model mapped to the ``trending_words`` table.

    Only ``data`` is declared here.  The migration for this table also creates
    ``id``, ``created_at`` and ``updated_at`` columns; presumably those are
    supplied by ``Base`` -- TODO confirm.
    """

    __tablename__ = "trending_words"

    # JSON payload; stories/crud.py stores the result of
    # FreqDist.most_common(k) here (a list of word/count pairs).
    data = Column(JSON)