Skip to content

Commit

Permalink
Merge with main
Browse files Browse the repository at this point in the history
  • Loading branch information
stopmin committed Jul 18, 2024
2 parents f85265b + 4157a77 commit 610cb42
Show file tree
Hide file tree
Showing 19 changed files with 662 additions and 233 deletions.
1 change: 1 addition & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ FROM --platform=linux/amd64 python:3.11.4-slim-bookworm

WORKDIR /app

# Install the required system packages (including libgomp)
RUN apt-get update && \
apt-get install -y --no-install-recommends gcc libpq-dev libatlas-base-dev libgomp1 && \
pip install --no-cache-dir pipenv
Expand Down
9 changes: 8 additions & 1 deletion app/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,12 @@
from fastapi import FastAPI
from starlette.exceptions import HTTPException

from app.router.chatbot_article_router import chatbot_article_router
from app.config.exception_handler import exception_handler, http_exception_handler
from app.config.middlewares.request_response_logging_middle_ware import (
LoggingMiddleware,
)
from app.router.api_visualization_router import api_visualization_router
from app.router.chatbot_article_router import chatbot_article_router
from app.router.generate_simple_article_router import simple_article_router
from app.router.send_email_service_router import send_email_service_router
from app.router.user_type_router import user_type_router
Expand All @@ -33,7 +34,13 @@ async def startup_event():
app.include_router(simple_article_router)
app.include_router(send_email_service_router)
app.include_router(chatbot_article_router)
app.include_router(api_visualization_router)

# exception handlers
app.add_exception_handler(Exception, exception_handler)
app.add_exception_handler(HTTPException, http_exception_handler)


# Health-check endpoint: answers GET /health with a fixed OK payload.
@app.get("/health")
async def health_check():
    payload = {"status": "OK"}
    return payload
18 changes: 18 additions & 0 deletions app/model/api_visualization.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# api_visualization.py

from datetime import datetime

from sqlalchemy import BigInteger, Column, DateTime, String, Text

from app.database.repository import Base


class ApiVisualization(Base):
    """ORM mapping for the ``gyeongdan.api_visualization`` table.

    Each row stores a title, a body text and an HTML graph payload,
    together with a creation timestamp.
    """

    __tablename__ = "api_visualization"
    __table_args__ = {"schema": "gyeongdan"}

    # Unique identifier.
    id = Column(BigInteger, primary_key=True, autoincrement=True)
    # Title.
    title = Column(String, nullable=False)
    # Body text.
    content = Column(Text, nullable=False)
    # HTML data.
    graph_html = Column(Text, nullable=False)
    # Defaults to datetime.now() when no value is supplied.
    created_at = Column(DateTime, default=datetime.now, nullable=False)
15 changes: 15 additions & 0 deletions app/model/article_related_document.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
from sqlalchemy import Column, String, Integer, ForeignKey, Text
from sqlalchemy.orm import relationship

from app.database.repository import Base

class ArticleRelatedDocument(Base):
    """ORM mapping for the ``gyeongdan.article_related_documents`` table.

    ``article_id`` is a foreign key to ``gyeongdan.articles.id`` declared
    with ``ondelete="CASCADE"``.
    """

    __tablename__ = "article_related_documents"
    __table_args__ = {"schema": "gyeongdan"}

    id = Column(Integer, primary_key=True, autoincrement=True)
    article_id = Column(
        Integer,
        ForeignKey("gyeongdan.articles.id", ondelete="CASCADE"),
    )
    title = Column(String(255), nullable=False)
    link = Column(String(255), nullable=True)
    snippet = Column(Text)

    # Other side of the "related_documents" relationship on Articles.
    article = relationship("Articles", back_populates="related_documents")
6 changes: 5 additions & 1 deletion app/model/crawled_article.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
from datetime import datetime

from sqlalchemy import CHAR, BigInteger, Column, DateTime, String, Text, event, Integer
from sqlalchemy import CHAR, BigInteger, Column, DateTime, String, Text, event
from sqlalchemy.dialects.postgresql import JSONB
from sqlalchemy.orm import relationship

from app.database.repository import Base

Expand All @@ -25,6 +26,9 @@ class Articles(Base):
published_at = Column(DateTime, nullable=True)
image_url = Column(String, nullable=True)

related_documents = relationship("ArticleRelatedDocument", back_populates="article")


@event.listens_for(Articles, "before_update", propagate=True)
def update_timestamp(mapper, connection, target): # pylint: disable=unused-argument
target.updated_at = datetime.now()
52 changes: 43 additions & 9 deletions app/model/prompt/graphic_article/2024-07-10.txt
Original file line number Diff line number Diff line change
@@ -1,11 +1,45 @@
You are provided with a summary of a dataset. Suggest a suitable graph type for visualizing the data along with the necessary preprocessing steps and any additional keyword arguments for the graph functions. Your response should include the graph type, the x and y values, preprocessing steps, and additional keyword arguments.
params have to be dict
You are provided with a summary of a dataset and the title of a news article. Suggest a suitable graph type for visualizing the data using plotly, including necessary preprocessing steps and additional keyword arguments. Also, create the body text of the article based on the visualization. Use only the column names provided in the summary for x_value and y_value. Follow the specified graph types and preprocessing steps only.

You should respond a json type.
Response format: {
graph_type,
x_value,
y_value,
preprocessing_steps: [{type, params}] or None,
kwargs: {additional_keyword_arguments} or None
Valid graph types: bar, line, pie, histogram, scatter, choropleth, funnel
Valid preprocessing steps: melt, pivot, groupby, filter, drop, replace, merge, sort

You should respond in JSON format.
Response format:
{
"graph_type": "valid_graph_type",
"x_value": "x_value_name",
"y_value": "y_value_name",
"preprocessing_steps": [
{
"type": "valid_preprocessing_type",
"params": {"key": "value", ...}
},
...
] or null,
"kwargs": {
"key": "value",
...
} or null,
"article": {
"body": "Detailed article body based on the data visualization, it must be Korean"
}
}

Example:
{
"graph_type": "bar",
"x_value": "category",
"y_value": "count",
"preprocessing_steps": [
{
"type": "groupby",
"params": {"by": ["category"], "agg_func": {"count": "sum"}}
}
],
"kwargs": {
"color": "blue"
},
"article": {
"body": "의대생 중 95%나 되는 인원이 시험을 치지 않겠다고 선언했어요!"
}
}
37 changes: 33 additions & 4 deletions app/recommend/recommend_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,30 @@
from app.service.user_type_service import UserTypeService


async def user_id_to_classification_id(user_id, session:AsyncSession):
# Classification id for each of the ten possible 3-of-5 trait combinations.
# Keys are the trait names; every combination appears exactly once.
_CLASSIFICATION_ID_BY_TOP_TRAITS = {
    frozenset(("ISSUE_FINDER", "LIFESTYLE_CONSUMER", "ENTERTAINER")): 1,
    frozenset(("ISSUE_FINDER", "LIFESTYLE_CONSUMER", "TECH_SPECIALIST")): 2,
    frozenset(("ISSUE_FINDER", "LIFESTYLE_CONSUMER", "PROFESSIONALS")): 3,
    frozenset(("ISSUE_FINDER", "ENTERTAINER", "TECH_SPECIALIST")): 4,
    frozenset(("ISSUE_FINDER", "ENTERTAINER", "PROFESSIONALS")): 5,
    frozenset(("ISSUE_FINDER", "TECH_SPECIALIST", "PROFESSIONALS")): 6,
    frozenset(("LIFESTYLE_CONSUMER", "ENTERTAINER", "TECH_SPECIALIST")): 7,
    frozenset(("LIFESTYLE_CONSUMER", "ENTERTAINER", "PROFESSIONALS")): 8,
    frozenset(("LIFESTYLE_CONSUMER", "TECH_SPECIALIST", "PROFESSIONALS")): 9,
    frozenset(("ENTERTAINER", "TECH_SPECIALIST", "PROFESSIONALS")): 10,
}


async def user_type_to_classification_id(user_type) -> int:
    """Map five user-type scores to a classification id between 1 and 10.

    The three highest-scoring traits are selected and looked up in a fixed
    table that assigns one id to each 3-of-5 trait combination. Ties are
    broken in declaration order (ISSUE_FINDER first) because the sort is
    stable.

    Args:
        user_type: Indexable holding five scores, in the order
            ISSUE_FINDER, LIFESTYLE_CONSUMER, ENTERTAINER, TECH_SPECIALIST,
            PROFESSIONALS.

    Returns:
        The classification id whose trait set equals the top three traits.

    Raises:
        IndexError: If ``user_type`` has fewer than five elements.
        KeyError: If a trait's ``value['name']`` is not one of the five
            trait names used as table keys.
    """
    traits = (
        UserTypes.ISSUE_FINDER,
        UserTypes.LIFESTYLE_CONSUMER,
        UserTypes.ENTERTAINER,
        UserTypes.TECH_SPECIALIST,
        UserTypes.PROFESSIONALS,
    )
    # Index explicitly (not zip) so that a too-short input still raises.
    scored = [(user_type[position], trait) for position, trait in enumerate(traits)]
    scored.sort(key=lambda pair: pair[0], reverse=True)

    # NOTE(review): assumes each member's value['name'] equals its trait name
    # (e.g. "ISSUE_FINDER"); the previous DataFrame column lookup relied on
    # the same assumption.
    top_traits = frozenset(trait.value['name'] for _, trait in scored[:3])
    return _CLASSIFICATION_ID_BY_TOP_TRAITS[top_traits]

async def user_id_to_classification_id(user_id, session:AsyncSession) -> int:
userType = await UserTypeService().get_user_type_by_id(user_id, session)
target_features = [[userType.user_type_issue_finder, UserTypes.ISSUE_FINDER],
[userType.user_type_lifestyle_consumer, UserTypes.LIFESTYLE_CONSUMER],
Expand Down Expand Up @@ -84,6 +107,8 @@ def __init__(self):
self.num_articles = None
self.user_item_matrix = None
self.model = None
self.idx_to_id = dict()
self.id_to_idx = dict()
self.user_data_path = "/./user_classification.csv"

async def initialize_data(self, session):
Expand All @@ -97,7 +122,11 @@ def set_user_datas(self):
print(self.num_classifications)

async def set_article_datas(self, session):

articles = await ArticleManageService().get_all_articles(session=session)
for idx, article in enumerate(articles):
self.idx_to_id[idx] = article.id
self.id_to_idx[article.id] = idx
self.num_articles = len(articles)
print(self.num_articles)

Expand All @@ -109,8 +138,8 @@ def make_dataset(self):
print(self.interaction_datas)
self.user_item_matrix = csr_matrix((self.interaction_datas['duration_time'].tolist(),
(self.interaction_datas['classification_id'].tolist(),
self.interaction_datas['article_id'].tolist()))
, shape=(self.num_classifications+1, self.num_articles+1))
list(map(lambda x : self.id_to_idx[x], self.interaction_datas['article_id'].tolist()))))
, shape=(self.num_classifications+1, self.num_articles))

self.user_item_matrix = (self.user_item_matrix > 0).astype(np.float32)
print("Num users: {}, num_items {}.".format(self.num_classifications, self.num_articles))
Expand All @@ -135,7 +164,7 @@ def fit_model(self):
async def get_recommend_articles(self, classification_id: int, session: AsyncSession, N: int = 10):
indices, scores = self.model.recommend(userid=classification_id, user_items=csr_matrix(self.user_item_matrix.toarray()[classification_id]), N=N)
for i in range(N):
indices[i] += 1
indices[i] = self.idx_to_id[indices[i]]

await RecommendRepository().update_recommend(id=classification_id+1, article_ids=indices, session=session)

Expand Down
35 changes: 35 additions & 0 deletions app/repository/api_visualization_crud.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# api_visualization_crud.py

from fastapi import HTTPException
from sqlalchemy.ext.asyncio import AsyncSession

from app.database.repository import get_repository, model_to_dict
from app.model.api_visualization import ApiVisualization


class ApiVisualizationRepository:
    """Data-access helpers for ``ApiVisualization`` rows.

    Every method builds a repository for ``ApiVisualization`` bound to the
    given session through ``get_repository`` and delegates to it.
    """

    # Create
    async def create(self, api_article: ApiVisualization, session: AsyncSession):
        """Insert ``api_article`` and return the repository's result.

        The model instance is converted with ``model_to_dict`` before it is
        handed to ``repository.create``.
        """
        repository = get_repository(ApiVisualization)(session)
        return await repository.create(model_to_dict(api_article))

    async def get_by_id(self, pk: int, session: AsyncSession):
        """Return the row whose primary key is ``pk``.

        Raises:
            HTTPException: With status 404 when the repository returns
                ``None`` for ``pk``.
        """
        repository = get_repository(ApiVisualization)(session)
        content = await repository.get(pk)
        if content is None:
            raise HTTPException(
                status_code=404, detail="해당 순번이 존재하지 않습니다."
            )
        return content

    async def get_all(self, session: AsyncSession):
        """Return the result of an unfiltered ``repository.filter()`` call."""
        repository = get_repository(ApiVisualization)(session)
        return await repository.filter()

    async def update_content(self, id: int, content: str, session: AsyncSession):
        """Update the ``content`` column of the row with primary key ``id``."""
        repository = get_repository(ApiVisualization)(session)
        # The dict key must be the column name; using the variable itself
        # would make the new text the key.
        return await repository.update_by_pk(pk=id, data={"content": content})

    async def update_graph(self, id: int, graph_html: str, session: AsyncSession):
        """Update the ``graph_html`` column of the row with primary key ``id``."""
        repository = get_repository(ApiVisualization)(session)
        # Same as above: key by column name, not by the new HTML payload.
        return await repository.update_by_pk(pk=id, data={"graph_html": graph_html})
11 changes: 10 additions & 1 deletion app/repository/recommend_crud.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,13 @@ async def update_recommend(
data={
"recommend_article_ids": article_ids
},
)
)

async def get(self, pk: int, session: AsyncSession):
    """Return the ``Recommend`` row with primary key ``pk``.

    Raises:
        HTTPException: With status 404 when the repository returns ``None``.
    """
    found = await get_repository(Recommend)(session).get(pk)
    if found is not None:
        return found
    raise HTTPException(
        status_code=404, detail="해당 classification이 존재하지 않습니다."
    )
5 changes: 4 additions & 1 deletion app/repository/user_type_crud.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,10 @@
class UserTypeRepository:
async def create(self, user_type: UserType, session: AsyncSession):
    """Insert ``user_type``; if the insert fails, return the stored row.

    The model is converted with ``model_to_dict`` and passed to
    ``repository.create``. When that raises, the row is looked up by
    ``user_type.user_id`` through ``self.get`` instead.
    """
    repository = get_repository(UserType)(session)
    try:
        return await repository.create(model_to_dict(user_type))
    except Exception:  # pylint: disable=broad-except
        # Deliberately not a bare ``except:``, which would also swallow
        # asyncio.CancelledError and KeyboardInterrupt.
        # NOTE(review): presumably the insert fails because a row for this
        # user already exists — confirm and narrow to that error type.
        return await self.get(user_type.user_id, session)

async def get(self, pk: int, session: AsyncSession):
repository = get_repository(UserType)(session)
Expand Down
78 changes: 78 additions & 0 deletions app/router/api_visualization_router.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
# api_visualization_router.py

from datetime import datetime
from enum import Enum

from fastapi import APIRouter, Depends
from pydantic import BaseModel
from sqlalchemy.ext.asyncio import AsyncSession

from app.database.session import get_db_session
from app.service.api_visualization_service import (
ApiVisualizationService,
create_article,
)
from app.utils.generic_response import GenericResponseDTO

api_visualization_router = APIRouter()


# Data sets selectable through the API. Each value is the Korean label of
# the data set: "인구" (population), "청소년 정책" (youth policy) and
# "진주 코비드" (Jinju COVID).
class select_data(Enum):
    population = "인구"
    childhood = "청소년 정책"
    JINJU = "진주 코비드"


# Response payload describing one visualization article.
class ApiVisualResponseDTO(BaseModel):
    # Article title.
    title: str
    # Article body text.
    content: str
    # HTML for the graph.
    html_data: str
    # Timestamp reported for the article.
    created_at: datetime


# TODO: provisional implementation; proper models still need to be created.
#
# POST /api_visual/article/{user_input}: builds an article for the chosen
# data set through create_article and returns it wrapped in a
# GenericResponseDTO.
@api_visualization_router.post(
    "/api_visual/article/{user_input}",
    response_model=GenericResponseDTO[ApiVisualResponseDTO],
)
async def api_visualization_article(
    user_input: bool,
    title: str,
    data_set: select_data,
    session: AsyncSession = Depends(get_db_session),
):
    # For now, assume the data set is chosen from the API's fixed options.
    graph_html, body = await create_article(
        title=title, data=data_set.value, user_input=user_input, session=session
    )

    # An empty body is replaced with the literal placeholder "user_input".
    article_dto = ApiVisualResponseDTO(
        title=title,
        content=body if body != "" else "user_input",
        html_data=graph_html,
        created_at=datetime.now(),
    )
    return GenericResponseDTO[ApiVisualResponseDTO](
        data=article_dto,
        message="Successfully created article done.",
        result=True,
    )


# GET /api_visual/article/{id}: fetches one stored article through
# ApiVisualizationService.get_by_id and returns it wrapped in a
# GenericResponseDTO.
@api_visualization_router.get(
    "/api_visual/article/{id}", response_model=GenericResponseDTO[ApiVisualResponseDTO]
)
async def get_api_visualization_article(
    id: int,
    session: AsyncSession = Depends(get_db_session),
):
    record = await ApiVisualizationService().get_by_id(id=id, session=session)
    article_dto = ApiVisualResponseDTO(
        title=record.title,
        content=record.content,
        html_data=record.graph_html,
        created_at=record.created_at,
    )
    return GenericResponseDTO[ApiVisualResponseDTO](
        data=article_dto,
        message="Successfully 'get' done.",
        result=True,
    )
Loading

0 comments on commit 610cb42

Please sign in to comment.