Merge with main

Gyeongdan · Jul 18, 2024 · 610cb42 · 610cb42
2 parents f85265b + 4157a77
commit 610cb42
Show file tree

Hide file tree

Showing 19 changed files with 662 additions and 233 deletions.
diff --git a/Dockerfile b/Dockerfile
@@ -2,6 +2,7 @@ FROM --platform=linux/amd64 python:3.11.4-slim-bookworm
 
 WORKDIR /app
 
+# 필요한 시스템 패키지 설치 (libgomp 포함)
 RUN apt-get update && \
     apt-get install -y --no-install-recommends gcc libpq-dev libatlas-base-dev libgomp1 && \
     pip install --no-cache-dir pipenv

diff --git a/app/main.py b/app/main.py
@@ -4,11 +4,12 @@
 from fastapi import FastAPI
 from starlette.exceptions import HTTPException
 
-from app.router.chatbot_article_router import chatbot_article_router
 from app.config.exception_handler import exception_handler, http_exception_handler
 from app.config.middlewares.request_response_logging_middle_ware import (
     LoggingMiddleware,
 )
+from app.router.api_visualization_router import api_visualization_router
+from app.router.chatbot_article_router import chatbot_article_router
 from app.router.generate_simple_article_router import simple_article_router
 from app.router.send_email_service_router import send_email_service_router
 from app.router.user_type_router import user_type_router
@@ -33,7 +34,13 @@ async def startup_event():
 app.include_router(simple_article_router)
 app.include_router(send_email_service_router)
 app.include_router(chatbot_article_router)
+app.include_router(api_visualization_router)
 
 # exception handlers
 app.add_exception_handler(Exception, exception_handler)
 app.add_exception_handler(HTTPException, http_exception_handler)
+
+
+@app.get("/health")
+async def health_check():  # Health endpoint: always responds with a fixed OK status.
+    return {"status": "OK"}
diff --git a/app/model/api_visualization.py b/app/model/api_visualization.py
@@ -0,0 +1,18 @@
+# api_visualization.py
+
+from datetime import datetime
+
+from sqlalchemy import BigInteger, Column, DateTime, String, Text
+
+from app.database.repository import Base
+
+
+class ApiVisualization(Base):  # ORM model for the gyeongdan.api_visualization table
+    __tablename__ = "api_visualization"
+    __table_args__ = {"schema": "gyeongdan"}
+
+    id = Column(BigInteger, primary_key=True, autoincrement=True)  # unique identifier
+    title = Column(String, nullable=False)  # title
+    content = Column(Text, nullable=False)  # body text
+    graph_html = Column(Text, nullable=False)  # HTML data
+    created_at = Column(DateTime, default=datetime.now, nullable=False)  # default: datetime.now (naive local time)
diff --git a/app/model/article_related_document.py b/app/model/article_related_document.py
@@ -0,0 +1,15 @@
+from sqlalchemy import Column, String, Integer, ForeignKey, Text
+from sqlalchemy.orm import relationship
+
+from app.database.repository import Base
+
+class ArticleRelatedDocument(Base):  # ORM model for the gyeongdan.article_related_documents table
+    __tablename__ = 'article_related_documents'
+    __table_args__ = {'schema': 'gyeongdan'}
+    id = Column(Integer, primary_key=True, autoincrement=True)
+    article_id = Column(Integer, ForeignKey('gyeongdan.articles.id', ondelete='CASCADE'))  # FK to gyeongdan.articles.id, ON DELETE CASCADE
+    title = Column(String(255), nullable=False)
+    link = Column(String(255), nullable=True)
+    snippet = Column(Text)
+
+    article = relationship("Articles", back_populates="related_documents")  # paired with Articles.related_documents
diff --git a/app/model/crawled_article.py b/app/model/crawled_article.py
@@ -1,7 +1,8 @@
 from datetime import datetime
 
-from sqlalchemy import CHAR, BigInteger, Column, DateTime, String, Text, event, Integer
+from sqlalchemy import CHAR, BigInteger, Column, DateTime, String, Text, event
 from sqlalchemy.dialects.postgresql import JSONB
+from sqlalchemy.orm import relationship
 
 from app.database.repository import Base
 
@@ -25,6 +26,9 @@ class Articles(Base):
     published_at = Column(DateTime, nullable=True)
     image_url = Column(String, nullable=True)
 
+    related_documents = relationship("ArticleRelatedDocument", back_populates="article")
+
+
 @event.listens_for(Articles, "before_update", propagate=True)
 def update_timestamp(mapper, connection, target):  # pylint: disable=unused-argument
     target.updated_at = datetime.now()
diff --git a/app/model/prompt/graphic_article/2024-07-10.txt b/app/model/prompt/graphic_article/2024-07-10.txt
@@ -1,11 +1,45 @@
-You are provided with a summary of a dataset. Suggest a suitable graph type for visualizing the data along with the necessary preprocessing steps and any additional keyword arguments for the graph functions. Your response should include the graph type, the x and y values, preprocessing steps, and additional keyword arguments.
-params have to be dict
+You are provided with a summary of a dataset and the title of a news article. Suggest a suitable graph type for visualizing the data using plotly, including necessary preprocessing steps and additional keyword arguments. Also, create the body text of the article based on the visualization. Use only the column names provided in the summary for x_value and y_value. Follow the specified graph types and preprocessing steps only.
 
-You should respond a json type.
-Response format: {
-  graph_type,
-  x_value,
-  y_value,
-  preprocessing_steps: [{type, params}] or None,
-  kwargs: {additional_keyword_arguments} or None
+Valid graph types: bar, line, pie, histogram, scatter, choropleth, funnel
+Valid preprocessing steps: melt, pivot, groupby, filter, drop, replace, merge, sort
+
+You should respond in JSON format.
+Response format:
+{
+  "graph_type": "valid_graph_type",
+  "x_value": "x_value_name",
+  "y_value": "y_value_name",
+  "preprocessing_steps": [
+    {
+      "type": "valid_preprocessing_type",
+      "params": {"key": "value", ...}
+    },
+    ...
+  ] or null,
+  "kwargs": {
+    "key": "value",
+    ...
+  } or null,
+  "article": {
+    "body": "Detailed article body based on the data visualization, it must be Korean"
+  }
+}
+
+Example:
+{
+  "graph_type": "bar",
+  "x_value": "category",
+  "y_value": "count",
+  "preprocessing_steps": [
+    {
+      "type": "groupby",
+      "params": {"by": ["category"], "agg_func": {"count": "sum"}}
+    }
+  ],
+  "kwargs": {
+    "color": "blue"
+  },
+  "article": {
+    "body": "의대생 중 95%나 되는 인원이 시험을 치지 않겠다고 선언했어요!"
+  }
 }
diff --git a/app/recommend/recommend_service.py b/app/recommend/recommend_service.py
@@ -19,7 +19,30 @@
 from app.service.user_type_service import UserTypeService
 
 
-async def user_id_to_classification_id(user_id, session:AsyncSession):
+async def user_type_to_classification_id(user_type) -> int:
+    ranked = sorted([[user_type[0], UserTypes.ISSUE_FINDER],
+                     [user_type[1], UserTypes.LIFESTYLE_CONSUMER],
+                     [user_type[2], UserTypes.ENTERTAINER],
+                     [user_type[3], UserTypes.TECH_SPECIALIST],
+                     [user_type[4], UserTypes.PROFESSIONALS]],
+                    key=lambda pair: pair[0], reverse=True)  # highest score first
+    # One row per classification id (1-10) with a 0/1 flag column per user type.
+    table = pd.DataFrame({
+        'classification_id': range(1, 11),
+        'ISSUE_FINDER':         [1, 1, 1, 1, 1, 1, 0, 0, 0, 0],
+        'LIFESTYLE_CONSUMER':   [1, 1, 1, 0, 0, 0, 1, 1, 1, 0],
+        'ENTERTAINER':          [1, 0, 0, 1, 1, 0, 1, 1, 0, 1],
+        'TECH_SPECIALIST':      [0, 1, 0, 1, 0, 1, 1, 0, 1, 1],
+        'PROFESSIONALS':        [0, 0, 1, 0, 1, 1, 0, 1, 1, 1]
+    })
+    # Select the row whose flags are set for all three top-ranked user types.
+    first, second, third = (entry[1].value['name'] for entry in ranked[:3])
+    matches = table[
+        (table[first] == 1) & (table[second] == 1) & (table[third] == 1)
+    ]
+    return int(matches.iloc[0]['classification_id'])
+
+async def user_id_to_classification_id(user_id, session:AsyncSession) -> int:
     userType = await UserTypeService().get_user_type_by_id(user_id, session)
     target_features = [[userType.user_type_issue_finder, UserTypes.ISSUE_FINDER],
                        [userType.user_type_lifestyle_consumer, UserTypes.LIFESTYLE_CONSUMER],
@@ -84,6 +107,8 @@ def __init__(self):
         self.num_articles = None
         self.user_item_matrix = None
         self.model = None
+        self.idx_to_id = dict()
+        self.id_to_idx = dict()
         self.user_data_path = "/./user_classification.csv"
 
     async def initialize_data(self, session):
@@ -97,7 +122,11 @@ def set_user_datas(self):
         print(self.num_classifications)
 
     async def set_article_datas(self, session):
+
         articles = await ArticleManageService().get_all_articles(session=session)
+        for idx, article in enumerate(articles):
+            self.idx_to_id[idx] = article.id
+            self.id_to_idx[article.id] = idx
         self.num_articles = len(articles)
         print(self.num_articles)
 
@@ -109,8 +138,8 @@ def make_dataset(self):
         print(self.interaction_datas)
         self.user_item_matrix = csr_matrix((self.interaction_datas['duration_time'].tolist(),
                     (self.interaction_datas['classification_id'].tolist(),
-                     self.interaction_datas['article_id'].tolist()))
-                                           , shape=(self.num_classifications+1, self.num_articles+1))
+                     list(map(lambda x : self.id_to_idx[x], self.interaction_datas['article_id'].tolist()))))
+                                           , shape=(self.num_classifications+1, self.num_articles))
 
         self.user_item_matrix = (self.user_item_matrix > 0).astype(np.float32)
         print("Num users: {}, num_items {}.".format(self.num_classifications, self.num_articles))
@@ -135,7 +164,7 @@ def fit_model(self):
     async def get_recommend_articles(self, classification_id: int, session: AsyncSession, N: int = 10):
         indices, scores = self.model.recommend(userid=classification_id, user_items=csr_matrix(self.user_item_matrix.toarray()[classification_id]), N=N)
         for i in range(N):
-            indices[i] += 1
+            indices[i] = self.idx_to_id[indices[i]]
 
         await RecommendRepository().update_recommend(id=classification_id+1, article_ids=indices, session=session)
 

diff --git a/app/repository/api_visualization_crud.py b/app/repository/api_visualization_crud.py
@@ -0,0 +1,35 @@
+# api_visualization_crud.py
+
+from fastapi import HTTPException
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from app.database.repository import get_repository, model_to_dict
+from app.model.api_visualization import ApiVisualization
+
+
+class ApiVisualizationRepository:  # data-access helpers for the ApiVisualization model
+    # Create
+    async def create(self, api_article: ApiVisualization, session: AsyncSession):
+        repository = get_repository(ApiVisualization)(session)
+        return await repository.create(model_to_dict(api_article))
+
+    async def get_by_id(self, pk: int, session: AsyncSession):  # raises HTTP 404 when no row is found
+        repository = get_repository(ApiVisualization)(session)
+        content = await repository.get(pk)
+        if content is None:
+            raise HTTPException(
+                status_code=404, detail="해당 순번이 존재하지 않습니다."
+            )
+        return content
+
+    async def get_all(self, session: AsyncSession):  # filter() is called without conditions
+        repository = get_repository(ApiVisualization)(session)
+        return await repository.filter()
+
+    async def update_content(self, id: int, content: str, session: AsyncSession):
+        repository = get_repository(ApiVisualization)(session)
+        return await repository.update_by_pk(pk=id, data={"content": content})  # key is the column name, not the argument's value
+
+    async def update_graph(self, id: int, graph_html: str, session: AsyncSession):
+        repository = get_repository(ApiVisualization)(session)
+        return await repository.update_by_pk(pk=id, data={"graph_html": graph_html})  # key is the column name, not the argument's value
diff --git a/app/repository/recommend_crud.py b/app/repository/recommend_crud.py
@@ -16,4 +16,13 @@ async def update_recommend(
             data={
                 "recommend_article_ids": article_ids
             },
-        )
+        )
+
+    async def get(self, pk: int, session: AsyncSession):
+        # Look up the Recommend row by primary key; a missing row becomes an HTTP 404.
+        found = await get_repository(Recommend)(session).get(pk)
+        if found is not None:
+            return found
+        raise HTTPException(
+            status_code=404, detail="해당 classification이 존재하지 않습니다."
+        )
diff --git a/app/repository/user_type_crud.py b/app/repository/user_type_crud.py
@@ -11,7 +11,10 @@
 class UserTypeRepository:
     async def create(self, user_type: UserType, session: AsyncSession):
         repository = get_repository(UserType)(session)
-        return await repository.create(model_to_dict(user_type))
+        try:
+            return await repository.create(model_to_dict(user_type))
+        except Exception:  # not a bare except: CancelledError/KeyboardInterrupt must propagate
+            return await self.get(user_type.user_id, session)  # fall back to the row stored for this user_id
 
     async def get(self, pk: int, session: AsyncSession):
         repository = get_repository(UserType)(session)

diff --git a/app/router/api_visualization_router.py b/app/router/api_visualization_router.py
@@ -0,0 +1,78 @@
+# api_visualization_router.py
+
+from datetime import datetime
+from enum import Enum
+
+from fastapi import APIRouter, Depends
+from pydantic import BaseModel
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from app.database.session import get_db_session
+from app.service.api_visualization_service import (
+    ApiVisualizationService,
+    create_article,
+)
+from app.utils.generic_response import GenericResponseDTO
+
+api_visualization_router = APIRouter()
+
+
+class select_data(Enum):  # data sets selectable in the article endpoint; the value is passed to create_article as `data`
+    population = "인구"
+    childhood = "청소년 정책"
+    JINJU = "진주 코비드"
+
+
+class ApiVisualResponseDTO(BaseModel):  # response payload used by both article endpoints in this router
+    title: str
+    content: str
+    html_data: str  # the GET handler fills this from the stored record's graph_html
+    created_at: datetime
+
+
+# Provisional endpoint: proper request/response models still need to be built.
+@api_visualization_router.post(
+    "/api_visual/article/{user_input}",
+    response_model=GenericResponseDTO[ApiVisualResponseDTO],
+)
+async def api_visualization_article(
+    user_input: bool,
+    title: str,
+    data_set: select_data,
+    session: AsyncSession = Depends(get_db_session),
+):
+    # For now the data set is assumed to be chosen through the API parameter.
+    generated = await create_article(
+        title=title, data=data_set.value, user_input=user_input, session=session
+    )
+    html_data, content = generated
+    # An empty body is replaced by the literal placeholder text "user_input".
+    payload = ApiVisualResponseDTO(
+        title=title,
+        content="user_input" if content == "" else content,
+        html_data=html_data,
+        created_at=datetime.now(),
+    )
+    return GenericResponseDTO[ApiVisualResponseDTO](
+        data=payload, message="Successfully created article done.", result=True
+    )
+
+
+@api_visualization_router.get(
+    "/api_visual/article/{id}", response_model=GenericResponseDTO[ApiVisualResponseDTO]
+)
+async def get_api_visualization_article(
+    id: int,
+    session: AsyncSession = Depends(get_db_session),
+):
+    # Fetch the stored article by id, then copy its fields onto the response DTO.
+    record = await ApiVisualizationService().get_by_id(id=id, session=session)
+    payload = ApiVisualResponseDTO(
+        title=record.title,
+        content=record.content,
+        html_data=record.graph_html,
+        created_at=record.created_at,
+    )
+    return GenericResponseDTO[ApiVisualResponseDTO](
+        data=payload, message="Successfully 'get' done.", result=True
+    )