Merge branch 'Feature/#959' into Feature/#1036
vkehfdl1 committed Dec 8, 2024
2 parents (d4ab9ca + 9f27257), commit bb8b0d7
Showing 4 changed files with 49 additions and 5 deletions.
api/app.py: 37 additions & 0 deletions

@@ -421,6 +421,41 @@ async def get_parse_documents(project_id):
     return jsonify(result_dict_list), 200
 
 
+@app.route("/projects/<project_id>/parse/<parsed_name>", methods=["GET"])
+@project_exists(WORK_DIR)
+async def get_parsed_file(project_id: str, parsed_name: str):
+    parsed_folder = os.path.join(WORK_DIR, project_id, "parse")
+    raw_df = pd.read_parquet(
+        os.path.join(parsed_folder, parsed_name, "0.parquet"), engine="pyarrow"
+    )
+    requested_filename = request.args.get("filename", type=str)
+    requested_page = request.args.get("page", -1, type=int)
+
+    if requested_filename is None:
+        return jsonify({"error": "Filename is required"}), 400
+
+    if requested_page < -1:
+        return jsonify({"error": "Invalid page number"}), 400
+
+    requested_filepath = os.path.join(
+        WORK_DIR, project_id, "raw_data", requested_filename
+    )
+
+    raw_row = raw_df.loc[raw_df["path"] == requested_filepath].loc[
+        raw_df["page"] == requested_page
+    ]
+    if len(raw_row) <= 0:
+        raw_row = raw_df.loc[raw_df["path"] == requested_filepath].loc[
+            raw_df["page"] == -1
+        ]
+    if len(raw_row) <= 0:
+        return jsonify({"error": "No matching document found"}), 404
+
+    result_dict = raw_row.iloc[0].to_dict()
+
+    return jsonify(result_dict), 200
+
+
 @app.route("/projects/<project_id>/chunk", methods=["GET"])
 @project_exists(WORK_DIR)
 async def get_chunk_documents(project_id):
@@ -465,6 +500,7 @@ async def parse_documents_endpoint(project_id):
config = data["config"]
target_extension = data["extension"]
parse_name = data["name"]
all_files: bool = data.get("all_files", True)

parse_dir = os.path.join(WORK_DIR, project_id, "parse")

@@ -476,6 +512,7 @@
         config_str=yaml.dump(config),
         parse_name=parse_name,
         glob_path=f"*.{target_extension}",
+        all_files=all_files,
     )
     task_id = task.id
     return jsonify({"task_id": task_id, "status": "started"})
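For reference, a minimal sketch of how a client might call the new endpoint, assuming the API is served at http://localhost:5000 and that a project "my_project" has a parse run named "my_parse" (host, port, and both names are illustrative, not part of this commit):

import requests

# Fetch the parsed row for one source file. "page" is optional; the handler
# defaults it to -1 and also falls back to the page == -1 row when no row
# matches the requested page.
resp = requests.get(
    "http://localhost:5000/projects/my_project/parse/my_parse",
    params={"filename": "report.pdf", "page": 2},
)
if resp.ok:
    print(resp.json())  # one parquet row serialized as a dict
else:
    print(resp.status_code, resp.json()["error"])

Note that the handler joins "filename" onto the project's raw_data directory, so clients pass the bare file name rather than an absolute path.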
api/src/run.py: 2 additions & 2 deletions

@@ -13,13 +13,13 @@
 from src.schema import QACreationRequest
 
 
-def run_parser_start_parsing(data_path_glob, project_dir, yaml_path):
+def run_parser_start_parsing(data_path_glob, project_dir, yaml_path, all_files: bool):
     # Import Parser here if it's defined in another module
     parser = Parser(data_path_glob=data_path_glob, project_dir=project_dir)
     print(
         f"Parser started with data_path_glob: {data_path_glob}, project_dir: {project_dir}, yaml_path: {yaml_path}"
     )
-    parser.start_parsing(yaml_path, all_files=True)
+    parser.start_parsing(yaml_path, all_files=all_files)
     print("Parser completed")
 
 
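With the flag threaded through, the wrapper can also be exercised directly; a sketch with placeholder paths (in production the Celery task supplies these from the project layout under WORK_DIR):

from src.run import run_parser_start_parsing

run_parser_start_parsing(
    data_path_glob="/work/my_project/raw_data/*.pdf",  # placeholder path
    project_dir="/work/my_project/parse/my_parse",  # placeholder path
    yaml_path="/work/my_project/parse_config.yaml",  # placeholder path
    all_files=False,  # previously hard-coded to True inside this function
)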
api/tasks/trial_tasks.py: 9 additions & 2 deletions

@@ -203,7 +203,12 @@ def generate_qa_documents(self, project_id: str, request_data: Dict[str, Any]):

 @shared_task(bind=True, base=TrialTask)
 def parse_documents(
-    self, project_id: str, config_str: str, parse_name: str, glob_path: str = "*.*"
+    self,
+    project_id: str,
+    config_str: str,
+    parse_name: str,
+    glob_path: str = "*.*",
+    all_files: bool = True,
 ):
     load_dotenv(ENV_FILEPATH)
     try:
@@ -247,7 +252,9 @@ def parse_documents(
         with open(yaml_path, "w", encoding="utf-8") as f:
             yaml.safe_dump(config_dict, f, allow_unicode=True)
 
-        result = run_parser_start_parsing(raw_data_path, parsed_data_path, yaml_path)
+        result = run_parser_start_parsing(
+            raw_data_path, parsed_data_path, yaml_path, all_files
+        )
 
         self.update_state_and_db(
             trial_id="",
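End to end, the new flag travels from the request body through the Celery task into Parser.start_parsing. A sketch of starting a parse with all_files disabled, assuming the POST route mirrors the GET route above; the config body shown is illustrative only:

import requests

resp = requests.post(
    "http://localhost:5000/projects/my_project/parse",
    json={
        "config": {"modules": [{"module_type": "langchain_parse", "parse_method": "pdfminer"}]},  # illustrative config
        "extension": "pdf",
        "name": "my_parse",
        "all_files": False,  # new in this commit; omitting it defaults to True
    },
)
print(resp.json())  # expected: {"task_id": "...", "status": "started"}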
