Feat/seasearch add wiki search sup #366

cir9no · 2024-08-18T19:59:43Z

No description provided.

JoinTyang · 2024-09-30T07:21:40Z

seasearch/index_store/repo_file_name_index.py

@@ -159,6 +159,8 @@ def search_files(self, repos, keyword, start=0, size=10, suffixes=None, search_p
            bulk_search_params.append(data)
            search_path = None

+
+        logger.debug('search in repo_filename_index params: %s', json.dumps(bulk_search_params))


这个去掉

JoinTyang · 2024-09-30T08:35:07Z

seasearch/index_store/index_manager.py

+        wiki_index.delete_index_by_index_name(wiki_index_name)
+        wiki_status_index.delete_documents_by_repo(wiki_id)
+
+    def keyword_search(self, query, repos, repo_filename_index, count, suffixes=None):


上面有一个 keyword_search 了

JoinTyang · 2024-09-30T09:22:31Z

seasearch/index_store/wiki_index.py

+    def check_index(self, index_name):
+        return self.seasearch_api.check_index_mapping(index_name).get('is_exist')
+
+    def query_data_by_doc_uuid(self, index_name, doc_uuids_list, start, size):


改成 query_data_by_doc_uuids

JoinTyang · 2024-09-30T09:23:09Z

seasearch/index_store/wiki_index.py

+
+
+SEASEARCH_WIKI_BULK_OPETATE_LIMIT = 25
+SEASEARCH_WIKI_QUERY_DOC_UUID_STEP = 10


这里的数值设置得太小了吧

JoinTyang · 2024-09-30T09:35:32Z

app/app.py

@@ -8,8 +8,9 @@
 from seafevents.repo_metadata.index_worker import RepoMetadataIndexWorker
 from seafevents.repo_metadata.slow_task_handler import SlowTaskHandler
 from seafevents.seafevent_server.seafevent_server import SeafEventServer
-from seafevents.app.config import ENABLE_METADATA_MANAGEMENT
+from seafevents.app.config import ENABLE_METADATA_MANAGEMENT, ENABLE_WIKI


这个是老版wiki的配置

JoinTyang · 2024-10-08T05:51:25Z

seasearch/utils/constants.py

+WIKI_INDEX_PREFIX = 'wiki_'
+
+SEASEARCH_QUERY_DOC_UUID_STEP = 20
+SEASEARCH_BULK_OPETATE_LIMIT = 25


这两个常量没用吧

JoinTyang · 2024-10-08T06:03:10Z

seasearch/utils/__init__.py

@@ -99,3 +100,26 @@ def need_index_metadata_info(repo_id, session):
        return False

    return True
+
+
+def is_wiki(path):


这个命名改一下吧，这里是判断是否是 wiki 中的文件，而不是判断这个文件是不是 wiki

JoinTyang · 2024-10-08T06:48:19Z

seasearch/index_store/wiki_index.py

+        doc_uuids = [page['docUuid'] for page in config['pages'] if page['id'] in navigation_ids]
+        return doc_uuids
+
+    def extract_deleted_doc_uuids(self, config):


这个和上面的extract_doc_uuids 是不是可以放到一个方法里面处理，更合适？

JoinTyang · 2024-10-08T06:48:58Z

seasearch/index_store/wiki_index.py

+    def get_wiki_conf(self, wiki_id):
+        # Get wiki config dict
+        conf_path = posixpath.join(WIKI_CONFIG_PATH, WIKI_CONFIG_FILE_NAME)
+        conf_id = seafile_api.get_file_id_by_path(wiki_id, conf_path)


这里不应该叫conf_id吧

JoinTyang · 2024-10-08T07:14:25Z

seasearch/index_store/index_manager.py

+
+            if wiki_status.need_recovery():
+                logger.warning('%s: wiki index inrecovery', wiki_id)
+                wiki_index.update(index_name, wiki_id, commit_id, to_commit)


按照现在的逻辑这里还能正常recovery吗？

JoinTyang · 2024-10-12T08:22:26Z

seasearch/index_store/repo_status_index.py

@@ -35,9 +39,6 @@ class RepoStatusIndex(object):
            'updatingto': {
                'type': 'keyword'
            },
-            'metadata_updated_time': {
-                'type': 'keyword'
-            },


这里把之前加的属性给删了，别的功能无法工作了

JoinTyang · 2024-10-17T02:33:38Z

seasearch/script/wiki_index_local.py

+    index_local.run()
+
+    logger.info('\n\nWiki index updated, statistic report:\n')
+    logger.info('[commit read] %s', commit_mgr.read_count())


其他几项统计为什么去掉了

JoinTyang · 2024-10-17T02:39:04Z

seasearch/index_store/wiki_index.py

+        conf = self.get_wiki_conf(wiki_id)
+
+        doc_uuids = self.extract_doc_uuids(conf)
+        deleted_doc_uuids = self.extract_doc_uuids(conf, deleted=True)


doc_uuids 和 deleted_doc_uuids 可以一次性通过 extract_doc_uuids 获得，否则相同的提取操作还要执行两次

JoinTyang · 2024-10-18T08:08:27Z

seasearch/script/update.lock

这个文件删掉

JoinTyang · 2024-10-18T09:03:22Z

seasearch/index_store/wiki_index.py

+
+        return content.strip()
+
+    def get_wiki_conf(self, wiki_id):


这个需要改成通过commit_id 获取config，否则逻辑上是不对的

JoinTyang · 2024-10-23T02:48:47Z

seafevent_server/request_handler.py

+
+
+@app.route('/wiki-search', methods=['POST'])
+def search_wikis():


这里应该改成单数形式，其他的相应的也要修改

JoinTyang · 2024-10-23T03:12:38Z

seasearch/index_store/wiki_index.py

+            get_library_diff_files(wiki_id, old_commit_id, new_commit_id)
+
+        conf = self.get_wiki_conf(wiki_id, new_commit_id)
+        if conf is None:


这个是通过处理异常得到的None, 这个异常不应该处理，否则程序会认为这次索引已经更新好了

JoinTyang · 2024-10-23T03:15:20Z

seasearch/index_store/wiki_index.py

+        if deleted_doc_uuids:
+            delete_documents(deleted_doc_uuids)
+
+    def normal_search(self, index_name, dsl):


这个用不到吧

JoinTyang · 2024-10-23T03:17:55Z

seasearch/index_store/wiki_index.py

+                title_match.append(r_t)
+
+        # Search in wiki name
+        name_match = []


这部分去掉

JoinTyang · 2024-10-23T03:19:24Z

seasearch/index_store/wiki_index.py

+
+        # Search in wiki title
+        title_match = []
+        for doc_uuid, title, wiki_id in title_info:


需要加wiki_title吗，而且这样加的话，会造成搜索结果中有两条相同的wiki页面

JoinTyang · 2024-10-23T06:15:52Z

seasearch/index_store/wiki_index.py

+                title_info.append((page_uuid, page["name"], wiki))
+
+        # Get wiki name
+        wiki = seafile_api.get_repo(wiki)


这个还有用吗

JoinTyang · 2024-10-23T06:16:00Z

seasearch/index_store/wiki_index.py

+        if bulk_add_params:
+            self.seasearch_api.bulk(index_name, bulk_add_params)
+
+    def delete_files(self, index_name, files, deleted_doc_uuids):


这个看下filename 索引中怎么实现的，不需要再查出id了

JoinTyang · 2024-10-23T06:27:41Z

seasearch/index_store/wiki_index.py

+        doc_uuids, deleted_doc_uuids = self.extract_doc_uuids(conf)
+
+        need_deleted_files = deleted_files + modified_files
+        self.delete_files(index_name, need_deleted_files, deleted_doc_uuids)


不需要删除modified_files吧

JoinTyang · 2024-10-23T06:31:29Z

seasearch/index_store/wiki_index.py

+            else:
+                continue
+
+            index_info = {'index': {'_index': index_name, '_id': md5(path)}}


_id 换成 doc_uuid 吧

JoinTyang · 2024-10-23T06:33:17Z

seasearch/index_store/wiki_index.py

+            'doc_uuid':{
+                'type': 'keyword',
+            },
+            'type':{


这个有用吗？我看你设置了content一种类型啊

JoinTyang · 2024-10-23T06:41:42Z

seasearch/index_store/wiki_index.py

+                if highlight_content := hit.get('highlight').get('content', [None])[0]:
+                    r.update(content=highlight_content)
+                content_match.append(r)
+        content_match = sorted(content_match, key=lambda row: row['score'], reverse=True)[:size]


这里还需要排序吗？ content_match 这个变量名也换一下

JoinTyang · 2024-10-28T02:25:06Z

seasearch/index_store/wiki_index.py

+
+        need_added_files = added_files + modified_files
+
+        recently_restore_uuid_path = {


改成 recently_restore_uuid_to_path

JoinTyang · 2024-10-28T02:26:21Z

seasearch/index_store/wiki_index.py

+        old_cfg = self.get_wiki_conf(wiki_id, old_commit_id)
+        new_cfg = self.get_wiki_conf(wiki_id, new_commit_id)
+        prev_path, prev_recycled = self.get_uuid_path_mapping(old_cfg)
+        curr_path, curr_recycled = self.get_uuid_path_mapping(new_cfg)


变量至少要是个名词吧？这里都应该是复数形式吧

cir9no force-pushed the feat/seasearch-add-wiki-search-sup branch from 09f88bf to 9f1909d Compare August 26, 2024 07:34

cir9no force-pushed the feat/seasearch-add-wiki-search-sup branch from 254f4eb to 444663d Compare September 25, 2024 07:16

JoinTyang reviewed Oct 8, 2024

View reviewed changes

JoinTyang reviewed Oct 12, 2024

View reviewed changes

cir9no force-pushed the feat/seasearch-add-wiki-search-sup branch 3 times, most recently from 08447c9 to 7e21984 Compare October 16, 2024 02:15

JoinTyang reviewed Oct 18, 2024

View reviewed changes

JoinTyang reviewed Oct 23, 2024

View reviewed changes

cir9no force-pushed the feat/seasearch-add-wiki-search-sup branch 3 times, most recently from cba51d5 to 7e34e24 Compare October 24, 2024 05:53

JoinTyang reviewed Oct 28, 2024

View reviewed changes

cir9no force-pushed the feat/seasearch-add-wiki-search-sup branch 7 times, most recently from 0dc90b3 to 99320a3 Compare November 4, 2024 02:54

feat(search): add wiki search

736a84f

cir9no force-pushed the feat/seasearch-add-wiki-search-sup branch from 4a225ae to 736a84f Compare November 4, 2024 05:48

freeplant merged commit 59250c3 into master Nov 4, 2024
1 check passed

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Feat/seasearch add wiki search sup #366

Feat/seasearch add wiki search sup #366

cir9no commented Aug 18, 2024

JoinTyang Sep 30, 2024

JoinTyang Sep 30, 2024

JoinTyang Sep 30, 2024

JoinTyang Sep 30, 2024

JoinTyang Sep 30, 2024

JoinTyang Oct 8, 2024

JoinTyang Oct 8, 2024

JoinTyang Oct 8, 2024

JoinTyang Oct 8, 2024

JoinTyang Oct 8, 2024

JoinTyang Oct 12, 2024

JoinTyang Oct 17, 2024

JoinTyang Oct 17, 2024

JoinTyang Oct 18, 2024

JoinTyang Oct 18, 2024

JoinTyang Oct 23, 2024

JoinTyang Oct 23, 2024

JoinTyang Oct 23, 2024

JoinTyang Oct 23, 2024

JoinTyang Oct 23, 2024

JoinTyang Oct 23, 2024

JoinTyang Oct 23, 2024

JoinTyang Oct 23, 2024

JoinTyang Oct 23, 2024

JoinTyang Oct 23, 2024

JoinTyang Oct 23, 2024

JoinTyang Oct 28, 2024

JoinTyang Oct 28, 2024



		SEASEARCH_WIKI_BULK_OPETATE_LIMIT = 25
		SEASEARCH_WIKI_QUERY_DOC_UUID_STEP = 10



		@app.route('/wiki-search', methods=['POST'])
		def search_wikis():


		need_added_files = added_files + modified_files

		recently_restore_uuid_path = {

Feat/seasearch add wiki search sup #366

Feat/seasearch add wiki search sup #366

Conversation

cir9no commented Aug 18, 2024

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment