Skip to content

Commit

Permalink
feat(search): add wiki search
Browse files Browse the repository at this point in the history
  • Loading branch information
cir9no committed Nov 4, 2024
1 parent f530357 commit 736a84f
Show file tree
Hide file tree
Showing 16 changed files with 1,070 additions and 15 deletions.
3 changes: 3 additions & 0 deletions app/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from seafevents.app.config import ENABLE_METADATA_MANAGEMENT
from seafevents.seasearch.index_task.filename_index_updater import RepoFilenameIndexUpdater
from seafevents.repo_metadata.face_recognition_updater import RepoFaceClusterUpdater
from seafevents.seasearch.index_task.wiki_index_updater import WikiIndexUpdater


class App(object):
Expand Down Expand Up @@ -44,6 +45,7 @@ def __init__(self, config, ccnet_config, seafile_config,
self._slow_task_handler = SlowTaskHandler(config)
self._repo_face_cluster_updater = RepoFaceClusterUpdater(config)
self._repo_filename_index_updater = RepoFilenameIndexUpdater(config)
self._wiki_index_updater = WikiIndexUpdater(config)

def serve_forever(self):
if self._fg_tasks_enabled:
Expand All @@ -69,3 +71,4 @@ def serve_forever(self):
self._slow_task_handler.start()
self._repo_face_cluster_updater.start()
self._repo_filename_index_updater.start()
self._wiki_index_updater.start()
31 changes: 29 additions & 2 deletions repo_data/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from sqlalchemy.sql import text

from seafevents.repo_data.db import init_db_session_class
from seafevents.seasearch.utils.constants import REPO_TYPE_WIKI

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -53,6 +54,25 @@ def _get_mtime_by_repo_ids(self, repo_ids):
finally:
session.close()

def _get_wiki_repo_id_commit_id(self, start, count):
    """Fetch one page of wiki repos together with their master-branch commit.

    Joins RepoInfo with Branch, keeps rows whose branch name is 'master'
    and whose RepoInfo.type equals REPO_TYPE_WIKI, and pages the result
    with ``limit :start, :count``.

    :param start: offset bound to the ``:start`` placeholder.
    :param count: page size bound to the ``:count`` placeholder.
    :return: rows of (repo_id, commit_id, type) as returned by ``fetchall()``.

    Any exception from the query propagates to the caller; the session is
    closed in every case.
    """
    session = self.db_session()
    try:
        cmd = """SELECT RepoInfo.repo_id, Branch.commit_id, RepoInfo.type
FROM RepoInfo
INNER JOIN Branch ON RepoInfo.repo_id = Branch.repo_id
WHERE Branch.name = :name
AND RepoInfo.type = :repo_type
limit :start, :count;"""
        bind_params = {
            'name': 'master',
            'repo_type': REPO_TYPE_WIKI,
            'start': start,
            'count': count,
        }
        rows = session.execute(text(cmd), bind_params).fetchall()
        return rows
    finally:
        # Always release the session, whether the query succeeded or raised.
        session.close()

def _get_all_trash_repo_list(self):
session = self.db_session()
try:
Expand Down Expand Up @@ -106,7 +126,8 @@ def _get_virtual_repo_in_repos(self, repo_ids):
if not repo_ids:
return []
try:
cmd = """SELECT repo_id from VirtualRepo WHERE repo_id IN {}""".format(tuple(repo_ids))
formatted_ids = ", ".join("'{}'".format(id) for id in repo_ids)
cmd = """SELECT repo_id from VirtualRepo WHERE repo_id IN ({})""".format(formatted_ids)
res = session.execute(text(cmd)).fetchall()
return res
except Exception as e:
Expand Down Expand Up @@ -149,6 +170,13 @@ def get_repo_id_commit_id(self, start, count):
logger.error(e)
return self._get_repo_id_commit_id(start, count)

def get_wiki_repo_id_commit_id(self, start, count):
    """Return a page of wiki (repo_id, commit_id, type) rows, retrying once.

    The first failure of the underlying query is logged and the query is
    attempted a second time; an exception from the second attempt
    propagates to the caller.
    """
    try:
        rows = self._get_wiki_repo_id_commit_id(start, count)
    except Exception as err:
        logger.error(err)
        # Single retry; no further handling if this one fails too.
        rows = self._get_wiki_repo_id_commit_id(start, count)
    return rows

def get_repo_head_commit(self, repo_id):
try:
return self._get_repo_head_commit(repo_id)
Expand All @@ -163,5 +191,4 @@ def get_virtual_repo_in_repos(self, repo_ids):
logger.error(e)
return self._get_virtual_repo_in_repos(repo_ids)


repo_data = RepoData()
33 changes: 33 additions & 0 deletions seafevent_server/request_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,3 +207,36 @@ def extract_file_details():
details = add_file_details(repo_id, obj_ids, metadata_server_api)

return {'details': details}, 200


@app.route('/wiki-search', methods=['POST'])
def search_wiki():
    """Handle ``POST /wiki-search``: search a wiki through the index task manager.

    The request body is JSON with:
      * ``query`` - non-empty search string (surrounding whitespace is stripped);
      * ``wiki``  - identifier of the wiki to search (must be truthy);
      * ``count`` - optional maximum number of results; falls back to 20
        when missing or not convertible to ``int``.

    Returns ``({'results': ...}, 200)`` on success, a 403 response when the
    auth token check fails, and a 400 response for a malformed body or
    invalid ``query`` / ``wiki``.
    """
    if not check_auth_token(request):
        return {'error_msg': 'Permission denied'}, 403

    # Check that seasearch is enabled.
    # NOTE(review): this error is returned without an explicit status code,
    # so it presumably goes out with the framework default - kept as-is for
    # compatibility with existing callers; confirm whether that is intended.
    if not index_task_manager.enabled:
        return {'error_msg': 'Seasearch is not enabled by seafevents.conf'}

    try:
        data = json.loads(request.data)
    except Exception as e:
        logger.exception(e)
        return {'error_msg': 'Bad request.'}, 400

    # Valid JSON that is not an object (e.g. a list or a number) has no
    # ``.get`` and would otherwise crash the handler.
    if not isinstance(data, dict):
        return {'error_msg': 'Bad request.'}, 400

    query = data.get('query')
    wiki = data.get('wiki')

    # A missing or non-string query must yield a 400, not an AttributeError
    # from calling ``.strip()`` on None.
    if not isinstance(query, str):
        return {'error_msg': 'query invalid.'}, 400
    query = query.strip()
    if not query:
        return {'error_msg': 'query invalid.'}, 400
    if not wiki:
        return {'error_msg': 'wiki invalid.'}, 400

    try:
        count = int(data.get('count'))
    except (TypeError, ValueError, OverflowError):
        # Missing, non-numeric, NaN or infinite count: use the default.
        count = 20

    results = index_task_manager.wiki_search(query, wiki, count)

    return {'results': results}, 200
45 changes: 44 additions & 1 deletion seasearch/index_store/index_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@

from seafevents.seasearch.utils import need_index_metadata_info
from seafevents.db import init_db_session_class
from seafevents.seasearch.utils.constants import ZERO_OBJ_ID, REPO_FILENAME_INDEX_PREFIX
from seafevents.seasearch.utils.constants import ZERO_OBJ_ID, REPO_FILENAME_INDEX_PREFIX, \
WIKI_INDEX_PREFIX
from seafevents.repo_metadata.metadata_server_api import MetadataServerAPI
from seafevents.repo_metadata.utils import METADATA_TABLE
from seafevents.utils import timestamp_to_isoformat_timestr
Expand Down Expand Up @@ -74,3 +75,45 @@ def delete_repo_filename_index(self, repo_id, repo_filename_index, repo_status_f

def keyword_search(self, query, repos, repo_filename_index, count, suffixes=None, search_path=None, obj_type=None):
    """Search file names in ``repos`` through ``repo_filename_index``.

    Delegates to ``repo_filename_index.search_files`` with a fixed start
    offset of 0 and returns whatever that call returns.
    """
    start = 0
    hits = repo_filename_index.search_files(
        repos, query, start, count, suffixes, search_path, obj_type
    )
    return hits

def delete_wiki_index(self, wiki_id, wiki_index, wiki_status_index):
    """Remove a wiki's search index and then its status documents.

    The index name is ``WIKI_INDEX_PREFIX + wiki_id``. The index is deleted
    first; the status documents for ``wiki_id`` are deleted afterwards.
    """
    index_name = WIKI_INDEX_PREFIX + wiki_id

    # Order matters: drop the index itself before its status record.
    wiki_index.delete_index_by_index_name(index_name)
    wiki_status_index.delete_documents_by_repo(wiki_id)

def wiki_search(self, query, wiki, wiki_index, count):
    """Search ``wiki`` for ``query`` through ``wiki_index``.

    Delegates to ``wiki_index.search_wiki`` with a fixed start offset of 0
    and at most ``count`` results, returning that call's result unchanged.
    """
    start = 0
    hits = wiki_index.search_wiki(wiki, query, start, count)
    return hits

def update_wiki_index(self, wiki_id, commit_id, wiki_index, wiki_status_index):
    """Bring the search index of one wiki up to ``commit_id``.

    Steps, all inside one try block:
      1. Make sure the index ``WIKI_INDEX_PREFIX + wiki_id`` exists.
      2. Read the stored status; return early when its ``from_commit``
         already equals ``commit_id``.
      3. Start from ``from_commit``, or from ``ZERO_OBJ_ID`` when there is
         no ``from_commit``.
      4. When the status reports ``need_recovery()``, first re-run the
         update up to the stored ``to_commit`` and continue from there.
      5. Record the update as begun, update the index, record it finished.

    Any exception is logged with its traceback and not re-raised.
    """
    try:
        head_commit = commit_id
        wiki_index_name = WIKI_INDEX_PREFIX + wiki_id

        wiki_index.create_index_if_missing(wiki_index_name)

        status = wiki_status_index.get_repo_status_by_id(wiki_id)
        indexed_commit = status.from_commit
        pending_commit = status.to_commit

        # Nothing to do: the index already reflects the requested commit.
        if head_commit == indexed_commit:
            return

        base_commit = indexed_commit if indexed_commit else ZERO_OBJ_ID

        if status.need_recovery():
            # presumably a previous update was interrupted before finishing;
            # replay it up to the recorded target first - TODO confirm
            logger.warning('%s: wiki index inrecovery', wiki_id)
            wiki_index.update(wiki_index_name, wiki_id, base_commit, pending_commit)
            base_commit = pending_commit
            time.sleep(1)

        wiki_status_index.begin_update_repo(wiki_id, base_commit, head_commit)
        wiki_index.update(wiki_index_name, wiki_id, base_commit, head_commit)
        wiki_status_index.finish_update_repo(wiki_id, head_commit)

        logger.info('wiki: %s, update wiki index success', wiki_id)
    except Exception as err:
        logger.exception('wiki_id: %s, update wiki index error: %s.', wiki_id, err)
1 change: 0 additions & 1 deletion seasearch/index_store/repo_file_name_index.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import json
import os
import logging

Expand Down
15 changes: 9 additions & 6 deletions seasearch/index_store/repo_status_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,15 +56,16 @@ def create_index_if_missing(self):
def check_repo_status(self, repo_id):
    """Return the ``is_exist`` field of the status-document lookup for ``repo_id``.

    Looks the document up by id in ``self.index_name`` through
    ``self.seasearch_api`` and returns the ``'is_exist'`` entry of the
    response (``None`` when the response has no such key).
    """
    lookup = self.seasearch_api.check_document_by_id(self.index_name, repo_id)
    return lookup.get('is_exist')

def add_repo_status(self, repo_id, commit_id, updatingto, metadata_updated_time):
    """Create the status document for ``repo_id`` in ``self.index_name``.

    The document carries ``repo_id``, ``commit_id``, ``updatingto`` and
    ``metadata_updated_time`` and is stored with ``repo_id`` as its id.

    :param repo_id: repository id; also used as the document id.
    :param commit_id: value stored under ``'commit_id'``.
    :param updatingto: value stored under ``'updatingto'``.
    :param metadata_updated_time: value stored under ``'metadata_updated_time'``.
    """
    data = {
        'repo_id': repo_id,
        'commit_id': commit_id,
        'updatingto': updatingto,
        'metadata_updated_time': metadata_updated_time,
    }

    doc_id = repo_id
    self.seasearch_api.create_document_by_id(self.index_name, doc_id, data)

def begin_update_repo(self, repo_id, old_commit_id, new_commit_id, metadata_updated_time):
self.add_repo_status(repo_id, old_commit_id, new_commit_id, metadata_updated_time)
Expand All @@ -79,6 +80,7 @@ def get_repo_status_by_id(self, repo_id):
doc = self.seasearch_api.get_document_by_id(self.index_name, repo_id)
if doc.get('error'):
return RepoStatus(repo_id, None, None, None)

commit_id = doc['_source']['commit_id']
updatingto = doc['_source']['updatingto']
metadata_updated_time = doc['_source']['metadata_updated_time']
Expand Down Expand Up @@ -152,12 +154,13 @@ def _repo_head_search(self, query_params):
commit_id = hit.get('_source').get('commit_id')
updatingto = hit.get('_source').get('updatingto')
metadata_updated_time = hit.get('_source').get('metadata_updated_time')
repo_heads.append({
repo_head = {
'repo_id': repo_id,
'commit_id': commit_id,
'updatingto': updatingto,
'metadata_updated_time': metadata_updated_time,
})
'metadata_updated_time': metadata_updated_time
}
repo_heads.append(repo_head)
return repo_heads, total

def delete_index_by_index_name(self):
Expand Down
Loading

0 comments on commit 736a84f

Please sign in to comment.