Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feat/seasearch add wiki search sup #366

Merged
merged 1 commit into from
Nov 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions app/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from seafevents.app.config import ENABLE_METADATA_MANAGEMENT
from seafevents.seasearch.index_task.filename_index_updater import RepoFilenameIndexUpdater
from seafevents.repo_metadata.face_recognition_updater import RepoFaceClusterUpdater
from seafevents.seasearch.index_task.wiki_index_updater import WikiIndexUpdater


class App(object):
Expand Down Expand Up @@ -44,6 +45,7 @@ def __init__(self, config, ccnet_config, seafile_config,
self._slow_task_handler = SlowTaskHandler(config)
self._repo_face_cluster_updater = RepoFaceClusterUpdater(config)
self._repo_filename_index_updater = RepoFilenameIndexUpdater(config)
self._wiki_index_updater = WikiIndexUpdater(config)

def serve_forever(self):
if self._fg_tasks_enabled:
Expand All @@ -69,3 +71,4 @@ def serve_forever(self):
self._slow_task_handler.start()
self._repo_face_cluster_updater.start()
self._repo_filename_index_updater.start()
self._wiki_index_updater.start()
31 changes: 29 additions & 2 deletions repo_data/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from sqlalchemy.sql import text

from seafevents.repo_data.db import init_db_session_class
from seafevents.seasearch.utils.constants import REPO_TYPE_WIKI

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -53,6 +54,25 @@ def _get_mtime_by_repo_ids(self, repo_ids):
finally:
session.close()

def _get_wiki_repo_id_commit_id(self, start, count):
    """Fetch one page of wiki repos together with their ``master`` head commit.

    :param start: row offset of the page (bound to ``LIMIT :start, :count``).
    :param count: maximum number of rows to return.
    :return: list of rows ``(repo_id, commit_id, type)`` for repos whose
        ``RepoInfo.type`` equals ``REPO_TYPE_WIKI``.
    :raises Exception: any database error is propagated to the caller; the
        session is always closed first.
    """
    session = self.db_session()
    try:
        # ORDER BY keeps offset-based paging stable: without it the database
        # is free to return rows in a different order on every call, so
        # consecutive pages could overlap or skip repos.
        cmd = """SELECT RepoInfo.repo_id, Branch.commit_id, RepoInfo.type
                 FROM RepoInfo
                 INNER JOIN Branch ON RepoInfo.repo_id = Branch.repo_id
                 WHERE Branch.name = :name
                 AND RepoInfo.type = :repo_type
                 ORDER BY RepoInfo.repo_id
                 LIMIT :start, :count;"""
        params = {
            'name': 'master',
            'repo_type': REPO_TYPE_WIKI,
            'start': start,
            'count': count,
        }
        return session.execute(text(cmd), params).fetchall()
    finally:
        session.close()

def _get_all_trash_repo_list(self):
session = self.db_session()
try:
Expand Down Expand Up @@ -106,7 +126,8 @@ def _get_virtual_repo_in_repos(self, repo_ids):
if not repo_ids:
return []
try:
cmd = """SELECT repo_id from VirtualRepo WHERE repo_id IN {}""".format(tuple(repo_ids))
formatted_ids = ", ".join("'{}'".format(id) for id in repo_ids)
cmd = """SELECT repo_id from VirtualRepo WHERE repo_id IN ({})""".format(formatted_ids)
res = session.execute(text(cmd)).fetchall()
return res
except Exception as e:
Expand Down Expand Up @@ -149,6 +170,13 @@ def get_repo_id_commit_id(self, start, count):
logger.error(e)
return self._get_repo_id_commit_id(start, count)

def get_wiki_repo_id_commit_id(self, start, count):
    """Return one page of wiki repo rows, retrying the query once on failure.

    The first failure is logged; if the second attempt also fails, its
    exception propagates to the caller.
    """
    try:
        rows = self._get_wiki_repo_id_commit_id(start, count)
    except Exception as first_error:
        logger.error(first_error)
        # Single retry; a second failure is not caught here.
        rows = self._get_wiki_repo_id_commit_id(start, count)
    return rows

def get_repo_head_commit(self, repo_id):
try:
return self._get_repo_head_commit(repo_id)
Expand All @@ -163,5 +191,4 @@ def get_virtual_repo_in_repos(self, repo_ids):
logger.error(e)
return self._get_virtual_repo_in_repos(repo_ids)


repo_data = RepoData()
33 changes: 33 additions & 0 deletions seafevent_server/request_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,3 +207,36 @@ def extract_file_details():
details = add_file_details(repo_id, obj_ids, metadata_server_api)

return {'details': details}, 200


@app.route('/wiki-search', methods=['POST'])
def search_wiki():
    """Handle ``POST /wiki-search``: search one wiki through seasearch.

    The request body is a JSON object with:
      * ``query`` - non-empty search string (surrounding whitespace is stripped),
      * ``wiki``  - identifier of the wiki to search (must be truthy),
      * ``count`` - optional result limit; defaults to 20 when missing or
        not convertible to ``int``.

    Returns ``({'results': ...}, 200)`` on success, ``403`` when the auth
    token is rejected, and ``400`` for a malformed body or invalid fields.
    """
    is_valid = check_auth_token(request)
    if not is_valid:
        return {'error_msg': 'Permission denied'}, 403

    # Check that seasearch is enabled.
    # NOTE(review): this error is returned without an explicit status code,
    # unlike every other error branch here. Left unchanged so existing callers
    # see the same response; confirm whether an error status is intended.
    if not index_task_manager.enabled:
        return {'error_msg': 'Seasearch is not enabled by seafevents.conf'}

    try:
        data = json.loads(request.data)
    except Exception as e:
        logger.exception(e)
        return {'error_msg': 'Bad request.'}, 400

    # A valid JSON body may still be a list/number/string; .get() needs a dict.
    if not isinstance(data, dict):
        return {'error_msg': 'Bad request.'}, 400

    query = data.get('query')
    wiki = data.get('wiki')

    # Validate before stripping: a missing, null or non-string query used to
    # raise AttributeError (HTTP 500) instead of producing a 400.
    if not isinstance(query, str) or not query.strip():
        return {'error_msg': 'query invalid.'}, 400
    query = query.strip()
    if not wiki:
        return {'error_msg': 'wiki invalid.'}, 400

    try:
        count = int(data.get('count'))
    except (TypeError, ValueError, OverflowError):
        # Missing or non-numeric count: fall back to the default page size.
        count = 20

    results = index_task_manager.wiki_search(query, wiki, count)

    return {'results': results}, 200
45 changes: 44 additions & 1 deletion seasearch/index_store/index_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@

from seafevents.seasearch.utils import need_index_metadata_info
from seafevents.db import init_db_session_class
from seafevents.seasearch.utils.constants import ZERO_OBJ_ID, REPO_FILENAME_INDEX_PREFIX
from seafevents.seasearch.utils.constants import ZERO_OBJ_ID, REPO_FILENAME_INDEX_PREFIX, \
WIKI_INDEX_PREFIX
from seafevents.repo_metadata.metadata_server_api import MetadataServerAPI
from seafevents.repo_metadata.utils import METADATA_TABLE
from seafevents.utils import timestamp_to_isoformat_timestr
Expand Down Expand Up @@ -74,3 +75,45 @@ def delete_repo_filename_index(self, repo_id, repo_filename_index, repo_status_f

def keyword_search(self, query, repos, repo_filename_index, count, suffixes=None, search_path=None, obj_type=None):
return repo_filename_index.search_files(repos, query, 0, count, suffixes, search_path, obj_type)

def delete_wiki_index(self, wiki_id, wiki_index, wiki_status_index):
    """Remove the search index of *wiki_id* and then its status documents.

    :param wiki_id: id of the wiki; appended to ``WIKI_INDEX_PREFIX`` to form
        the index name.
    :param wiki_index: object exposing ``delete_index_by_index_name``.
    :param wiki_status_index: object exposing ``delete_documents_by_repo``.
    """
    # Order matters to the original author: the wiki index goes first,
    # the status entry second.
    wiki_index.delete_index_by_index_name(WIKI_INDEX_PREFIX + wiki_id)
    wiki_status_index.delete_documents_by_repo(wiki_id)

def wiki_search(self, query, wiki, wiki_index, count):
    """Delegate a wiki search to *wiki_index* and return its result.

    Calls ``wiki_index.search_wiki(wiki, query, 0, count)``; the third
    argument is always ``0``.
    """
    start = 0
    hits = wiki_index.search_wiki(wiki, query, start, count)
    return hits

def update_wiki_index(self, wiki_id, commit_id, wiki_index, wiki_status_index):
    """Bring the search index of *wiki_id* up to *commit_id*.

    Creates the index if missing, reads the stored status for the wiki, and
    returns early when the stored ``from_commit`` already equals *commit_id*.
    Otherwise the index is updated from the stored commit (or ``ZERO_OBJ_ID``
    when none is stored) to *commit_id*, with the status index written before
    and after. Any exception is logged and swallowed.
    """
    try:
        target_commit = commit_id
        wiki_index_name = WIKI_INDEX_PREFIX + wiki_id

        wiki_index.create_index_if_missing(wiki_index_name)

        status = wiki_status_index.get_repo_status_by_id(wiki_id)
        indexed_commit = status.from_commit
        pending_commit = status.to_commit

        # Nothing to do when the index already sits at the requested commit.
        if target_commit == indexed_commit:
            return

        base_commit = indexed_commit if indexed_commit else ZERO_OBJ_ID

        # NOTE(review): block nesting was reconstructed from a flattened diff;
        # the three statements after the warning are assumed to belong to the
        # recovery branch - confirm against the repository.
        # NOTE(review): a reviewer on this change asked whether recovery still
        # works correctly with the current logic - confirm.
        if status.need_recovery():
            logger.warning('%s: wiki index inrecovery', wiki_id)
            wiki_index.update(wiki_index_name, wiki_id, base_commit, pending_commit)
            base_commit = pending_commit
            time.sleep(1)

        wiki_status_index.begin_update_repo(wiki_id, base_commit, target_commit)
        wiki_index.update(wiki_index_name, wiki_id, base_commit, target_commit)
        wiki_status_index.finish_update_repo(wiki_id, target_commit)

        logger.info('wiki: %s, update wiki index success', wiki_id)

    except Exception as e:
        logger.exception('wiki_id: %s, update wiki index error: %s.', wiki_id, e)
1 change: 0 additions & 1 deletion seasearch/index_store/repo_file_name_index.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import json
import os
import logging

Expand Down
15 changes: 9 additions & 6 deletions seasearch/index_store/repo_status_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,15 +56,16 @@ def create_index_if_missing(self):
def check_repo_status(self, repo_id):
    """Return the ``is_exist`` entry from the document lookup for *repo_id*.

    The lookup is ``seasearch_api.check_document_by_id(index_name, repo_id)``;
    ``None`` is returned when the response has no ``is_exist`` key.
    """
    lookup = self.seasearch_api.check_document_by_id(self.index_name, repo_id)
    return lookup.get('is_exist')

def add_repo_status(self, repo_id, commit_id, updatingto, metadata_updated_time):
    """Write the status document for *repo_id* into the status index.

    The document id is the repo id itself and the body carries the four
    fields passed in.

    :param repo_id: repo whose status is recorded; also used as document id.
    :param commit_id: stored under ``commit_id``.
    :param updatingto: stored under ``updatingto``.
    :param metadata_updated_time: stored under ``metadata_updated_time``.
    """
    data = {
        'repo_id': repo_id,
        'commit_id': commit_id,
        'updatingto': updatingto,
        'metadata_updated_time': metadata_updated_time,
    }

    doc_id = repo_id
    self.seasearch_api.create_document_by_id(self.index_name, doc_id, data)

def begin_update_repo(self, repo_id, old_commit_id, new_commit_id, metadata_updated_time):
self.add_repo_status(repo_id, old_commit_id, new_commit_id, metadata_updated_time)
Expand All @@ -79,6 +80,7 @@ def get_repo_status_by_id(self, repo_id):
doc = self.seasearch_api.get_document_by_id(self.index_name, repo_id)
if doc.get('error'):
return RepoStatus(repo_id, None, None, None)

commit_id = doc['_source']['commit_id']
updatingto = doc['_source']['updatingto']
metadata_updated_time = doc['_source']['metadata_updated_time']
Expand Down Expand Up @@ -152,12 +154,13 @@ def _repo_head_search(self, query_params):
commit_id = hit.get('_source').get('commit_id')
updatingto = hit.get('_source').get('updatingto')
metadata_updated_time = hit.get('_source').get('metadata_updated_time')
repo_heads.append({
repo_head = {
'repo_id': repo_id,
'commit_id': commit_id,
'updatingto': updatingto,
'metadata_updated_time': metadata_updated_time,
})
'metadata_updated_time': metadata_updated_time
}
repo_heads.append(repo_head)
return repo_heads, total

def delete_index_by_index_name(self):
Expand Down
Loading
Loading