Skip to content

Commit

Permalink
fix(seasearch): fix create wiki index when create filename index (#374)
Browse files Browse the repository at this point in the history
* fix(seasearch): fix create wiki index when create filename index

* optimize query repo commit

* update

* update

---------

Co-authored-by: ‘JoinTyang’ <[email protected]>
  • Loading branch information
cir9no and JoinTyang authored Sep 4, 2024
1 parent 0c0aeb2 commit c437110
Show file tree
Hide file tree
Showing 4 changed files with 62 additions and 23 deletions.
51 changes: 34 additions & 17 deletions repo_data/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,28 +25,26 @@ def to_dict(self, result_proxy):
def _get_repo_id_commit_id(self, start, count):
session = self.db_session()
try:
cmd = """SELECT repo_id, commit_id
FROM Branch WHERE name = :name
AND repo_id NOT IN (SELECT repo_id from VirtualRepo)
limit :start, :count"""
res = [(r[0], r[1]) for r in session.execute(text(cmd),
{'name': 'master',
'start': start,
'count': count})]
cmd = """SELECT RepoInfo.repo_id, Branch.commit_id, RepoInfo.type
FROM RepoInfo
INNER JOIN Branch ON RepoInfo.repo_id = Branch.repo_id
WHERE Branch.name = :name
limit :start, :count;"""
res = session.execute(text(cmd), {'name': 'master',
'start': start,
'count': count}).fetchall()
return res
except Exception as e:
raise e
finally:
session.close()


def _get_all_trash_repo_list(self):
session = self.db_session()
try:
cmd = """SELECT repo_id, repo_name, head_id, owner_id,
size, org_id, del_time FROM RepoTrash ORDER BY del_time DESC"""
res = session.execute(text(cmd))
return self.to_dict(res)
cmd = """SELECT repo_id FROM RepoTrash"""
res = session.execute(text(cmd)).fetchall()
return res
except Exception as e:
raise e
finally:
Expand All @@ -55,10 +53,9 @@ def _get_all_trash_repo_list(self):
def _get_all_repo_list(self):
session = self.db_session()
try:
cmd = """SELECT r.repo_id, c.file_count FROM Repo r LEFT JOIN RepoFileCount c
ON r.repo_id = c.repo_id"""
res = session.execute(text(cmd))
return self.to_dict(res)
cmd = """SELECT repo_id FROM Repo"""
res = session.execute(text(cmd)).fetchall()
return res
except Exception as e:
raise e
finally:
Expand Down Expand Up @@ -90,6 +87,19 @@ def _get_repo_name_mtime_size(self, repo_id):
finally:
session.close()

def _get_virtual_repo_in_repos(self, repo_ids):
session = self.db_session()
if not repo_ids:
return []
try:
cmd = """SELECT repo_id from VirtualRepo WHERE repo_id IN {}""".format(tuple(repo_ids))
res = session.execute(text(cmd)).fetchall()
return res
except Exception as e:
raise e
finally:
session.close()

def get_repo_name_mtime_size(self, repo_id):
try:
return self._get_repo_name_mtime_size(repo_id)
Expand Down Expand Up @@ -125,5 +135,12 @@ def get_repo_head_commit(self, repo_id):
logger.error(e)
return self._get_repo_head_commit(repo_id)

def get_virtual_repo_in_repos(self, repo_ids):
    """Look up which of ``repo_ids`` are virtual repos, retrying once on error.

    The first failure is logged and the query is issued a second time; an
    error from that second attempt is propagated to the caller.

    :param repo_ids: collection of repo ids handed to the underlying query.
    :return: whatever ``_get_virtual_repo_in_repos`` returns for ``repo_ids``.
    """
    try:
        virtual_repos = self._get_virtual_repo_in_repos(repo_ids)
    except Exception as err:
        logger.error(err)
        # Second and final attempt; not guarded, so a repeat failure raises.
        virtual_repos = self._get_virtual_repo_in_repos(repo_ids)
    return virtual_repos


repo_data = RepoData()
11 changes: 9 additions & 2 deletions seasearch/index_task/filename_index_updater.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from seafevents.seasearch.index_store.index_manager import IndexManager
from seafevents.seasearch.index_store.repo_file_name_index import RepoFileNameIndex
from seafevents.seasearch.index_store.repo_status_index import RepoStatusIndex
from seafevents.seasearch.utils.constants import REPO_STATUS_FILENAME_INDEX_NAME, SHARD_NUM
from seafevents.seasearch.utils.constants import REPO_STATUS_FILENAME_INDEX_NAME, SHARD_NUM, REPO_TYPE_WIKI
from seafevents.seasearch.utils.seasearch_api import SeaSearchAPI
from seafevents.repo_data import repo_data
from seafevents.utils import parse_bool, get_opt_from_conf_or_env
Expand Down Expand Up @@ -96,6 +96,7 @@ def clear_deleted_repo(repo_status_filename_index, repo_filename_index, index_ma
def update_repo_file_name_indexes(repo_status_filename_index, repo_filename_index, index_manager, repo_data):
start, count = 0, 1000
all_repos = []

while True:
try:
repo_commits = repo_data.get_repo_id_commit_id(start, count)
Expand All @@ -107,7 +108,13 @@ def update_repo_file_name_indexes(repo_status_filename_index, repo_filename_inde
if len(repo_commits) == 0:
break

for repo_id, commit_id in repo_commits:
repo_ids = [repo[0] for repo in repo_commits if repo[2] != REPO_TYPE_WIKI]
virtual_repos = repo_data.get_virtual_repo_in_repos(repo_ids)
virtual_repo_set = {repo[0] for repo in virtual_repos}

for repo_id, commit_id, repo_type in repo_commits:
if repo_id in virtual_repo_set or repo_type == REPO_TYPE_WIKI:
continue
all_repos.append(repo_id)

index_manager.update_library_filename_index(repo_id, commit_id, repo_filename_index, repo_status_filename_index)
Expand Down
21 changes: 17 additions & 4 deletions seasearch/script/repo_filename_index_local.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from seafevents.seasearch.index_store.index_manager import IndexManager
from seafevents.seasearch.utils.seasearch_api import SeaSearchAPI
from seafevents.seasearch.index_store.repo_status_index import RepoStatusIndex
from seafevents.seasearch.utils.constants import REPO_STATUS_FILENAME_INDEX_NAME, REPO_FILENAME_INDEX_PREFIX
from seafevents.seasearch.utils.constants import REPO_STATUS_FILENAME_INDEX_NAME, REPO_FILENAME_INDEX_PREFIX, REPO_TYPE_WIKI
from seafevents.seasearch.index_store.repo_file_name_index import RepoFileNameIndex

logger = logging.getLogger('seasearch')
Expand Down Expand Up @@ -67,7 +67,14 @@ def run(self):
if len(repo_commits) == 0:
NO_TASKS = True
break
for repo_id, commit_id in repo_commits:

repo_ids = [repo[0] for repo in repo_commits if repo[2] != REPO_TYPE_WIKI]
virtual_repos = repo_data.get_virtual_repo_in_repos(repo_ids)
virtual_repo_set = {repo[0] for repo in virtual_repos}

for repo_id, commit_id, repo_type in repo_commits:
if repo_id in virtual_repo_set or repo_type == REPO_TYPE_WIKI:
continue
repos_queue.put((repo_id, commit_id))
repos[repo_id] = commit_id
start += per_size
Expand Down Expand Up @@ -103,7 +110,7 @@ def thread_task(self, repos_queue):
self.incr_error()

logger.info(
"%s worker updated at %s time"
"%s worker updated at %s time"
% (threading.currentThread().getName(),
time.strftime("%Y-%m-%d %H:%M", time.localtime(time.time())))
)
Expand Down Expand Up @@ -196,7 +203,13 @@ def delete_indices():
if len(repo_commits) == 0:
break

for repo_id, commit_id in repo_commits:
repo_ids = [repo[0] for repo in repo_commits if repo[2] != REPO_TYPE_WIKI]
virtual_repos = repo_data.get_virtual_repo_in_repos(repo_ids)
virtual_repo_set = {repo[0] for repo in virtual_repos}

for repo_id, commit_id, repo_type in repo_commits:
if repo_id in virtual_repo_set or repo_type == REPO_TYPE_WIKI:
continue
repo_filename_index_name = REPO_FILENAME_INDEX_PREFIX + repo_id
repo_filename_index.delete_index_by_index_name(repo_filename_index_name)

Expand Down
2 changes: 2 additions & 0 deletions seasearch/utils/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,5 @@
REPO_FILENAME_INDEX_PREFIX = 'filename_'

SHARD_NUM = 1

# Value of the RepoInfo.type column that marks a wiki repo; the filename
# index code compares against it to skip such repos.
REPO_TYPE_WIKI = 'wiki'

0 comments on commit c437110

Please sign in to comment.