-
Notifications
You must be signed in to change notification settings - Fork 0
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
add_summary_index #379
add_summary_index #379
Conversation
d49901f
to
613bbee
Compare
@@ -29,6 +30,10 @@ class RepoFileNameIndex(object): | |||
}, | |||
}, | |||
}, | |||
'description': { | |||
'type': 'text', | |||
'analyzer': 'standard' |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
不需要指定,默认就是standard
@@ -104,7 +104,7 @@ def thread_task(self, repos_queue): | |||
repo_id = queue_data[0] | |||
commit_id = queue_data[1] | |||
try: | |||
self.index_manager.update_library_filename_index(repo_id, commit_id, self.repo_filename_index, self.repo_status_filename_index) | |||
self.index_manager.update_library_filename_index(repo_id, commit_id, self.repo_filename_index, self.repo_status_filename_index, 3600 * 24 * 365) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
脚本中只更新一年的吗？
@@ -98,6 +103,8 @@ def _make_match_query(field, key_word, **kw): | |||
} | |||
} | |||
}) | |||
if need_index_description(repo_id, session, metadata_server_api): |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
这个判断是不是可以不加？否则每次全局查询都要查很多次数据库和 metadata。
per_size = SEASEARCH_BULK_OPETATE_LIMIT | ||
start = 0 | ||
while True: | ||
hits, total = self.query_data_by_paths(index_name, paths, start, per_size) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
改成循环paths
if interval: | ||
last_update_time = datetime.now() - timedelta(seconds=interval) | ||
last_update_time = timestamp_to_isoformat_timestr(last_update_time.timestamp()) | ||
sql = f"SELECT `_id`, `_mtime`, `_description`, `_parent_dir`, `_name` FROM `{METADATA_TABLE.name}` WHERE `_mtime` >= '{last_update_time}'" |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
加上文件类型的筛选条件
272023e
to
37605eb
Compare
|
||
need_added_files = added_files + modified_files | ||
self.add_files(index_name, repo_id, need_added_files) | ||
update_paths = [] | ||
add_rows = {} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
类似这样的变量都需要改成名词
|
||
if new_commit_id == from_commit: | ||
return | ||
description_updated_time = repo_status.description_updated_time |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
这里需要考虑初次创建索引时为空的情况
if need_index_description(repo_id, self.session, self.metadata_server_api): | ||
if description_updated_time: | ||
last_update_time = timestamp_to_isoformat_timestr(float(description_updated_time)) | ||
sql = f"SELECT `_id`, `_mtime`, `_description`, `_parent_dir`, `_name`, `_obj_id` FROM `{METADATA_TABLE.name}` WHERE `_is_dir` = False AND `_mtime` >= '{last_update_time}'" |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
这里改成分页查询,因为有的资料库文件可能会比较多
else: | ||
sql = f"SELECT `_id`, `_mtime`, `_description`, `_parent_dir`, `_name`, `_obj_id` FROM `{METADATA_TABLE.name}` WHERE `_is_dir` = False" | ||
query_timestamp = time.time() | ||
rows = self.metadata_server_api.query_rows(repo_id, sql, []).get('results', []) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
这里加下判断：rows 为空并且 commit 没有变化就 return
@@ -213,6 +217,7 @@ def add_files(self, index_name, repo_id, files): | |||
'path': path, | |||
'suffix': suffix, | |||
'filename': filename, | |||
'description': rows.get(obj_id, ''), |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
不能通过obj_id 获取description,因为新建的文件obj_id都是一样的
description_updated_time = datetime(1970, 1, 1).timestamp() | ||
last_update_time = timestamp_to_isoformat_timestr(float(description_updated_time)) | ||
sql = f"SELECT `_id`, `_mtime`, `_description`, `_parent_dir`, `_name`, `_obj_id` FROM `{METADATA_TABLE.name}` WHERE `_is_dir` = False AND `_mtime` >= '{last_update_time}'" | ||
query_timestamp = time.time() |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
query_timestamp 这个放到每次启动这个定时任务时获取,否则会错过一些索引更新期间添加的description
91ff1ae
to
452bf35
Compare
452bf35
to
ac32b77
Compare
No description provided.