Skip to content

Commit

Permalink
Merge pull request #5 from pingcap-inc/feat-incremental-sync
Browse files Browse the repository at this point in the history
feat: incremental sync
  • Loading branch information
Cheese authored Jun 23, 2024
2 parents f62d9d1 + 44fd27d commit 4bfbfab
Show file tree
Hide file tree
Showing 4 changed files with 44 additions and 25 deletions.
10 changes: 9 additions & 1 deletion app/forum/get.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,13 @@ def get_post(post_id: int) -> CnPosts:
return dict_to_post(post)


def get_recent_updated_topics(page: int = 0) -> List[CnTopics]:
    """Fetch one page of the forum's ``latest.json`` topic listing.

    No ``order`` parameter is sent, so the topics come back in the
    endpoint's default ordering.

    Args:
        page: Page number sent as the ``page`` query parameter.

    Returns:
        Every entry under ``topic_list.topics`` of the response, converted
        with ``dict_to_topic``, in the order the endpoint returned them.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not respond within the timeout.
    """
    r = requests.get(
        "https://asktug.com/latest.json",
        params={"page": page},
        # Without a timeout a stalled connection would block the caller forever.
        timeout=30,
    )
    # Fail loudly on an error status instead of hitting an obscure KeyError or
    # JSON decoding error on the error body below.
    r.raise_for_status()
    topics = r.json()['topic_list']['topics']

    return [dict_to_topic(topic) for topic in topics]


def get_topics(page: int) -> List[CnTopics]:
r = requests.get(f"https://asktug.com/latest.json?order=created&page={page}")
topics = r.json()['topic_list']['topics']
Expand Down Expand Up @@ -85,7 +92,8 @@ def get_and_save_page_sync_progress(page: int, earliest: datetime) -> bool:

# Topics are queried page by page, ordered by creation time.
topics = get_topics(page)
topics = list(filter(lambda t: datetime.datetime.strptime(t.created_at, '%Y-%m-%dT%H:%M:%S.%fZ') > earliest, topics))
topics = list(
filter(lambda t: datetime.datetime.strptime(t.created_at, '%Y-%m-%dT%H:%M:%S.%fZ') > earliest, topics))
save_page_topic_ids(topics)
print(f"Got {len(topics)} topics in page {page}!")
return len(topics) != 0
Expand Down
7 changes: 6 additions & 1 deletion app/main.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from fastapi import FastAPI
from app.service.translate import translate_topic, translate_task
from app.service.translate import translate_topic, translate_task, translate_or_update_first_page
from app.forum.get import save_all_topic_ids

import datetime
Expand All @@ -12,6 +12,11 @@ async def topic(topic_id: int):
return translate_topic(topic_id)


@app.put("/page/{page_id}")
async def page(page_id: int):
    """Translate or update every topic listed on one forum page.

    Args:
        page_id: Page number taken from the URL path and forwarded to
            ``translate_or_update_first_page``.

    Returns:
        Whatever ``translate_or_update_first_page`` returns, passed through
        unchanged.
    """
    import asyncio

    # translate_or_update_first_page() is a plain synchronous function; calling
    # it directly inside this coroutine would block the event loop (and every
    # other request) until the whole page is processed. Run it on the default
    # thread pool executor instead.
    loop = asyncio.get_running_loop()
    return await loop.run_in_executor(None, translate_or_update_first_page, page_id)


@app.get("/sync/topic_ids")
async def sync_topic_ids():
save_all_topic_ids(datetime.datetime.now() - datetime.timedelta(days=365*2))
Expand Down
43 changes: 21 additions & 22 deletions app/service/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,29 +13,28 @@
# limitations under the License.
import time

from app.service.translate import translate_task, task_error
from app.service.translate import translate_task, task_error, translate_or_update_first_page
import threading

sleep_time = 0
sleep_time = 60


def _translate_once():
    """Run a single translate_task() pass and report any failure."""
    progress = None
    try:
        progress = translate_task()
    except Exception as e:
        # NOTE(review): the assignment above never completes when
        # translate_task() raises, so `progress` is still None here and
        # task_error() always receives None - confirm this is intended.
        print(f"[Error] translate_task() {progress}: {e} ")
        try:
            task_error(progress)
        except Exception as e:
            # A failure while reporting the error must not stop the loop.
            print(f"[Error] task_error() {progress}: {e} ")


def thread_loop():
    """Run translation passes forever, pausing `sleep_time` seconds between them.

    Exceptions raised by a pass are printed and do not stop the loop. The
    module-level `sleep_time` is re-read on every iteration; a value of 0
    means no pause at all.
    """
    while True:
        _translate_once()

        if sleep_time == 0:
            continue
        time.sleep(sleep_time)


# Start the sync loop on a background thread at import time. It is a daemon
# thread so that it does not keep the process alive on shutdown.
# `daemon=True` replaces Thread.setDaemon(), which is deprecated since
# Python 3.10.
thread = threading.Thread(target=thread_loop, daemon=True)
thread.start()
print("Sync task is running now.")
# def thread_loop():
# while True:
# try:
# incremental_loop()
# except Exception as e:
# print(f"[Error] translate_or_update_first_page() {e} ")
#
# if sleep_time != 0:
# time.sleep(sleep_time)
#
#
# def incremental_loop():
# translate_or_update_first_page()
#
#
# thread = threading.Thread(target=thread_loop)
# thread.setDaemon(True)
# thread.start()
# print(f"Sync task is running now.")
9 changes: 8 additions & 1 deletion app/service/translate.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
import time
from typing import List

from app.forum.get import get_topic_and_post_ids, get_post, get_topics
from app.forum.get import get_topic_and_post_ids, get_post, get_recent_updated_topics
from app.forum.create import create_post, create_topic, update_post, update_topic, client
from enum import Enum
from app.db import db_query, db_exec, engine
Expand Down Expand Up @@ -231,3 +231,10 @@ def translate_task(wait_when_none: int = 2):
print(f"[{delta.seconds}s] merged {sync_progress}")

return sync_progress


def translate_or_update_first_page(page: int = 0):
    """Run translate_topic() on every topic of one recently-updated listing page.

    Args:
        page: Page number forwarded to get_recent_updated_topics().
    """
    # The full page is fetched first; each topic is then handled in listing
    # order. The return values of translate_topic() are discarded.
    for recent_topic in get_recent_updated_topics(page):
        translate_topic(topic_id=recent_topic.id)

0 comments on commit 4bfbfab

Please sign in to comment.