Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Change database systems #36

Merged
merged 25 commits into from
Jun 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
121ce0b
cleanup
latekvo Jun 2, 2024
9187bc4
fix typo
latekvo Jun 2, 2024
f6cc60e
remove infinite loop in backend
latekvo Jun 2, 2024
ddf7798
remove deprecated file
latekvo Jun 2, 2024
a09e3b6
add basic SQLAlchemy schema to crawl tasks
latekvo Jun 2, 2024
5ff1115
adjust crawl_tasks sql schema
latekvo Jun 2, 2024
4c18b46
add completion_tasks sql schema
latekvo Jun 2, 2024
67d797f
add url_pool sql schema
latekvo Jun 2, 2024
ad7dafd
adapt completion_task db to use Alchemy
latekvo Jun 3, 2024
f78c428
adapt crawl_tasks db to use Alchemy
latekvo Jun 4, 2024
affd5d5
adapt url_pool db to use Alchemy
latekvo Jun 4, 2024
fedc421
Merge branch 'main' into @latekvo/change_database_system
latekvo Jun 4, 2024
f70f056
add proper list relationships where it was simple
latekvo Jun 5, 2024
e42f5cb
relate crawl_tasks with completion_tasks
latekvo Jun 6, 2024
d3aa430
add all missing relation based queries
latekvo Jun 6, 2024
cda781b
fix direct DeclarativeBase usage
latekvo Jun 8, 2024
bd4f3c5
fix circular import
latekvo Jun 8, 2024
b1b054b
fix legacy code still using tinydb
latekvo Jun 9, 2024
407bcb9
fix crashes, add init to every db
latekvo Jun 10, 2024
7dfe028
fix crashes in summarizer
latekvo Jun 10, 2024
4c2253e
added base file for common initialization
latekvo Jun 10, 2024
f777fed
fix invalid 'and' usage, all workers now run
latekvo Jun 10, 2024
9296ad1
cleanup and type fixes
latekvo Jun 11, 2024
360ea76
add better db logging system
latekvo Jun 12, 2024
d90f548
remove dead comments
latekvo Jun 12, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions core/databases/db_base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
import logging

from sqlalchemy import create_engine
from sqlalchemy.orm import DeclarativeBase

# Shared engine used by every database module in this package.
# A "sqlite://" URL with no file path selects an in-memory SQLite database.
# NOTE(review): in-memory data does not outlive the process — confirm this is
# intended outside of development.
engine = create_engine("sqlite://")

# Logging is configured at import time of this module.
logging.basicConfig()
# Raise the threshold of SQLAlchemy's engine logger so that only CRITICAL
# records get through.
logging.getLogger("sqlalchemy.engine").setLevel(logging.CRITICAL)


class Base(DeclarativeBase):
    """Common declarative base; models subclassing it share one ``metadata``."""


def db_init():
    """Create all tables registered on ``Base.metadata`` on the shared engine."""
    Base.metadata.create_all(bind=engine)
129 changes: 87 additions & 42 deletions core/databases/db_completion_tasks.py
Original file line number Diff line number Diff line change
@@ -1,79 +1,124 @@
from tinydb import Query
from sqlalchemy import String, Integer, Boolean, select, update
from sqlalchemy.orm import Mapped, mapped_column, Session, relationship

from core.databases import defaults
from core.databases.db_base import Base, engine
from core.databases.db_crawl_tasks import CrawlTask
from core.tools import utils
from core.tools.utils import use_tinydb, gen_unix_time
from core.tools.utils import gen_unix_time, page_to_range

db = use_tinydb("completion_tasks")

class CompletionTask(Base):
    """ORM model for one row of the ``completion_tasks`` table.

    The date/timestamp columns hold the integers written by this module:
    values produced by ``gen_unix_time()``, or ``0`` when the event has not
    happened yet.
    """

    __tablename__ = "completion_tasks"

    uuid: Mapped[str] = mapped_column(primary_key=True)
    prompt: Mapped[str] = mapped_column(String())  # make sure postgres uses "TEXT" here
    mode: Mapped[str] = mapped_column(String(12))
    timestamp: Mapped[int] = mapped_column(Integer())  # time added

    # New tasks are inserted without a result, so this column has to accept
    # NULL. Without an explicit ``nullable=True`` the ``Mapped[str]``
    # annotation makes the column NOT NULL and the insert fails.
    completion_result: Mapped[str] = mapped_column(
        String(), nullable=True
    )  # "TEXT" type here as well

    executing: Mapped[bool] = mapped_column(Boolean())
    execution_date: Mapped[int] = mapped_column(Integer())  # time started completion

    completed: Mapped[bool] = mapped_column(Boolean())
    completion_date: Mapped[int] = mapped_column(Integer())  # time completed

    # Crawl tasks related to this completion task.
    # NOTE(review): relationship() needs a foreign key between the two tables;
    # presumably declared on CrawlTask — confirm.
    required_crawl_tasks: Mapped[list["CrawlTask"]] = relationship()


def db_add_completion_task(prompt, mode) -> str:
    """Insert a new, not-yet-started completion task and return its uuid.

    Args:
        prompt: Prompt text stored on the task.
        mode: Mode string stored on the task (the column is ``String(12)``).

    Returns:
        The freshly generated uuid of the inserted task.
    """
    new_uuid = utils.gen_uuid()
    timestamp = utils.gen_unix_time()

    completion_task = CompletionTask(
        uuid=new_uuid,
        prompt=prompt,
        mode=mode,
        timestamp=timestamp,
        # No result yet; it is filled in once the task has been summarized.
        # NOTE(review): requires the completion_result column to accept NULL —
        # confirm the model declares it nullable.
        completion_result=None,
        executing=False,
        execution_date=0,
        completed=False,
        completion_date=0,
        required_crawl_tasks=[],
    )

    # The context manager closes the session even if the commit raises.
    with Session(engine) as session:
        session.add(completion_task)
        session.commit()

    return new_uuid


def db_get_completion_tasks_by_page(
    page: int, per_page: int = defaults.ITEMS_PER_PAGE
) -> list[CompletionTask]:
    """Return one page of completion tasks.

    Args:
        page: Page index, converted to a row range by ``page_to_range``.
        per_page: Number of rows per page.

    Returns:
        The tasks in the requested range, detached from their session.
    """
    start, stop = page_to_range(page, per_page)

    # NOTE(review): there is no ORDER BY, so the database is free to return
    # rows in any order between calls — consider ordering by timestamp.
    query = select(CompletionTask).slice(start, stop)

    # Materialize the rows inside the block so the session (and its
    # connection) is released deterministically instead of being leaked.
    with Session(engine) as session:
        results = list(session.scalars(query))

    return results


def db_get_completion_task_by_uuid(uuid: str) -> CompletionTask:
    """Fetch the single completion task with the given uuid.

    Args:
        uuid: Primary key of the task (a string, matching the ``uuid`` column).

    Returns:
        The matching task, detached from its session.

    Raises:
        sqlalchemy.exc.NoResultFound: If no task has this uuid (from ``one()``).
    """
    # Plain equality: ``is_()`` renders SQL ``IS``, which is meant for
    # NULL/boolean checks and is not valid against a string on every backend.
    query = select(CompletionTask).where(CompletionTask.uuid == uuid)

    with Session(engine) as session:
        result = session.scalars(query).one()

    return result


def db_set_completion_task_executing(uuid: str):
    """Flag a completion task as executing and record the start time.

    Args:
        uuid: Primary key of the task to update.
    """
    # ``==`` instead of ``is_()``: SQL ``IS`` is for NULL/boolean checks and is
    # not valid against a string on every backend.
    statement = (
        update(CompletionTask)
        .where(CompletionTask.uuid == uuid)
        .values(executing=True, execution_date=gen_unix_time())
    )

    # The context manager closes the session even if execute/commit raises.
    with Session(engine) as session:
        session.execute(statement)
        session.commit()


def db_get_incomplete_completion_tasks(amount: int = 1):
    """Fetch up to ``amount`` not-yet-completed tasks and flag them as executing.

    Args:
        amount: Maximum number of tasks to return.

    Returns:
        The selected tasks, detached from their session. They were loaded
        before the flag update, so their ``executing`` attribute still holds
        the value read from the database at selection time.
    """
    # NOTE(review): only ``completed`` is filtered on; tasks that are already
    # executing can be selected again. Confirm whether an additional
    # ``executing.is_(False)`` condition is wanted here.
    query = (
        select(CompletionTask).where(CompletionTask.completed.is_(False)).limit(amount)
    )

    # Close the read session before the per-task updates open their own.
    with Session(engine) as session:
        results = list(session.scalars(query).all())

    for task in results:
        db_set_completion_task_executing(task.uuid)

    return results


def db_release_executing_tasks(uuid_list: list[str]):
    """Clear the executing flag and start time of the given tasks.

    Args:
        uuid_list: Primary keys of the tasks to release.
    """
    # Nothing to release: skip the database round trip.
    if not uuid_list:
        return

    statement = (
        update(CompletionTask)
        .where(CompletionTask.uuid.in_(uuid_list))
        .values(executing=False, execution_date=0)
    )

    # The context manager closes the session even if execute/commit raises.
    with Session(engine) as session:
        session.execute(statement)
        session.commit()


def db_update_completion_task_after_summarizing(summary: str, uuid: str):
    """Store a finished summary on a task and mark the task completed.

    Args:
        summary: Text written to the task's ``completion_result``.
        uuid: Primary key of the task to update.
    """
    # ``==`` instead of ``is_()``: SQL ``IS`` is for NULL/boolean checks and is
    # not valid against a string on every backend.
    statement = (
        update(CompletionTask)
        .where(CompletionTask.uuid == uuid)
        .values(
            completed=True, completion_result=summary, completion_date=gen_unix_time()
        )
    )

    # The context manager closes the session even if execute/commit raises.
    with Session(engine) as session:
        session.execute(statement)
        session.commit()
29 changes: 0 additions & 29 deletions core/databases/db_crawl_history.py

This file was deleted.

Loading
Loading