Skip to content

Commit

Permalink
my.telegram.telegram_backup: enhance media description extraction and…
Browse files Browse the repository at this point in the history
… add more docs
  • Loading branch information
karlicoss committed Feb 3, 2025
1 parent db46380 commit 77a1d76
Showing 1 changed file with 26 additions and 10 deletions.
36 changes: 26 additions & 10 deletions src/my/telegram/telegram_backup.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,11 @@
from struct import calcsize, unpack_from

from my.config import telegram as user_config
from my.core import PathIsh, datetime_aware
from my.core import PathIsh, datetime_aware, make_logger
from my.core.sqlite import sqlite_connection

logger = make_logger(__name__, level='debug')


@dataclass
class config(user_config.telegram_backup):
Expand Down Expand Up @@ -77,7 +79,7 @@ def _message_from_row(r: sqlite3.Row, *, chats: Chats, with_extra_media_info: bo
# maybe later we'll improve it
try:
extra_media_info = _extract_extra_media_info(data=r['data'])
except Exception as e:
except Exception:
pass

return Message(
Expand Down Expand Up @@ -153,7 +155,7 @@ def getstring() -> str:
except UnicodeDecodeError as e:
raise RuntimeError(f'Failed to decode {ss}') from e

def debug(count: int=10) -> None:
def _debug(count: int=10) -> None:
print([hex(x) for x in data[pos: pos + count]])
print([chr(x) for x in data[pos: pos + count]])

Expand All @@ -166,19 +168,33 @@ def debug(count: int=10) -> None:
if has_media == 0:
return None

msg_body = getstring()
# seems like the same as 'text' column (contains a url as well?)
_msg_body = getstring()

skip(20)
url1 = getstring()
url2 = getstring()
ss_type = getstring()
# not sure if assert is really necessary her

# this seems to be present in _msg_body
# however seems 'resolved' or 'normalised'. E.g. might contain 'www.' or https instead of http etc
# TODO maybe use this one instead/in addition?
_url1 = getstring()

# this is just a 'simplified' version of url1 in most cases
# however, in many cases it's a much nicer url, past a redirect?
# - url-encodes unicode
# - expands stackoverflow links
# - expands youtu.be links to full link
# TODO might be useful?
_url2 = getstring()

_ss_type = getstring()
# not sure if assert is really necessary here
# assert ss_type in {
# 'article',
# 'photo',
# 'app',
# 'video',
# }, ss_type
link_title = getstring()
link_title_2 = getstring()
link_subtitle = getstring()
link_description = getstring()
return link_description
return '\n'.join((link_title, link_subtitle, link_description))

0 comments on commit 77a1d76

Please sign in to comment.