Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix legacy google takeout #372

Merged
merged 2 commits into from
Aug 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions my/google/takeout/html.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,14 @@
from enum import Enum
import re
from pathlib import Path
from datetime import datetime, timezone
from datetime import datetime
from html.parser import HTMLParser
from typing import List, Optional, Any, Callable, Iterable, Tuple
from collections import OrderedDict
from urllib.parse import unquote

import pytz

from ...core.time import abbr_to_timezone


Expand All @@ -29,7 +31,8 @@ def parse_dt(s: str) -> datetime:
# old takeouts didn't have timezone
# hopefully it was utc? Legacy, so not that much of an issue anymore..
# todo although maybe worth adding timezone from location provider?
tz = timezone.utc
# note: need to use pytz here for localize call later
tz = pytz.utc
else:
s, tzabbr = s.rsplit(maxsplit=1)
tz = abbr_to_timezone(tzabbr)
Expand Down
11 changes: 6 additions & 5 deletions my/twitter/android.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
Twitter data from official app for Android
"""

from __future__ import annotations

from dataclasses import dataclass
Expand Down Expand Up @@ -93,12 +94,11 @@ def getstring(slen: int) -> str:

(xx,) = unpack_from('B', data, offset=pos)
skip(1)
# print("TYPE:", xx)

# wtf is this... maybe it's a bitmask?
slen = {
66 : 1,
67 : 2,
66: 1,
67: 2,
106: 1,
107: 2,
}[xx]
Expand All @@ -112,7 +112,7 @@ def getstring(slen: int) -> str:
# see 1665029077034565633

extracted = []
linksep = 0x6a
linksep = 0x6A
while True:
m = re.search(b'\x6a.http', data[pos:])
if m is None:
Expand Down Expand Up @@ -175,7 +175,8 @@ def get_own_user_id(conn) -> str:
# don't think they represent bookmarking time
# - timeline_type
# 7, 8, 9: some sort of notifications or cursors, should exclude
# 17: ??? some cursors but also tweets
# 14: some conversation thread stuff?
# 17: ??? some cursors but also tweets NOTE: they seem to contribute to user's tweets data, so make sure not to delete
# 18: ??? relatively few, maybe 20 of them, also they all have timeline_is_preview=1?
# most of them have our own id as timeline_sender?
# I think it might actually be 'replies' tab -- also contains some retweets etc
Expand Down
Loading