Skip to content

Commit

Permalink
Merge branch 'dev'
Browse files Browse the repository at this point in the history
  • Loading branch information
jstzwj committed Aug 24, 2024
2 parents 8029abc + 3501520 commit ed9c0c3
Show file tree
Hide file tree
Showing 20 changed files with 555 additions and 182 deletions.
Empty file added olah/cache/__init__.py
Empty file.
2 changes: 1 addition & 1 deletion olah/utils/bitset.py → olah/cache/bitset.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,4 +76,4 @@ def __str__(self):
Returns:
str: A string representation of the Bitset object, showing the binary representation of each byte.
"""
return "".join(bin(byte)[2:].zfill(8) for byte in self.bits)
return "".join(bin(byte)[2:].zfill(8)[::-1] for byte in self.bits)
17 changes: 12 additions & 5 deletions olah/utils/olah_cache.py → olah/cache/olah_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ def block_number(self) -> int:
return self._block_number

@property
def block_mask(self) -> int:
def block_mask(self) -> Bitset:
return self._block_mask

def get_header_size(self):
Expand All @@ -76,11 +76,18 @@ def _valid_header(self):
@staticmethod
def read(stream) -> "OlahCacheHeader":
obj = OlahCacheHeader()
magic, version, block_size, file_size, block_mask_size = struct.unpack(
"<4sQQQQ", stream.read(OlahCacheHeader.HEADER_FIX_SIZE)
try:
magic = struct.unpack(
"<4s", stream.read(4)
)
except struct.error:
raise Exception("File is not a Olah cache file.")
if magic[0] != OlahCacheHeader.MAGIC_NUMBER:
raise Exception("File is not a Olah cache file.")

version, block_size, file_size, block_mask_size = struct.unpack(
"<QQQQ", stream.read(OlahCacheHeader.HEADER_FIX_SIZE - 4)
)
if magic != OlahCacheHeader.MAGIC_NUMBER:
raise Exception("The file is not a valid olah cache file.")
obj._version = version
obj._block_size = block_size
obj._file_size = file_size
Expand Down
54 changes: 54 additions & 0 deletions olah/cache/stat.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
import argparse


import os
import sys
from olah.cache.olah_cache import OlahCache

def get_size_human(size: int) -> str:
    """Format a byte count as a human-readable string with four decimals.

    The value is scaled by powers of 1024 and suffixed with ``GB``, ``MB``,
    ``KB`` or ``B``.

    Args:
        size: Number of bytes.

    Returns:
        The formatted size, e.g. ``"1.5000KB"`` for ``1536``.
    """
    kib = 1024
    mib = kib * 1024
    gib = mib * 1024
    # The quotient must stay a float: truncating it with int() before
    # formatting would always print ".0000" and hide the fractional part.
    # ">=" makes an exact power of 1024 use the larger unit (1024 -> 1.0000KB).
    if size >= gib:
        return f"{size / gib:.4f}GB"
    elif size >= mib:
        return f"{size / mib:.4f}MB"
    elif size >= kib:
        return f"{size / kib:.4f}KB"
    else:
        return f"{size:.4f}B"

def insert_newlines(input_str, every=10):
    """Break ``input_str`` into lines of at most ``every`` characters.

    Args:
        input_str: The string to wrap.
        every: Maximum number of characters per output line.

    Returns:
        The chunks of ``input_str`` joined with newline characters.
    """
    chunks = []
    for start in range(0, len(input_str), every):
        stop = start + every
        chunks.append(input_str[start:stop])
    return '\n'.join(chunks)

if __name__ == "__main__":
    # Command-line entry point: print the header information and block
    # status of an Olah cache file, and optionally export the cached payload.
    parser = argparse.ArgumentParser(description="Olah Cache Visualization Tool.")
    parser.add_argument("--file", "-f", type=str, required=True, help="The path of Olah cache file")
    parser.add_argument("--export", "-e", type=str, default="", help="Export the cached file if all blocks are cached")
    args = parser.parse_args()
    print(args)

    # Size of the cache file on disk, measured by seeking to its end.
    with open(args.file, "rb") as f:
        f.seek(0, os.SEEK_END)
        bin_size = f.tell()

    try:
        cache = OlahCache(args.file)
    except Exception as e:
        # Opening fails for files that are not valid Olah caches; report
        # the reason and exit with a non-zero status.
        print(e)
        sys.exit(1)
    print(f"File: {args.file}")
    print(f"Olah Cache Version: {cache.header.version}")
    print(f"File Size: {get_size_human(cache.header.file_size)}")
    print(f"Cache Total Size: {get_size_human(bin_size)}")
    print(f"Block Size: {cache.header.block_size}")
    print(f"Block Number: {cache.header.block_number}")
    print("Cache Status: ")
    # Keep one character per block: the mask string is cut to the block
    # count, read through the public ``block_number`` property.
    cache_status = str(cache.header.block_mask)[:cache.header.block_number]
    print(insert_newlines(cache_status, every=50))

    if args.export != "":
        if all(c == "1" for c in cache_status):
            with open(args.file, "rb") as f:
                # Skip the header; everything after it is written out.
                f.seek(cache._get_header_size(), os.SEEK_SET)
                with open(args.export, "wb") as fout:
                    # Copy in 1 MiB chunks so large caches are not loaded
                    # into memory at once.
                    for chunk in iter(lambda: f.read(1024 * 1024), b""):
                        fout.write(chunk)
        else:
            print("Some blocks are not cached, so the export is skipped.")
Empty file added olah/database/__init__.py
Empty file.
26 changes: 26 additions & 0 deletions olah/database/models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# coding=utf-8
# Copyright 2024 XiaHan
#
# Use of this source code is governed by an MIT-style
# license that can be found in the LICENSE file or at
# https://opensource.org/licenses/MIT.

import os
from peewee import *
import datetime

from olah.utils.olah_utils import get_olah_path



# Path of the SQLite database file, located in the directory returned by
# get_olah_path().
db_path = os.path.join(get_olah_path(), "database.db")
# Peewee database handle for that file; it is created when this module is
# imported and is referenced by the models below.
db = SqliteDatabase(db_path)


class BaseModel(Model):
    """Base model whose ``Meta.database`` is the module-level ``db`` handle."""

    class Meta:
        # Point the model at the module-level SqliteDatabase instance.
        database = db


class User(BaseModel):
    """User model with a single ``username`` field."""

    # Character field declared with unique=True.
    username = CharField(unique=True)
10 changes: 10 additions & 0 deletions olah/errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,3 +28,13 @@ def error_page_not_found() -> Response:
},
status_code=404,
)

def error_entry_not_found(branch: str, path: str) -> Response:
    """Build the 404 response for a path missing on a branch.

    Args:
        branch: Name of the branch (revision) that was queried.
        path: Path that could not be found.

    Returns:
        A ``Response`` with status 404 whose ``x-error-code`` header is
        ``EntryNotFound`` and whose ``x-error-message`` header names the
        missing path and the branch.
    """
    error_headers = {
        "x-error-code": "EntryNotFound",
        "x-error-message": f"{path} does not exist on \"{branch}\"",
    }
    return Response(headers=error_headers, status_code=404)

85 changes: 79 additions & 6 deletions olah/mirror/repos.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,19 +65,70 @@ def _get_description(self, commit: Commit) -> str:
readme = self._get_readme(commit)
return self._remove_card(readme)

def _get_entry_files(self, tree, include_dir=False) -> List[str]:
def _get_tree_files_recursive(self, tree, include_dir=False) -> List[str]:
out_paths = []
for entry in tree:
if entry.type == "tree":
out_paths.extend(self._get_entry_files(entry))
out_paths.extend(self._get_tree_files_recursive(entry))
if include_dir:
out_paths.append(entry.path)
else:
out_paths.append(entry.path)
return out_paths

def _get_tree_files(self, commit: Commit) -> List[str]:
return self._get_entry_files(commit.tree)
def _get_commit_files_recursive(self, commit: Commit) -> List[str]:
return self._get_tree_files_recursive(commit.tree)

def _get_tree_files(self, tree: Tree) -> List[Dict[str, Union[int, str]]]:
    """List the direct children of ``tree`` as metadata dictionaries.

    Sub-trees are reported with type ``"directory"`` and size 0. Every other
    entry is reported with type ``"file"`` and its blob size. A file whose
    content matches the Git LFS pointer format is reported with the size
    recorded in the pointer and an extra ``"lfs"`` dictionary holding
    ``oid``, ``size`` and ``pointerSize``.

    Args:
        tree: Iterable of git tree entries exposing ``type``, ``hexsha`` and
            ``name`` (plus ``size`` and ``data_stream`` for non-tree entries).

    Returns:
        One dictionary per entry with the keys ``type``, ``oid``, ``size``,
        ``path`` and, for LFS pointers only, ``lfs``.
    """
    entries = []
    for entry in tree:
        if entry.type == "tree":
            entries.append(
                {
                    "type": "directory",
                    "oid": entry.hexsha,
                    "size": 0,
                    "path": entry.name,
                }
            )
            continue

        item = {
            "type": "file",
            "oid": entry.hexsha,
            "size": entry.size,
            "path": entry.name,
        }
        # Only blobs in this size window are inspected as possible LFS
        # pointers, which avoids reading every file's content.
        if entry.size > 120 and entry.size < 150:
            try:
                pointer_text = entry.data_stream.read().decode("utf-8")
            except UnicodeDecodeError:
                # A small binary blob is not an LFS pointer; treat it as a
                # regular file instead of failing the whole listing.
                pointer_text = ""
            match_groups = re.match(
                r"version https://git-lfs\.github\.com/spec/v[0-9]\noid sha256:([0-9a-z]{64})\nsize ([0-9]+?)\n",
                pointer_text,
            )
            if match_groups is not None:
                real_size = int(match_groups.group(2))
                item["size"] = real_size
                item["lfs"] = {
                    "oid": match_groups.group(1),
                    "size": real_size,
                    "pointerSize": entry.size,
                }
        entries.append(item)
    return entries

def _get_commit_files(self, commit: Commit) -> List[Dict[str, Union[int, str]]]:
    """List the entries of the root tree of ``commit``.

    Args:
        commit: Commit object whose ``tree`` attribute is listed.

    Returns:
        Whatever ``_get_tree_files`` returns for the commit's root tree.
    """
    root_tree = commit.tree
    return self._get_tree_files(root_tree)

def _get_earliest_commit(self) -> Commit:
earliest_commit = None
Expand All @@ -92,7 +143,27 @@ def _get_earliest_commit(self) -> Commit:

return earliest_commit

def get_meta(self, commit_hash: str) -> Dict[str, Any]:
def get_tree(self, commit_hash: str, path: str) -> Optional[Dict[str, Any]]:
    """List the entries of the directory at ``path`` in a commit.

    The path is split on ``/`` and walked one component at a time from the
    commit's root tree; empty or whitespace-only components are ignored.

    Args:
        commit_hash: Revision passed to the repository's ``commit`` lookup.
        path: Slash-separated directory path inside the commit.

    Returns:
        The listing produced by ``_get_tree_files`` for the target
        directory, or ``None`` when the commit lookup raises
        ``gitdb.exc.BadName`` or a path component is not a directory of the
        tree reached so far.
    """
    try:
        commit = self._git_repo.commit(commit_hash)
    except gitdb.exc.BadName:
        return None

    current_tree = commit.tree
    listing = self._get_tree_files(tree=current_tree)
    for segment in path.split("/"):
        if not segment.strip():
            continue
        directory_names = [
            item["path"] for item in listing if item["type"] == "directory"
        ]
        if segment not in directory_names:
            return None
        # Descend one level and refresh the listing for the next component.
        current_tree = current_tree[segment]
        listing = self._get_tree_files(tree=current_tree)
    return listing

def get_meta(self, commit_hash: str) -> Optional[Dict[str, Any]]:
try:
commit = self._git_repo.commit(commit_hash)
except gitdb.exc.BadName:
Expand All @@ -117,7 +188,9 @@ def get_meta(self, commit_hash: str) -> Dict[str, Any]:
meta.cardData = yaml.load(
self._match_card(self._get_readme(commit)), Loader=yaml.CLoader
)
meta.siblings = [{"rfilename": p} for p in self._get_tree_files(commit)]
meta.siblings = [
{"rfilename": p} for p in self._get_commit_files_recursive(commit)
]
meta.createdAt = self._get_earliest_commit().committed_datetime.strftime(
"%Y-%m-%dT%H:%M:%S.%fZ"
)
Expand Down
47 changes: 4 additions & 43 deletions olah/proxy/files.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@
HUGGINGFACE_HEADER_X_LINKED_SIZE,
ORIGINAL_LOC,
)
from olah.utils.olah_cache import OlahCache
from olah.cache.olah_cache import OlahCache
from olah.utils.cache_utils import _read_cache_request, _write_cache_request
from olah.utils.url_utils import (
RemoteInfo,
add_query_param,
Expand Down Expand Up @@ -80,48 +81,6 @@ def get_contiguous_ranges(
range_start_pos = end_pos
return ranges_and_cache_list


async def _write_cache_request(
    head_path: str, status_code: int, headers: Dict[str, str], content: bytes
) -> None:
    """Store a response's status code, headers and body in a JSON cache file.

    The body is stored as a hexadecimal string so that arbitrary bytes can
    be represented in JSON.

    Args:
        head_path (str): The path of the cache file to write.
        status_code (int): The status code of the response.
        headers (Dict[str, str]): The response headers.
        content (bytes): The response body.

    Returns:
        None
    """
    record = dict(
        status_code=status_code,
        headers=headers,
        content=content.hex(),
    )
    with open(head_path, "w", encoding="utf-8") as cache_file:
        serialized = json.dumps(record, ensure_ascii=False)
        cache_file.write(serialized)


async def _read_cache_request(head_path: str) -> Dict[str, str]:
    """Load a cached response from a JSON cache file.

    Args:
        head_path (str): The path of the cache file to read.

    Returns:
        Dict[str, str]: The parsed JSON object, with its ``content`` entry
        converted from a hexadecimal string back to ``bytes``.
    """
    with open(head_path, "r", encoding="utf-8") as cache_file:
        raw_text = cache_file.read()

    record = json.loads(raw_text)
    record["content"] = bytes.fromhex(record["content"])
    return record


async def _file_full_header(
app,
save_path: str,
Expand Down Expand Up @@ -194,6 +153,8 @@ async def _file_full_header(
)
elif response.status_code == 403:
pass
elif response.status_code == 404:
pass
else:
raise Exception(
f"Unexpected HTTP status code {response.status_code}"
Expand Down
2 changes: 1 addition & 1 deletion olah/proxy/lfs.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# coding=utf-8
# Copyright 2024 XiaHan
#
#
# Use of this source code is governed by an MIT-style
# license that can be found in the LICENSE file or at
# https://opensource.org/licenses/MIT.
Expand Down
Loading

0 comments on commit ed9c0c3

Please sign in to comment.