Skip to content

Commit

Permalink
code format
Browse files Browse the repository at this point in the history
  • Loading branch information
jstzwj committed Aug 14, 2024
1 parent 2ccc4ba commit 6ec1dcf
Show file tree
Hide file tree
Showing 7 changed files with 45 additions and 24 deletions.
5 changes: 4 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,10 @@ Or you can specify the host address and listening port:
```bash
python -m olah.server --host localhost --port 8090
```
Please remember to change the `--mirror-url` and `--mirror-lfs-url` to the actual URLs of the mirror site while modifying the host and port.
**Note: When modifying the host and port, please also change `--mirror-netloc` and `--mirror-lfs-netloc` to the actual network locations (`host:port`) of the mirror site.**
```bash
python -m olah.server --host 192.168.1.100 --port 8090 --mirror-netloc 192.168.1.100:8090
```

The default mirror cache path is `./repos`, you can change it by `--repos-path` parameter:
```bash
Expand Down
6 changes: 5 additions & 1 deletion README_zh.md
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,11 @@ python -m olah.server
```bash
python -m olah.server --host localhost --port 8090
```
请记得在修改主机和端口时将`--mirror-url`和`--mirror-lfs-url`更改为镜像站点的实际URL。
**注意:修改主机和端口时,请记得将`--mirror-netloc`和`--mirror-lfs-netloc`更改为镜像站点的实际网络地址(`主机:端口`)。**

```bash
python -m olah.server --host 192.168.1.100 --port 8090 --mirror-netloc 192.168.1.100:8090
```

默认的镜像缓存路径是`./repos`,您可以通过`--repos-path`参数进行更改:
```bash
Expand Down
8 changes: 7 additions & 1 deletion olah/mirror/meta.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
# coding=utf-8
# Copyright 2024 XiaHan
#
# Use of this source code is governed by an MIT-style
# license that can be found in the LICENSE file or at
# https://opensource.org/licenses/MIT.


class RepoMeta(object):
Expand All @@ -18,7 +24,7 @@ def __init__(self) -> None:
self.cardData = None
self.siblings = None
self.createdAt = None

def to_dict(self):
return {
"_id": self._id,
Expand Down
42 changes: 24 additions & 18 deletions olah/mirror/repos.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# coding=utf-8
# Copyright 2024 XiaHan
#
#
# Use of this source code is governed by an MIT-style
# license that can be found in the LICENSE file or at
# https://opensource.org/licenses/MIT.
Expand All @@ -15,6 +15,8 @@
import yaml

from olah.mirror.meta import RepoMeta


class LocalMirrorRepo(object):
def __init__(self, path: str, repo_type: str, org: str, repo: str) -> None:
self._path = path
Expand All @@ -23,21 +25,21 @@ def __init__(self, path: str, repo_type: str, org: str, repo: str) -> None:
self._repo = repo

self._git_repo = Repo(self._path)

def _sha256(self, text: Union[str, bytes]) -> str:
if isinstance(text, bytes) or isinstance(text, bytearray):
bin = text
elif isinstance(text, str):
bin = text.encode('utf-8')
bin = text.encode("utf-8")
else:
raise Exception("Invalid sha256 param type.")
sha256_hash = hashlib.sha256()
sha256_hash.update(bin)
hashed_string = sha256_hash.hexdigest()
return hashed_string

def _match_card(self, readme: str) -> str:
pattern = r'\s*---(.*?)---'
pattern = r"\s*---(.*?)---"

match = re.match(pattern, readme, flags=re.S)

Expand All @@ -46,22 +48,23 @@ def _match_card(self, readme: str) -> str:
return card_string
else:
return ""

def _remove_card(self, readme: str) -> str:
pattern = r'\s*---(.*?)---'
pattern = r"\s*---(.*?)---"
out = re.sub(pattern, "", readme, flags=re.S)
return out

def _get_readme(self, commit: Commit) -> str:
if "README.md" not in commit.tree:
return ""
else:
out: bytes = commit.tree["README.md"].data_stream.read()
return out.decode()

def _get_description(self, commit: Commit) -> str:
readme = self._get_readme(commit)
return self._remove_card(readme)

def _get_entry_files(self, tree, include_dir=False) -> List[str]:
out_paths = []
for entry in tree:
Expand All @@ -75,7 +78,6 @@ def _get_entry_files(self, tree, include_dir=False) -> List[str]:

def _get_tree_files(self, commit: Commit) -> List[str]:
return self._get_entry_files(commit.tree)


def _get_earliest_commit(self) -> Commit:
earliest_commit = None
Expand All @@ -96,12 +98,14 @@ def get_meta(self, commit_hash: str) -> Dict[str, Any]:
except gitdb.exc.BadName:
return None
meta = RepoMeta()

meta._id = self._sha256(f"{self._org}/{self._repo}/{commit.hexsha}")
meta.id = f"{self._org}/{self._repo}"
meta.author = self._org
meta.sha = commit.hexsha
meta.lastModified = self._git_repo.head.commit.committed_datetime.strftime("%Y-%m-%dT%H:%M:%S.%fZ")
meta.lastModified = self._git_repo.head.commit.committed_datetime.strftime(
"%Y-%m-%dT%H:%M:%S.%fZ"
)
meta.private = False
meta.gated = False
meta.disabled = False
Expand All @@ -110,9 +114,13 @@ def get_meta(self, commit_hash: str) -> Dict[str, Any]:
meta.paperswithcode_id = None
meta.downloads = 0
meta.likes = 0
meta.cardData = yaml.load(self._match_card(self._get_readme(commit)), Loader=yaml.CLoader)
meta.cardData = yaml.load(
self._match_card(self._get_readme(commit)), Loader=yaml.CLoader
)
meta.siblings = [{"rfilename": p} for p in self._get_tree_files(commit)]
meta.createdAt = self._get_earliest_commit().committed_datetime.strftime("%Y-%m-%dT%H:%M:%S.%fZ")
meta.createdAt = self._get_earliest_commit().committed_datetime.strftime(
"%Y-%m-%dT%H:%M:%S.%fZ"
)
return meta.to_dict()

def _contain_path(self, path: str, tree: Tree) -> bool:
Expand Down Expand Up @@ -149,7 +157,7 @@ def get_file(self, commit_hash: str, path: str) -> Optional[OStream]:
commit = self._git_repo.commit(commit_hash)
except gitdb.exc.BadName:
return None

def stream_wrapper(file_bytes: bytes):
file_stream = io.BytesIO(file_bytes)
while True:
Expand All @@ -158,10 +166,8 @@ def stream_wrapper(file_bytes: bytes):
break
else:
yield chunk

if not self._contain_path(path, commit.tree):
return None
else:
return stream_wrapper(commit.tree[path].data_stream.read())


3 changes: 2 additions & 1 deletion olah/utils/repo_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import datetime
import os
import glob
import tenacity
from typing import Dict, Literal, Optional, Tuple, Union
import json
from urllib.parse import urljoin
Expand Down Expand Up @@ -272,7 +273,7 @@ async def get_commit_hf(
except:
return await get_commit_hf_offline(app, repo_type, org, repo, commit)


@tenacity.retry(stop=tenacity.stop_after_attempt(3))
async def check_commit_hf(
app,
repo_type: Optional[Literal["models", "datasets", "spaces"]],
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "olah"
version = "0.1.1"
version = "0.2.0"
description = "Self-hosted lightweight huggingface mirror."
readme = "README.md"
requires-python = ">=3.8"
Expand Down
3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,5 @@ toml==0.10.2
huggingface_hub==0.23.4
pytest==8.2.2
cachetools==5.4.0
PyYAML==6.0.1
PyYAML==6.0.1
tenacity==8.5.0

0 comments on commit 6ec1dcf

Please sign in to comment.