From 6efc1c04f2f10f6143a31d3b73a616219c63c663 Mon Sep 17 00:00:00 2001 From: Linusp Date: Sat, 27 Apr 2024 10:36:00 +0800 Subject: [PATCH 1/6] supported more parameters in `InoreaderClient.__get_stream_contents` --- inoreader/client.py | 31 ++++++++++--------------------- 1 file changed, 10 insertions(+), 21 deletions(-) diff --git a/inoreader/client.py b/inoreader/client.py index 5f8f860..23301e6 100644 --- a/inoreader/client.py +++ b/inoreader/client.py @@ -143,29 +143,18 @@ def get_subscription_list(self): for item in response["subscriptions"]: yield Subscription.from_json(item) - def get_stream_contents(self, stream_id, c="", limit=None): - fetched_count = 0 - stop = False - while not stop: - articles, c = self.__get_stream_contents(stream_id, c) - for a in articles: - try: - yield Article.from_json(a) - fetched_count += 1 - except Exception as e: - print(e) - continue - if limit and fetched_count >= limit: - stop = True - break - if c is None: - break - - def __get_stream_contents(self, stream_id, continuation=""): + def __get_stream_contents( + self, stream_id=None, n=50, r=None, ot=None, xt=None, it=None, c=None + ): + """reference: https://www.inoreader.com/developers/stream-contents""" self.check_token() - url = urljoin(BASE_URL, self.STREAM_CONTENTS_PATH + quote_plus(stream_id)) - params = {"n": 50, "r": "", "c": continuation, "output": "json"} # default 20, max 1000 + url = urljoin(BASE_URL, self.STREAM_CONTENTS_PATH) + if stream_id: + url = urljoin(url, quote_plus(stream_id)) + + params = {"n": n, "r": r, "ot": ot, "xt": xt, "it": it, "c": c} + params = {arg: val for arg, val in params.items() if val is not None} response = self.parse_response(self.session.post(url, params=params, proxies=self.proxies)) if "continuation" in response: return response["items"], response["continuation"] From 48949aa3fadc83effda510ab426e70e3ab366c9d Mon Sep 17 00:00:00 2001 From: Linusp Date: Sat, 27 Apr 2024 10:38:01 +0800 Subject: [PATCH 2/6] optimized `InoreaderClient.fetch_articles` --- inoreader/client.py | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/inoreader/client.py b/inoreader/client.py index 23301e6..3ac7ad5 100644 --- a/inoreader/client.py +++ b/inoreader/client.py @@ -161,14 +161,15 @@ def __get_stream_contents( else: return response["items"], None - def fetch_articles(self, folder=None, tags=None, unread=True, starred=False, limit=None, n=50): + def fetch_articles( + self, stream_id=None, folder=None, tags=None, unread=True, starred=False, limit=None, n=50 + ): self.check_token() - url = urljoin(BASE_URL, self.STREAM_CONTENTS_PATH) - if folder: - url = urljoin(url, quote_plus(self.GENERAL_TAG_TEMPLATE.format(folder))) + if not stream_id and folder: + stream_id = self.GENERAL_TAG_TEMPLATE.format(folder) - params = {"n": n, "c": str(uuid4())} + params = {"stream_id": stream_id, "n": n, "c": str(uuid4())} if unread: params["xt"] = self.READ_TAG @@ -176,8 +177,8 @@ def fetch_articles(self, folder=None, tags=None, unread=True, starred=False, lim params["it"] = self.STARRED_TAG fetched_count = 0 - response = self.parse_response(self.session.post(url, params=params, proxies=self.proxies)) - for data in response["items"]: + items, continuation = self.__get_stream_contents(**params) + for data in items: categories = { category.split("/")[-1] for category in data.get("categories", []) @@ -191,13 +192,10 @@ def fetch_articles(self, folder=None, tags=None, unread=True, starred=False, lim if limit and fetched_count >= limit: break - continuation = 
response.get("continuation") while continuation and (not limit or fetched_count < limit): params["c"] = continuation - response = self.parse_response( - self.session.post(url, params=params, proxies=self.proxies) - ) - for data in response["items"]: + items, continuation = self.__get_stream_contents(**params) + for data in items: categories = { category.split("/")[-1] for category in data.get("categories", []) @@ -210,8 +208,6 @@ def fetch_articles(self, folder=None, tags=None, unread=True, starred=False, lim if limit and fetched_count >= limit: break - continuation = response.get("continuation") - def fetch_unread(self, folder=None, tags=None, limit=None): for article in self.fetch_articles(folder=folder, tags=tags, unread=True): yield article From 4c3a5c6195de3812edb079d17e6de962d438d677 Mon Sep 17 00:00:00 2001 From: Linusp Date: Sat, 27 Apr 2024 10:41:10 +0800 Subject: [PATCH 3/6] supported `n` in `fetch_unread` and `fetch_starred` --- inoreader/client.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/inoreader/client.py b/inoreader/client.py index 3ac7ad5..421d763 100644 --- a/inoreader/client.py +++ b/inoreader/client.py @@ -208,12 +208,14 @@ def fetch_articles( if limit and fetched_count >= limit: break - def fetch_unread(self, folder=None, tags=None, limit=None): - for article in self.fetch_articles(folder=folder, tags=tags, unread=True): + def fetch_unread(self, folder=None, tags=None, limit=None, n=None): + for article in self.fetch_articles(folder=folder, tags=tags, unread=True, n=n): yield article - def fetch_starred(self, folder=None, tags=None, limit=None): - for article in self.fetch_articles(folder=folder, tags=tags, unread=False, starred=True): + def fetch_starred(self, folder=None, tags=None, limit=None, n=None): + for article in self.fetch_articles( + folder=folder, tags=tags, unread=False, starred=True, n=n + ): yield article def add_general_label(self, articles, label): From 99761416ee55dbae266a75dae5f84f2ff28d3947 Mon Sep 17 00:00:00 2001 From: Linusp Date: Sat, 27 Apr 2024 10:42:04 +0800 Subject: [PATCH 4/6] supported `--batch-size` option in some commands --- inoreader/main.py | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/inoreader/main.py b/inoreader/main.py index 9075edb..eb35517 100644 --- a/inoreader/main.py +++ b/inoreader/main.py @@ -200,6 +200,9 @@ def list_tags(): @main.command("fetch-unread") @click.option("-f", "--folder", required=True, help="Folder which articles belong to") @click.option("-t", "--tags", help="Tag(s) for filtering, separate with comma") +@click.option( + "--batch-size", type=int, default=50, help="Maximum number of articles per API request" +) @click.option("-o", "--outfile", required=True, help="Filename to save articles") @click.option( "--out-format", @@ -208,14 +211,14 @@ def list_tags(): help="Format of output file, default: json", ) @catch_error -def fetch_unread(folder, tags, outfile, out_format): +def fetch_unread(folder, tags, batch_size, outfile, out_format): """Fetch unread articles""" client = get_client() tag_list = [] if not tags else tags.split(",") fout = codecs.open(outfile, mode="w", encoding="utf-8") writer = csv.writer(fout, delimiter=",") if out_format == "csv" else None - for idx, article in enumerate(client.fetch_unread(folder=folder, tags=tag_list)): + for idx, article in enumerate(client.fetch_unread(folder=folder, tags=tag_list, n=batch_size)): if idx > 0 and (idx % 10) == 0: LOGGER.info("fetched %d articles", idx) title = article.title 
@@ -391,6 +394,10 @@ def get_subscriptions(outfile, folder, out_format): @main.command("fetch-articles") @click.option("-i", "--stream-id", required=True, help="Stream ID which you want to fetch") +@click.option( + "--batch-size", type=int, default=50, help="Maximum number of articles per API request" +) +@click.option("--only-unread", is_flag=True, help="Fetch unread articles only") @click.option("-o", "--outfile", required=True, help="Filename to save results") @click.option( "--out-format", @@ -399,7 +406,7 @@ def get_subscriptions(outfile, folder, out_format): help="Format of output, default: json", ) @catch_error -def fetch_articles(outfile, stream_id, out_format): +def fetch_articles(outfile, stream_id, batch_size, only_unread, out_format): """Fetch articles by stream id""" client = get_client() @@ -409,7 +416,9 @@ def fetch_articles(outfile, stream_id, out_format): writer = csv.DictWriter(fout, ["title", "content"], delimiter=",", quoting=csv.QUOTE_ALL) writer.writeheader() - for idx, article in enumerate(client.get_stream_contents(stream_id)): + for idx, article in enumerate( + client.fetch_articles(stream_id=stream_id, n=batch_size, unread=only_unread) + ): if idx > 0 and (idx % 10) == 0: LOGGER.info("fetched %d articles", idx) @@ -469,6 +478,9 @@ def dedupe(folder, thresh): @main.command("fetch-starred") @click.option("-f", "--folder", help="Folder which articles belong to") @click.option("-t", "--tags", help="Tag(s) for filtering, separate with comma") +@click.option( + "--batch-size", type=int, default=50, help="Maximum number of articles per API request" +) @click.option( "-o", "--outfile", help="Filename to save articles, required when output format is `csv`" ) @@ -484,7 +496,7 @@ def dedupe(folder, thresh): help="Format of output file, default: json", ) @catch_error -def fetch_starred(folder, tags, outfile, outdir, limit, save_image, out_format): +def fetch_starred(folder, tags, batch_size, outfile, outdir, limit, save_image, out_format): """Fetch starred articles""" client = get_client() @@ -506,7 +518,7 @@ def fetch_starred(folder, tags, outfile, outdir, limit, save_image, out_format): tag_list = [] if not tags else tags.split(",") url_to_image = {} fetched_count = 0 - for article in client.fetch_starred(folder=folder, tags=tag_list, limit=limit): + for article in client.fetch_starred(folder=folder, tags=tag_list, limit=limit, n=batch_size): if limit and fetched_count >= limit: break From 9508788abd23bdf8c43fc63a2a87fc19d9853f50 Mon Sep 17 00:00:00 2001 From: Linusp Date: Sat, 27 Apr 2024 10:52:59 +0800 Subject: [PATCH 5/6] Release 0.7.0 --- CHANGELOG.md | 11 +++++++++++ pyproject.toml | 2 +- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index fc525ab..5741ec8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,16 @@ # CHANGELOG +## v0.7.0 + +Removed + +- Removed `InoreaderClient.get_stream_contents`. + +Changed + +- Add param `n` to `InoreaderClient.fetch_articles` to reduce the number of API calls, thanks to [tosborne-slalom](https://github.com/tosborne-slalom) +- Supported `--batch-size` option in commands `fetch-articles`/`fetch-unread`/`fetch-starred` + ## v0.6.0 Publish to pypi! 
diff --git a/pyproject.toml b/pyproject.toml index e68f120..6f0e8f3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "python-inoreader" -version = "0.6.0" +version = "0.7.0" description = "Python wrapper of Inoreader API" authors = [ {name = "Linusp", email = "linusp1024@gmail.com"}, From 1ffebae6f071514d5610c888e0c3b7ee22f7f590 Mon Sep 17 00:00:00 2001 From: Linusp Date: Sat, 27 Apr 2024 11:18:21 +0800 Subject: [PATCH 6/6] add `ot` to ignore words list --- codespell-ignore-words.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/codespell-ignore-words.txt b/codespell-ignore-words.txt index e69de29..f716d3b 100644 --- a/codespell-ignore-words.txt +++ b/codespell-ignore-words.txt @@ -0,0 +1 @@ +ot
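
For reference, a minimal usage sketch of the interface this series ends up with; the folder name and stream id below are placeholders, and the snippet reuses the `get_client()` helper that the CLI commands in inoreader/main.py already call:

    from inoreader.main import get_client

    client = get_client()

    # `n` (exposed on the CLI as --batch-size) controls how many articles each
    # stream-contents request returns, so large folders need fewer API calls.
    for article in client.fetch_unread(folder="News", n=200):
        print(article.title)

    # fetch_articles now also accepts a stream id directly, and only filters by
    # read state when asked to, mirroring `fetch-articles --only-unread`.
    for article in client.fetch_articles(
        stream_id="user/-/state/com.google/starred", unread=False, n=100
    ):
        print(article.title)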