Commit 0c8a076

Merge pull request #30 from Linusp/dev

Release 0.7.0

Linusp authored May 3, 2024
2 parents 349a1b2 + 1ffebae commit 0c8a076

Showing 5 changed files with 57 additions and 46 deletions.
11 changes: 11 additions & 0 deletions CHANGELOG.md

@@ -1,5 +1,16 @@
 # CHANGELOG
 
+## v0.7.0
+
+Removed
+
+- Removed `InoreaderClient.get_stream_contents`.
+
+Changed
+
+- Added param `n` to `InoreaderClient.fetch_articles` to reduce the number of API calls, thanks to [tosborne-slalom](https://github.com/tosborne-slalom)
+- Supported `--batch-size` option in commands `fetch-articles`/`fetch-unread`/`fetch-starred`
+
 ## v0.6.0
 
 Publish to pypi!
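To illustrate the two changes above: a minimal sketch of the new `n` parameter, assuming an already-authenticated InoreaderClient (the constructor arguments are placeholders, not taken from this diff). A larger `n` makes each stream-contents request return more articles, so fewer HTTP round-trips are needed for the same total.

    from inoreader.client import InoreaderClient

    # Placeholder setup: real code needs valid app credentials and tokens.
    client = InoreaderClient(...)

    # n is the per-request page size; limit caps the total number yielded.
    # Fetching 500 articles at n=100 takes roughly 5 API calls instead of 10 at n=50.
    for article in client.fetch_articles(folder="News", unread=True, n=100, limit=500):
        print(article.title)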
1 change: 1 addition & 0 deletions codespell-ignore-words.txt

@@ -0,0 +1 @@
+ot
65 changes: 26 additions & 39 deletions inoreader/client.py

@@ -143,52 +143,42 @@ def get_subscription_list(self):
         for item in response["subscriptions"]:
             yield Subscription.from_json(item)
 
-    def get_stream_contents(self, stream_id, c="", limit=None):
-        fetched_count = 0
-        stop = False
-        while not stop:
-            articles, c = self.__get_stream_contents(stream_id, c)
-            for a in articles:
-                try:
-                    yield Article.from_json(a)
-                    fetched_count += 1
-                except Exception as e:
-                    print(e)
-                    continue
-                if limit and fetched_count >= limit:
-                    stop = True
-                    break
-            if c is None:
-                break
-
-    def __get_stream_contents(self, stream_id, continuation=""):
+    def __get_stream_contents(
+        self, stream_id=None, n=50, r=None, ot=None, xt=None, it=None, c=None
+    ):
         """reference: https://www.inoreader.com/developers/stream-contents"""
         self.check_token()
 
-        url = urljoin(BASE_URL, self.STREAM_CONTENTS_PATH + quote_plus(stream_id))
-        params = {"n": 50, "r": "", "c": continuation, "output": "json"}  # default 20, max 1000
+        url = urljoin(BASE_URL, self.STREAM_CONTENTS_PATH)
+        if stream_id:
+            url = urljoin(url, quote_plus(stream_id))
+
+        params = {"n": n, "r": r, "ot": ot, "xt": xt, "it": it, "c": c}
+        params = {arg: val for arg, val in params.items() if val is not None}
         response = self.parse_response(self.session.post(url, params=params, proxies=self.proxies))
         if "continuation" in response:
             return response["items"], response["continuation"]
         else:
             return response["items"], None
 
-    def fetch_articles(self, folder=None, tags=None, unread=True, starred=False, limit=None, n=50):
+    def fetch_articles(
+        self, stream_id=None, folder=None, tags=None, unread=True, starred=False, limit=None, n=50
+    ):
         self.check_token()
 
-        url = urljoin(BASE_URL, self.STREAM_CONTENTS_PATH)
-        if folder:
-            url = urljoin(url, quote_plus(self.GENERAL_TAG_TEMPLATE.format(folder)))
+        if not stream_id and folder:
+            stream_id = self.GENERAL_TAG_TEMPLATE.format(folder)
 
-        params = {"n": n, "c": str(uuid4())}
+        params = {"stream_id": stream_id, "n": n, "c": str(uuid4())}
         if unread:
             params["xt"] = self.READ_TAG
 
         if starred:
             params["it"] = self.STARRED_TAG
 
         fetched_count = 0
-        response = self.parse_response(self.session.post(url, params=params, proxies=self.proxies))
-        for data in response["items"]:
+        items, continuation = self.__get_stream_contents(**params)
+        for data in items:
             categories = {
                 category.split("/")[-1]
                 for category in data.get("categories", [])
@@ -202,13 +192,10 @@ def fetch_articles(self, folder=None, tags=None, unread=True, starred=False, lim
             if limit and fetched_count >= limit:
                 break
 
-        continuation = response.get("continuation")
         while continuation and (not limit or fetched_count < limit):
             params["c"] = continuation
-            response = self.parse_response(
-                self.session.post(url, params=params, proxies=self.proxies)
-            )
-            for data in response["items"]:
+            items, continuation = self.__get_stream_contents(**params)
+            for data in items:
                 categories = {
                     category.split("/")[-1]
                     for category in data.get("categories", [])
@@ -221,14 +208,14 @@ def fetch_articles(self, folder=None, tags=None, unread=True, starred=False, lim
                 if limit and fetched_count >= limit:
                     break
 
-            continuation = response.get("continuation")
-
-    def fetch_unread(self, folder=None, tags=None, limit=None):
-        for article in self.fetch_articles(folder=folder, tags=tags, unread=True):
+    def fetch_unread(self, folder=None, tags=None, limit=None, n=None):
+        for article in self.fetch_articles(folder=folder, tags=tags, unread=True, n=n):
             yield article
 
-    def fetch_starred(self, folder=None, tags=None, limit=None):
-        for article in self.fetch_articles(folder=folder, tags=tags, unread=False, starred=True):
+    def fetch_starred(self, folder=None, tags=None, limit=None, n=None):
+        for article in self.fetch_articles(
+            folder=folder, tags=tags, unread=False, starred=True, n=n
+        ):
             yield article
 
     def add_general_label(self, articles, label):
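The net effect of this refactor is that all request construction and continuation handling now live in the private __get_stream_contents, which returns an (items, continuation) pair that fetch_articles loops over. A minimal standalone sketch of that continuation-token pattern, with a hypothetical fetch_page standing in for the API call (fetch_page and iter_stream are not part of the library):

    def fetch_page(stream_id, n=50, c=None):
        """Hypothetical stand-in for one stream-contents request.

        Returns (items, continuation), where continuation is None on the last page.
        """
        raise NotImplementedError

    def iter_stream(stream_id, n=50, limit=None):
        fetched = 0
        items, c = fetch_page(stream_id, n=n)
        while True:
            for item in items:
                yield item
                fetched += 1
                if limit and fetched >= limit:
                    return
            if c is None:  # no continuation token: last page reached
                return
            items, c = fetch_page(stream_id, n=n, c=c)  # request the next page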
24 changes: 18 additions & 6 deletions inoreader/main.py

@@ -200,6 +200,9 @@ def list_tags():
@main.command("fetch-unread")
@click.option("-f", "--folder", required=True, help="Folder which articles belong to")
@click.option("-t", "--tags", help="Tag(s) for filtering, separate with comma")
@click.option(
"--batch-size", type=int, default=50, help="Maximum number of articles per API request"
)
@click.option("-o", "--outfile", required=True, help="Filename to save articles")
@click.option(
"--out-format",
@@ -208,14 +211,14 @@ def list_tags():
help="Format of output file, default: json",
)
@catch_error
def fetch_unread(folder, tags, outfile, out_format):
def fetch_unread(folder, tags, batch_size, outfile, out_format):
"""Fetch unread articles"""
client = get_client()

tag_list = [] if not tags else tags.split(",")
fout = codecs.open(outfile, mode="w", encoding="utf-8")
writer = csv.writer(fout, delimiter=",") if out_format == "csv" else None
for idx, article in enumerate(client.fetch_unread(folder=folder, tags=tag_list)):
for idx, article in enumerate(client.fetch_unread(folder=folder, tags=tag_list, n=batch_size)):
if idx > 0 and (idx % 10) == 0:
LOGGER.info("fetched %d articles", idx)
title = article.title
@@ -391,6 +394,10 @@ def get_subscriptions(outfile, folder, out_format):

@main.command("fetch-articles")
@click.option("-i", "--stream-id", required=True, help="Stream ID which you want to fetch")
@click.option(
"--batch-size", type=int, default=50, help="Maximum number of articles per API request"
)
@click.option("--only-unread", is_flag=True, help="Fetch unread articles only")
@click.option("-o", "--outfile", required=True, help="Filename to save results")
@click.option(
"--out-format",
@@ -399,7 +406,7 @@ def get_subscriptions(outfile, folder, out_format):
help="Format of output, default: json",
)
@catch_error
def fetch_articles(outfile, stream_id, out_format):
def fetch_articles(outfile, stream_id, batch_size, only_unread, out_format):
"""Fetch articles by stream id"""
client = get_client()

@@ -409,7 +416,9 @@ def fetch_articles(outfile, stream_id, out_format):
     writer = csv.DictWriter(fout, ["title", "content"], delimiter=",", quoting=csv.QUOTE_ALL)
     writer.writeheader()
 
-    for idx, article in enumerate(client.get_stream_contents(stream_id)):
+    for idx, article in enumerate(
+        client.fetch_articles(stream_id=stream_id, n=batch_size, unread=only_unread)
+    ):
         if idx > 0 and (idx % 10) == 0:
             LOGGER.info("fetched %d articles", idx)
 
@@ -469,6 +478,9 @@ def dedupe(folder, thresh):
@main.command("fetch-starred")
@click.option("-f", "--folder", help="Folder which articles belong to")
@click.option("-t", "--tags", help="Tag(s) for filtering, separate with comma")
@click.option(
"--batch-size", type=int, default=50, help="Maximum number of articles per API request"
)
@click.option(
"-o", "--outfile", help="Filename to save articles, required when output format is `csv`"
)
@@ -484,7 +496,7 @@ def dedupe(folder, thresh):
help="Format of output file, default: json",
)
@catch_error
def fetch_starred(folder, tags, outfile, outdir, limit, save_image, out_format):
def fetch_starred(folder, tags, batch_size, outfile, outdir, limit, save_image, out_format):
"""Fetch starred articles"""
client = get_client()

@@ -506,7 +518,7 @@ def fetch_starred(folder, tags, outfile, outdir, limit, save_image, out_format):
     tag_list = [] if not tags else tags.split(",")
     url_to_image = {}
     fetched_count = 0
-    for article in client.fetch_starred(folder=folder, tags=tag_list, limit=limit):
+    for article in client.fetch_starred(folder=folder, tags=tag_list, limit=limit, n=batch_size):
         if limit and fetched_count >= limit:
             break
 
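With these options in place, each command accepts a --batch-size flag that is passed through to the client as the per-request n. Assuming the package's console entry point is installed as `inoreader` (the entry-point name is not shown in this diff), usage would look like:

    inoreader fetch-unread -f News --batch-size 200 -o unread.json
    inoreader fetch-articles -i "feed/http://example.com/rss" --batch-size 100 --only-unread -o articles.json

The folder name and stream id here are illustrative placeholders.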
2 changes: 1 addition & 1 deletion pyproject.toml

@@ -1,6 +1,6 @@
 [project]
 name = "python-inoreader"
-version = "0.6.0"
+version = "0.7.0"
 description = "Python wrapper of Inoreader API"
 authors = [
     {name = "Linusp", email = "[email protected]"},
