
Release 0.7.0 #30

Merged · 6 commits · May 3, 2024
11 changes: 11 additions & 0 deletions CHANGELOG.md
@@ -1,5 +1,16 @@
# CHANGELOG

## v0.7.0

Removed

- Removed `InoreaderClient.get_stream_contents`.

Changed

- Added param `n` to `InoreaderClient.fetch_articles` to reduce the number of API calls, thanks to [tosborne-slalom](https://github.com/tosborne-slalom)
- Added support for a `--batch-size` option in the `fetch-articles`/`fetch-unread`/`fetch-starred` commands

## v0.6.0

Publish to pypi!
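For context on the `n` parameter mentioned above, here is a minimal usage sketch. It assumes `get_client` (the helper the CLI commands in `inoreader/main.py` call to build an authenticated `InoreaderClient`) is importable from `inoreader.main`; only `fetch_articles` and its arguments come from this PR.

```python
# A minimal sketch, not part of this PR. Assumes get_client() is importable
# from inoreader.main, as used by the CLI commands in that module.
from inoreader.main import get_client

client = get_client()

# `n` is the page size per stream-contents request (the old code noted the
# API accepts up to 1000), so fetching 100 articles with n=100 costs one
# API call instead of two at the previous fixed page size of 50.
for article in client.fetch_articles(folder="tech", unread=True, limit=100, n=100):
    print(article.title)
```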
1 change: 1 addition & 0 deletions codespell-ignore-words.txt
@@ -0,0 +1 @@
ot
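(`ot` is whitelisted here presumably because it is one of the new stream-contents request parameters introduced in `client.py`, which codespell would otherwise flag as a misspelling.)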
65 changes: 26 additions & 39 deletions inoreader/client.py
@@ -143,52 +143,42 @@ def get_subscription_list(self):
for item in response["subscriptions"]:
yield Subscription.from_json(item)

def get_stream_contents(self, stream_id, c="", limit=None):
fetched_count = 0
stop = False
while not stop:
articles, c = self.__get_stream_contents(stream_id, c)
for a in articles:
try:
yield Article.from_json(a)
fetched_count += 1
except Exception as e:
print(e)
continue
if limit and fetched_count >= limit:
stop = True
break
if c is None:
break

def __get_stream_contents(self, stream_id, continuation=""):
def __get_stream_contents(
self, stream_id=None, n=50, r=None, ot=None, xt=None, it=None, c=None
):
"""reference: https://www.inoreader.com/developers/stream-contents"""
self.check_token()

url = urljoin(BASE_URL, self.STREAM_CONTENTS_PATH + quote_plus(stream_id))
params = {"n": 50, "r": "", "c": continuation, "output": "json"} # default 20, max 1000
url = urljoin(BASE_URL, self.STREAM_CONTENTS_PATH)
if stream_id:
url = urljoin(url, quote_plus(stream_id))

params = {"n": n, "r": r, "ot": ot, "xt": xt, "it": it, "c": c}
params = {arg: val for arg, val in params.items() if val is not None}
response = self.parse_response(self.session.post(url, params=params, proxies=self.proxies))
if "continuation" in response:
return response["items"], response["continuation"]
else:
return response["items"], None

def fetch_articles(self, folder=None, tags=None, unread=True, starred=False, limit=None, n=50):
def fetch_articles(
self, stream_id=None, folder=None, tags=None, unread=True, starred=False, limit=None, n=50
):
self.check_token()

url = urljoin(BASE_URL, self.STREAM_CONTENTS_PATH)
if folder:
url = urljoin(url, quote_plus(self.GENERAL_TAG_TEMPLATE.format(folder)))
if not stream_id and folder:
stream_id = self.GENERAL_TAG_TEMPLATE.format(folder)

params = {"n": n, "c": str(uuid4())}
params = {"stream_id": stream_id, "n": n, "c": str(uuid4())}
if unread:
params["xt"] = self.READ_TAG

if starred:
params["it"] = self.STARRED_TAG

fetched_count = 0
response = self.parse_response(self.session.post(url, params=params, proxies=self.proxies))
for data in response["items"]:
items, continuation = self.__get_stream_contents(**params)
for data in items:
categories = {
category.split("/")[-1]
for category in data.get("categories", [])
@@ -202,13 +192,10 @@ def fetch_articles(self, folder=None, tags=None, unread=True, starred=False, lim
if limit and fetched_count >= limit:
break

continuation = response.get("continuation")
while continuation and (not limit or fetched_count < limit):
params["c"] = continuation
response = self.parse_response(
self.session.post(url, params=params, proxies=self.proxies)
)
for data in response["items"]:
items, continuation = self.__get_stream_contents(**params)
for data in items:
categories = {
category.split("/")[-1]
for category in data.get("categories", [])
@@ -221,14 +208,14 @@ def fetch_articles(
if limit and fetched_count >= limit:
break

continuation = response.get("continuation")

def fetch_unread(self, folder=None, tags=None, limit=None):
for article in self.fetch_articles(folder=folder, tags=tags, unread=True):
def fetch_unread(self, folder=None, tags=None, limit=None, n=None):
for article in self.fetch_articles(folder=folder, tags=tags, unread=True, n=n):
yield article

def fetch_starred(self, folder=None, tags=None, limit=None):
for article in self.fetch_articles(folder=folder, tags=tags, unread=False, starred=True):
def fetch_starred(self, folder=None, tags=None, limit=None, n=None):
for article in self.fetch_articles(
folder=folder, tags=tags, unread=False, starred=True, n=n
):
yield article

def add_general_label(self, articles, label):
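The refactor above folds all pagination into one place: `fetch_articles` builds its params dict once, then repeatedly calls the private `__get_stream_contents`, feeding each returned continuation token back in until the API stops returning one or `limit` is reached. A self-contained sketch of that continuation-token pattern, with an illustrative endpoint rather than the client's real internals:

```python
# Illustrative pagination sketch; the params and JSON shape mirror the
# stream-contents endpoint used in client.py but are assumptions here.
import requests

def paginate(url, page_size=50, limit=None):
    """Yield items from a continuation-token API, page_size per request."""
    params = {"n": page_size, "output": "json"}
    fetched = 0
    while True:
        data = requests.post(url, params=params).json()
        for item in data["items"]:
            yield item
            fetched += 1
            if limit and fetched >= limit:
                return
        if "continuation" not in data:  # last page reached
            return
        params["c"] = data["continuation"]  # resume after this page
```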
24 changes: 18 additions & 6 deletions inoreader/main.py
@@ -200,6 +200,9 @@ def list_tags():
@main.command("fetch-unread")
@click.option("-f", "--folder", required=True, help="Folder which articles belong to")
@click.option("-t", "--tags", help="Tag(s) for filtering, separate with comma")
@click.option(
"--batch-size", type=int, default=50, help="Maximum number of articles per API request"
)
@click.option("-o", "--outfile", required=True, help="Filename to save articles")
@click.option(
"--out-format",
@@ -208,14 +211,14 @@ def list_tags():
help="Format of output file, default: json",
)
@catch_error
def fetch_unread(folder, tags, outfile, out_format):
def fetch_unread(folder, tags, batch_size, outfile, out_format):
"""Fetch unread articles"""
client = get_client()

tag_list = [] if not tags else tags.split(",")
fout = codecs.open(outfile, mode="w", encoding="utf-8")
writer = csv.writer(fout, delimiter=",") if out_format == "csv" else None
for idx, article in enumerate(client.fetch_unread(folder=folder, tags=tag_list)):
for idx, article in enumerate(client.fetch_unread(folder=folder, tags=tag_list, n=batch_size)):
if idx > 0 and (idx % 10) == 0:
LOGGER.info("fetched %d articles", idx)
title = article.title
@@ -391,6 +394,10 @@ def get_subscriptions(outfile, folder, out_format):

@main.command("fetch-articles")
@click.option("-i", "--stream-id", required=True, help="Stream ID which you want to fetch")
@click.option(
"--batch-size", type=int, default=50, help="Maximum number of articles per API request"
)
@click.option("--only-unread", is_flag=True, help="Fetch unread articles only")
@click.option("-o", "--outfile", required=True, help="Filename to save results")
@click.option(
"--out-format",
@@ -399,7 +406,7 @@ def get_subscriptions(outfile, folder, out_format):
help="Format of output, default: json",
)
@catch_error
def fetch_articles(outfile, stream_id, out_format):
def fetch_articles(outfile, stream_id, batch_size, only_unread, out_format):
"""Fetch articles by stream id"""
client = get_client()

@@ -409,7 +416,9 @@ def fetch_articles(outfile, stream_id, out_format):
writer = csv.DictWriter(fout, ["title", "content"], delimiter=",", quoting=csv.QUOTE_ALL)
writer.writeheader()

for idx, article in enumerate(client.get_stream_contents(stream_id)):
for idx, article in enumerate(
client.fetch_articles(stream_id=stream_id, n=batch_size, unread=only_unread)
):
if idx > 0 and (idx % 10) == 0:
LOGGER.info("fetched %d articles", idx)

@@ -469,6 +478,9 @@ def dedupe(folder, thresh):
@main.command("fetch-starred")
@click.option("-f", "--folder", help="Folder which articles belong to")
@click.option("-t", "--tags", help="Tag(s) for filtering, separate with comma")
@click.option(
"--batch-size", type=int, default=50, help="Maximum number of articles per API request"
)
@click.option(
"-o", "--outfile", help="Filename to save articles, required when output format is `csv`"
)
@@ -484,7 +496,7 @@ def dedupe(folder, thresh):
help="Format of output file, default: json",
)
@catch_error
def fetch_starred(folder, tags, outfile, outdir, limit, save_image, out_format):
def fetch_starred(folder, tags, batch_size, outfile, outdir, limit, save_image, out_format):
"""Fetch starred articles"""
client = get_client()

@@ -506,7 +518,7 @@ def fetch_starred(folder, tags, outfile, outdir, limit, save_image, out_format):
tag_list = [] if not tags else tags.split(",")
url_to_image = {}
fetched_count = 0
for article in client.fetch_starred(folder=folder, tags=tag_list, limit=limit):
for article in client.fetch_starred(folder=folder, tags=tag_list, limit=limit, n=batch_size):
if limit and fetched_count >= limit:
break

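Taken together, the three commands now expose the page size directly, so a large fetch can be done in far fewer requests, e.g. `inoreader fetch-articles -i "feed/https://example.com/rss" --batch-size 1000 --only-unread -o articles.json` (the `inoreader` entry-point name and the stream ID value are illustrative; neither appears in this diff).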
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -1,6 +1,6 @@
[project]
name = "python-inoreader"
version = "0.6.0"
version = "0.7.0"
description = "Python wrapper of Inoreader API"
authors = [
{name = "Linusp", email = "[email protected]"},