-
Notifications
You must be signed in to change notification settings - Fork 306
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
34 changed files
with
110 additions
and
110 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,7 +2,7 @@ feedparser and its unit tests are released under the following license: | |
|
||
----- begin license block ----- | ||
|
||
Copyright (C) 2010-2015 Kurt McKee <[email protected]> | ||
Copyright (C) 2010-2022 Kurt McKee <[email protected]> | ||
Copyright (C) 2002-2008 Mark Pilgrim | ||
All rights reserved. | ||
|
||
|
@@ -38,7 +38,7 @@ released under the following license: | |
|
||
----- begin license block ----- | ||
|
||
Copyright (C) 2010-2015 Kurt McKee <[email protected]> | ||
Copyright (C) 2010-2022 Kurt McKee <[email protected]> | ||
Copyright (C) 2004-2008 Mark Pilgrim. All rights reserved. | ||
|
||
Redistribution and use in source (Sphinx ReST) and "compiled" forms (HTML, PDF, | ||
|
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,19 +1,19 @@ | ||
<?xml version="1.0" encoding="UTF-8" standalone="yes"?> | ||
<addon id="script.module.feedparser" name="feedparser" version="6.0.2" provider-name="Kurt McKee"> | ||
<requires> | ||
<import addon="xbmc.python" version="3.0.0"/> | ||
<import addon="script.module.sgmllib3k" version="1.0.0+matrix.1"/> | ||
</requires> | ||
<extension point="xbmc.python.module" library="lib"/> | ||
<extension point="xbmc.addon.metadata"> | ||
<language></language> | ||
<summary lang="en_GB">Universal feed parser, handles RSS 0.9x, RSS 1.0, RSS 2.0, CDF, Atom 0.3, and Atom 1.0 feeds</summary> | ||
<description lang="en_GB">Universal feed parser, handles RSS 0.9x, RSS 1.0, RSS 2.0, CDF, Atom 0.3, and Atom 1.0 feeds</description> | ||
<license>MIT</license> | ||
<platform>all</platform> | ||
<source>https://github.com/kurtmckee/feedparser</source> | ||
<assets> | ||
<icon>icon.png</icon> | ||
</assets> | ||
</extension> | ||
<addon id="script.module.feedparser" name="feedparser" version="6.0.11" provider-name="Kurt McKee"> | ||
<requires> | ||
<import addon="xbmc.python" version="3.0.0" /> | ||
<import addon="script.module.sgmllib3k" version="1.0.0+matrix.1" /> | ||
</requires> | ||
<extension point="xbmc.python.module" library="lib" /> | ||
<extension point="xbmc.addon.metadata"> | ||
<summary lang="en_GB">Universal feed parser, handles RSS 0.9x, RSS 1.0, RSS 2.0, CDF, Atom 0.3, and Atom 1.0 feeds</summary> | ||
<description lang="en_GB">Universal feed parser, handles RSS 0.9x, RSS 1.0, RSS 2.0, CDF, Atom 0.3, and Atom 1.0 feeds</description> | ||
<license>BSD-2-Clause</license> | ||
<platform>all</platform> | ||
<website>https://github.com/kurtmckee/feedparser</website> | ||
<source>https://github.com/kurtmckee/feedparser</source> | ||
<assets> | ||
<icon>resources/icon.png</icon> | ||
</assets> | ||
</extension> | ||
</addon> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,4 @@ | ||
# Copyright 2010-2020 Kurt McKee <[email protected]> | ||
# Copyright 2010-2023 Kurt McKee <[email protected]> | ||
# Copyright 2002-2008 Mark Pilgrim | ||
# All rights reserved. | ||
# | ||
|
@@ -32,7 +32,7 @@ | |
|
||
__author__ = 'Kurt McKee <[email protected]>' | ||
__license__ = 'BSD 2-clause' | ||
__version__ = '6.0.2' | ||
__version__ = '6.0.11' | ||
|
||
# HTTP "User-Agent" header to send to servers when downloading feeds. | ||
# If you are embedding feedparser in a larger application, you should | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,5 @@ | ||
# The public API for feedparser | ||
# Copyright 2010-2020 Kurt McKee <[email protected]> | ||
# Copyright 2010-2023 Kurt McKee <[email protected]> | ||
# Copyright 2002-2008 Mark Pilgrim | ||
# All rights reserved. | ||
# | ||
|
@@ -27,6 +27,7 @@ | |
# POSSIBILITY OF SUCH DAMAGE. | ||
|
||
import io | ||
import urllib.error | ||
import urllib.parse | ||
import xml.sax | ||
|
||
|
@@ -211,7 +212,14 @@ def parse(url_file_stream_or_string, etag=None, modified=None, agent=None, refer | |
headers={}, | ||
) | ||
|
||
data = _open_resource(url_file_stream_or_string, etag, modified, agent, referrer, handlers, request_headers, result) | ||
try: | ||
data = _open_resource(url_file_stream_or_string, etag, modified, agent, referrer, handlers, request_headers, result) | ||
except urllib.error.URLError as error: | ||
result.update({ | ||
'bozo': True, | ||
'bozo_exception': error, | ||
}) | ||
return result | ||
|
||
if not data: | ||
return result | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,4 @@ | ||
# Copyright 2010-2020 Kurt McKee <[email protected]> | ||
# Copyright 2010-2023 Kurt McKee <[email protected]> | ||
# Copyright 2002-2008 Mark Pilgrim | ||
# All rights reserved. | ||
# | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,4 @@ | ||
# Copyright 2010-2020 Kurt McKee <[email protected]> | ||
# Copyright 2010-2023 Kurt McKee <[email protected]> | ||
# Copyright 2002-2008 Mark Pilgrim | ||
# All rights reserved. | ||
# | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,4 @@ | ||
# Copyright 2010-2020 Kurt McKee <[email protected]> | ||
# Copyright 2010-2023 Kurt McKee <[email protected]> | ||
# Copyright 2002-2008 Mark Pilgrim | ||
# All rights reserved. | ||
# | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,4 @@ | ||
# Copyright 2010-2020 Kurt McKee <[email protected]> | ||
# Copyright 2010-2023 Kurt McKee <[email protected]> | ||
# Copyright 2002-2008 Mark Pilgrim | ||
# All rights reserved. | ||
# | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,4 @@ | ||
# Copyright 2010-2020 Kurt McKee <[email protected]> | ||
# Copyright 2010-2023 Kurt McKee <[email protected]> | ||
# Copyright 2002-2008 Mark Pilgrim | ||
# All rights reserved. | ||
# | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,4 @@ | ||
# Copyright 2010-2020 Kurt McKee <[email protected]> | ||
# Copyright 2010-2023 Kurt McKee <[email protected]> | ||
# Copyright 2002-2008 Mark Pilgrim | ||
# All rights reserved. | ||
# | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,4 @@ | ||
# Copyright 2010-2020 Kurt McKee <[email protected]> | ||
# Copyright 2010-2023 Kurt McKee <[email protected]> | ||
# Copyright 2002-2008 Mark Pilgrim | ||
# All rights reserved. | ||
# | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,4 @@ | ||
# Copyright 2010-2020 Kurt McKee <[email protected]> | ||
# Copyright 2010-2023 Kurt McKee <[email protected]> | ||
# Copyright 2002-2008 Mark Pilgrim | ||
# All rights reserved. | ||
# | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,4 @@ | ||
# Copyright 2010-2020 Kurt McKee <[email protected]> | ||
# Copyright 2010-2023 Kurt McKee <[email protected]> | ||
# Copyright 2002-2008 Mark Pilgrim | ||
# All rights reserved. | ||
# | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,5 @@ | ||
# Character encoding routines | ||
# Copyright 2010-2020 Kurt McKee <[email protected]> | ||
# Copyright 2010-2023 Kurt McKee <[email protected]> | ||
# Copyright 2002-2008 Mark Pilgrim | ||
# All rights reserved. | ||
# | ||
|
@@ -26,9 +26,9 @@ | |
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | ||
# POSSIBILITY OF SUCH DAMAGE. | ||
|
||
import cgi | ||
import codecs | ||
import re | ||
import typing as t | ||
|
||
try: | ||
try: | ||
|
@@ -68,6 +68,30 @@ def lazy_chardet_encoding(data): | |
RE_XML_PI_ENCODING = re.compile(br'^<\?.*encoding=[\'"](.*?)[\'"].*\?>') | ||
|
||
|
||
def parse_content_type(line: str) -> t.Tuple[str, str]:
    """Parse an HTTP Content-Type header.

    The return value will be a tuple of strings:
    the MIME type, and the value of the "charset" (if any).
    Both are returned as empty strings when absent.

    This is a custom replacement for Python's cgi.parse_header().
    The cgi module will be removed in Python 3.13.
    """

    # str.split() always yields at least one chunk (even for an empty
    # string), so the MIME type can be unpacked unconditionally.
    mime_type, *parameters = line.split(";")

    charset_value = ""
    for parameter in parameters:
        key, _, value = parameter.partition("=")
        if key.strip().lower() == "charset":
            # Drop surrounding whitespace first, then any quoting.
            # A later "charset" parameter overrides an earlier one.
            charset_value = value.strip().strip("\"'")

    return mime_type.strip(), charset_value
|
||
|
||
def convert_to_utf8(http_headers, data, result): | ||
"""Detect and convert the character encoding to UTF-8. | ||
|
@@ -181,10 +205,7 @@ def convert_to_utf8(http_headers, data, result): | |
# XML declaration encoding, and HTTP encoding, following the | ||
# heuristic defined in RFC 3023. | ||
http_content_type = http_headers.get('content-type') or '' | ||
http_content_type, params = cgi.parse_header(http_content_type) | ||
http_encoding = params.get('charset', '').replace("'", "") | ||
if isinstance(http_encoding, bytes): | ||
http_encoding = http_encoding.decode('utf-8', 'ignore') | ||
http_content_type, http_encoding = parse_content_type(http_content_type) | ||
|
||
acceptable_content_type = 0 | ||
application_content_types = ('application/xml', 'application/xml-dtd', | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,5 @@ | ||
# Exceptions used throughout feedparser | ||
# Copyright 2010-2020 Kurt McKee <[email protected]> | ||
# Copyright 2010-2023 Kurt McKee <[email protected]> | ||
# Copyright 2002-2008 Mark Pilgrim | ||
# All rights reserved. | ||
# | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,4 @@ | ||
# Copyright 2010-2020 Kurt McKee <[email protected]> | ||
# Copyright 2010-2023 Kurt McKee <[email protected]> | ||
# Copyright 2002-2008 Mark Pilgrim | ||
# All rights reserved. | ||
# | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,4 @@ | ||
# Copyright 2010-2020 Kurt McKee <[email protected]> | ||
# Copyright 2010-2023 Kurt McKee <[email protected]> | ||
# Copyright 2002-2008 Mark Pilgrim | ||
# All rights reserved. | ||
# | ||
|
@@ -53,6 +53,8 @@ def http_error_default(self, req, fp, code, msg, headers): | |
|
||
def http_error_301(self, req, fp, code, msg, hdrs):
    """Handle an HTTP 301 response.

    Delegates to urllib's standard redirect handler. If that produces no
    response, the original response object ``fp`` is returned unchanged.
    Otherwise the redirect response is annotated with ``status`` (the
    code passed in) and ``newurl`` (the value of its ``geturl()``)
    before being returned.
    """
    redirected = urllib.request.HTTPRedirectHandler.http_error_301(
        self, req, fp, code, msg, hdrs
    )
    if redirected:
        redirected.status = code
        redirected.newurl = redirected.geturl()
        return redirected
    # Nothing came back from the redirect handler: hand back the
    # response we were given.
    return fp
|
@@ -78,7 +80,7 @@ def http_error_401(self, req, fp, code, msg, headers): | |
host = urllib.parse.urlparse(req.get_full_url())[1] | ||
if 'Authorization' not in req.headers or 'WWW-Authenticate' not in headers: | ||
return self.http_error_default(req, fp, code, msg, headers) | ||
auth = base64.decodebytes(req.headers['Authorization'].split(' ')[1].encode('utf8')) | ||
auth = base64.decodebytes(req.headers['Authorization'].split(' ')[1].encode()).decode() | ||
user, passw = auth.split(':') | ||
realm = re.findall('realm="([^"]*)"', headers['WWW-Authenticate'])[0] | ||
self.add_password(realm, host, user, passw) | ||
|
@@ -145,12 +147,23 @@ def get(url, etag=None, modified=None, agent=None, referrer=None, handlers=None, | |
if url_pieces.port: | ||
new_pieces[1] = f'{url_pieces.hostname}:{url_pieces.port}' | ||
url = urllib.parse.urlunparse(new_pieces) | ||
auth = base64.standard_b64encode(f'{url_pieces.username}:{url_pieces.password}').strip() | ||
auth = base64.standard_b64encode(f'{url_pieces.username}:{url_pieces.password}'.encode()).decode() | ||
|
||
# iri support | ||
if not isinstance(url, bytes): | ||
url = convert_to_idn(url) | ||
|
||
# Prevent UnicodeEncodeErrors caused by Unicode characters in the path. | ||
bits = [] | ||
for c in url: | ||
try: | ||
c.encode('ascii') | ||
except UnicodeEncodeError: | ||
bits.append(urllib.parse.quote(c)) | ||
else: | ||
bits.append(c) | ||
url = ''.join(bits) | ||
|
||
# try to open with urllib2 (to use optional headers) | ||
request = _build_urllib2_request(url, agent, ACCEPT_HEADER, etag, modified, referrer, auth, request_headers) | ||
opener = urllib.request.build_opener(*tuple(handlers + [_FeedURLHandler()])) | ||
|
@@ -203,7 +216,7 @@ def get(url, etag=None, modified=None, agent=None, referrer=None, handlers=None, | |
result['href'] = f.url.decode('utf-8', 'ignore') | ||
else: | ||
result['href'] = f.url | ||
result['status'] = getattr(f, 'status', 200) | ||
result['status'] = getattr(f, 'status', None) or 200 | ||
|
||
# Stop processing if the server sent HTTP 304 Not Modified. | ||
if getattr(f, 'code', 0) == 304: | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,4 @@ | ||
# Copyright 2010-2020 Kurt McKee <[email protected]> | ||
# Copyright 2010-2023 Kurt McKee <[email protected]> | ||
# Copyright 2002-2008 Mark Pilgrim | ||
# All rights reserved. | ||
# | ||
|
@@ -193,6 +193,7 @@ def __init__(self): | |
self.svgOK = 0 | ||
self.title_depth = -1 | ||
self.depth = 0 | ||
self.hasContent = 0 | ||
if self.lang: | ||
self.feeddata['language'] = self.lang.replace('_', '-') | ||
|
||
|
@@ -506,9 +507,7 @@ def pop(self, element, strip_whitespace=1): | |
if base64 and self.contentparams.get('base64', 0): | ||
try: | ||
output = base64.decodebytes(output.encode('utf8')).decode('utf8') | ||
except binascii.Error: | ||
pass | ||
except binascii.Incomplete: | ||
except (binascii.Error, binascii.Incomplete, UnicodeDecodeError): | ||
pass | ||
|
||
# resolve relative URIs | ||
|
Oops, something went wrong.