[script.module.feedparser] 6.0.11

xbmc · Dec 11, 2023 · 7bd036a · 7bd036a
1 parent 4766b1b
commit 7bd036a
Show file tree

Hide file tree

Showing 34 changed files with 110 additions and 110 deletions.
diff --git a/script.module.feedparser/LICENSE b/script.module.feedparser/LICENSE.txt
@@ -2,7 +2,7 @@ feedparser and its unit tests are released under the following license:
 
 ----- begin license block -----
 
-Copyright (C) 2010-2015 Kurt McKee <contactme@kurtmckee.org>
+Copyright (C) 2010-2022 Kurt McKee <contactme@kurtmckee.org>
 Copyright (C) 2002-2008 Mark Pilgrim
 All rights reserved.
 
@@ -38,7 +38,7 @@ released under the following license:
 
 ----- begin license block -----
 
-Copyright (C) 2010-2015 Kurt McKee <contactme@kurtmckee.org>
+Copyright (C) 2010-2022 Kurt McKee <contactme@kurtmckee.org>
 Copyright (C) 2004-2008 Mark Pilgrim. All rights reserved.
 
 Redistribution and use in source (Sphinx ReST) and "compiled" forms (HTML, PDF,

diff --git a/script.module.feedparser/README.rst b/script.module.feedparser/README.rst
diff --git a/script.module.feedparser/addon.xml b/script.module.feedparser/addon.xml
@@ -1,19 +1,19 @@
 <?xml version="1.0" encoding="UTF-8" standalone="yes"?>
-<addon id="script.module.feedparser" name="feedparser" version="6.0.2" provider-name="Kurt McKee">
-    <requires>
-        <import addon="xbmc.python" version="3.0.0"/>
-        <import addon="script.module.sgmllib3k" version="1.0.0+matrix.1"/>
-    </requires>
-    <extension point="xbmc.python.module" library="lib"/>
-    <extension point="xbmc.addon.metadata">
-        <language></language>
-        <summary lang="en_GB">Universal feed parser, handles RSS 0.9x, RSS 1.0, RSS 2.0, CDF, Atom 0.3, and Atom 1.0 feeds</summary>
-        <description lang="en_GB">Universal feed parser, handles RSS 0.9x, RSS 1.0, RSS 2.0, CDF, Atom 0.3, and Atom 1.0 feeds</description>
-        <license>MIT</license>
-        <platform>all</platform>
-        <source>https://github.com/kurtmckee/feedparser</source>
-        <assets>
-            <icon>icon.png</icon>
-        </assets>
-    </extension>
+<addon id="script.module.feedparser" name="feedparser" version="6.0.11" provider-name="Kurt McKee">
+  <requires>
+    <import addon="xbmc.python" version="3.0.0" />
+    <import addon="script.module.sgmllib3k" version="1.0.0+matrix.1" />
+  </requires>
+  <extension point="xbmc.python.module" library="lib" />
+  <extension point="xbmc.addon.metadata">
+    <summary lang="en_GB">Universal feed parser, handles RSS 0.9x, RSS 1.0, RSS 2.0, CDF, Atom 0.3, and Atom 1.0 feeds</summary>
+    <description lang="en_GB">Universal feed parser, handles RSS 0.9x, RSS 1.0, RSS 2.0, CDF, Atom 0.3, and Atom 1.0 feeds</description>
+    <license>BSD-2-Clause</license>
+    <platform>all</platform>
+    <website>https://github.com/kurtmckee/feedparser</website>
+    <source>https://github.com/kurtmckee/feedparser</source>
+    <assets>
+      <icon>resources/icon.png</icon>
+    </assets>
+  </extension>
 </addon>
diff --git a/script.module.feedparser/lib/feedparser/__init__.py b/script.module.feedparser/lib/feedparser/__init__.py
@@ -1,4 +1,4 @@
-# Copyright 2010-2020 Kurt McKee <contactme@kurtmckee.org>
+# Copyright 2010-2023 Kurt McKee <contactme@kurtmckee.org>
 # Copyright 2002-2008 Mark Pilgrim
 # All rights reserved.
 #
@@ -32,7 +32,7 @@
 
 __author__ = 'Kurt McKee <[email protected]>'
 __license__ = 'BSD 2-clause'
-__version__ = '6.0.2'
+__version__ = '6.0.11'
 
 # HTTP "User-Agent" header to send to servers when downloading feeds.
 # If you are embedding feedparser in a larger application, you should

diff --git a/script.module.feedparser/lib/feedparser/api.py b/script.module.feedparser/lib/feedparser/api.py
@@ -1,5 +1,5 @@
 # The public API for feedparser
-# Copyright 2010-2020 Kurt McKee <contactme@kurtmckee.org>
+# Copyright 2010-2023 Kurt McKee <contactme@kurtmckee.org>
 # Copyright 2002-2008 Mark Pilgrim
 # All rights reserved.
 #
@@ -27,6 +27,7 @@
 # POSSIBILITY OF SUCH DAMAGE.
 
 import io
+import urllib.error
 import urllib.parse
 import xml.sax
 
@@ -211,7 +212,14 @@ def parse(url_file_stream_or_string, etag=None, modified=None, agent=None, refer
         headers={},
     )
 
-    data = _open_resource(url_file_stream_or_string, etag, modified, agent, referrer, handlers, request_headers, result)
+    try:
+        data = _open_resource(url_file_stream_or_string, etag, modified, agent, referrer, handlers, request_headers, result)
+    except urllib.error.URLError as error:
+        result.update({
+            'bozo': True,
+            'bozo_exception': error,
+        })
+        return result
 
     if not data:
         return result

diff --git a/script.module.feedparser/lib/feedparser/datetimes/__init__.py b/script.module.feedparser/lib/feedparser/datetimes/__init__.py
@@ -1,4 +1,4 @@
-# Copyright 2010-2020 Kurt McKee <contactme@kurtmckee.org>
+# Copyright 2010-2023 Kurt McKee <contactme@kurtmckee.org>
 # Copyright 2002-2008 Mark Pilgrim
 # All rights reserved.
 #

diff --git a/script.module.feedparser/lib/feedparser/datetimes/asctime.py b/script.module.feedparser/lib/feedparser/datetimes/asctime.py
@@ -1,4 +1,4 @@
-# Copyright 2010-2020 Kurt McKee <contactme@kurtmckee.org>
+# Copyright 2010-2023 Kurt McKee <contactme@kurtmckee.org>
 # Copyright 2002-2008 Mark Pilgrim
 # All rights reserved.
 #

diff --git a/script.module.feedparser/lib/feedparser/datetimes/greek.py b/script.module.feedparser/lib/feedparser/datetimes/greek.py
@@ -1,4 +1,4 @@
-# Copyright 2010-2020 Kurt McKee <contactme@kurtmckee.org>
+# Copyright 2010-2023 Kurt McKee <contactme@kurtmckee.org>
 # Copyright 2002-2008 Mark Pilgrim
 # All rights reserved.
 #

diff --git a/script.module.feedparser/lib/feedparser/datetimes/hungarian.py b/script.module.feedparser/lib/feedparser/datetimes/hungarian.py
@@ -1,4 +1,4 @@
-# Copyright 2010-2020 Kurt McKee <contactme@kurtmckee.org>
+# Copyright 2010-2023 Kurt McKee <contactme@kurtmckee.org>
 # Copyright 2002-2008 Mark Pilgrim
 # All rights reserved.
 #

diff --git a/script.module.feedparser/lib/feedparser/datetimes/iso8601.py b/script.module.feedparser/lib/feedparser/datetimes/iso8601.py
@@ -1,4 +1,4 @@
-# Copyright 2010-2020 Kurt McKee <contactme@kurtmckee.org>
+# Copyright 2010-2023 Kurt McKee <contactme@kurtmckee.org>
 # Copyright 2002-2008 Mark Pilgrim
 # All rights reserved.
 #

diff --git a/script.module.feedparser/lib/feedparser/datetimes/korean.py b/script.module.feedparser/lib/feedparser/datetimes/korean.py
@@ -1,4 +1,4 @@
-# Copyright 2010-2020 Kurt McKee <contactme@kurtmckee.org>
+# Copyright 2010-2023 Kurt McKee <contactme@kurtmckee.org>
 # Copyright 2002-2008 Mark Pilgrim
 # All rights reserved.
 #

diff --git a/script.module.feedparser/lib/feedparser/datetimes/perforce.py b/script.module.feedparser/lib/feedparser/datetimes/perforce.py
@@ -1,4 +1,4 @@
-# Copyright 2010-2020 Kurt McKee <contactme@kurtmckee.org>
+# Copyright 2010-2023 Kurt McKee <contactme@kurtmckee.org>
 # Copyright 2002-2008 Mark Pilgrim
 # All rights reserved.
 #

diff --git a/script.module.feedparser/lib/feedparser/datetimes/rfc822.py b/script.module.feedparser/lib/feedparser/datetimes/rfc822.py
@@ -1,4 +1,4 @@
-# Copyright 2010-2020 Kurt McKee <contactme@kurtmckee.org>
+# Copyright 2010-2023 Kurt McKee <contactme@kurtmckee.org>
 # Copyright 2002-2008 Mark Pilgrim
 # All rights reserved.
 #

diff --git a/script.module.feedparser/lib/feedparser/datetimes/w3dtf.py b/script.module.feedparser/lib/feedparser/datetimes/w3dtf.py
@@ -1,4 +1,4 @@
-# Copyright 2010-2020 Kurt McKee <contactme@kurtmckee.org>
+# Copyright 2010-2023 Kurt McKee <contactme@kurtmckee.org>
 # Copyright 2002-2008 Mark Pilgrim
 # All rights reserved.
 #

diff --git a/script.module.feedparser/lib/feedparser/encodings.py b/script.module.feedparser/lib/feedparser/encodings.py
@@ -1,5 +1,5 @@
 # Character encoding routines
-# Copyright 2010-2020 Kurt McKee <contactme@kurtmckee.org>
+# Copyright 2010-2023 Kurt McKee <contactme@kurtmckee.org>
 # Copyright 2002-2008 Mark Pilgrim
 # All rights reserved.
 #
@@ -26,9 +26,9 @@
 # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 # POSSIBILITY OF SUCH DAMAGE.
 
-import cgi
 import codecs
 import re
+import typing as t
 
 try:
     try:
@@ -68,6 +68,30 @@ def lazy_chardet_encoding(data):
 RE_XML_PI_ENCODING = re.compile(br'^<\?.*encoding=[\'"](.*?)[\'"].*\?>')
 
 
+def parse_content_type(line: str) -> t.Tuple[str, str]:
+    """Parse an HTTP Content-Type header.
+
+    The return value will be a tuple of strings:
+    the MIME type, and the value of the "charset" (if any).
+
+    This is a custom replacement for Python's cgi.parse_header().
+    The cgi module will be removed in Python 3.13.
+    """
+
+    chunks = line.split(";")
+    if not chunks:
+        return "", ""
+
+    mime_type = chunks[0].strip()
+    charset_value = ""
+    for chunk in chunks[1:]:
+        key, _, value = chunk.partition("=")
+        if key.strip().lower() == "charset":
+            charset_value = value.strip().strip("\"'")
+
+    return mime_type, charset_value
+
+
 def convert_to_utf8(http_headers, data, result):
     """Detect and convert the character encoding to UTF-8.
 
@@ -181,10 +205,7 @@ def convert_to_utf8(http_headers, data, result):
     # XML declaration encoding, and HTTP encoding, following the
     # heuristic defined in RFC 3023.
     http_content_type = http_headers.get('content-type') or ''
-    http_content_type, params = cgi.parse_header(http_content_type)
-    http_encoding = params.get('charset', '').replace("'", "")
-    if isinstance(http_encoding, bytes):
-        http_encoding = http_encoding.decode('utf-8', 'ignore')
+    http_content_type, http_encoding = parse_content_type(http_content_type)
 
     acceptable_content_type = 0
     application_content_types = ('application/xml', 'application/xml-dtd',

diff --git a/script.module.feedparser/lib/feedparser/exceptions.py b/script.module.feedparser/lib/feedparser/exceptions.py
@@ -1,5 +1,5 @@
 # Exceptions used throughout feedparser
-# Copyright 2010-2020 Kurt McKee <contactme@kurtmckee.org>
+# Copyright 2010-2023 Kurt McKee <contactme@kurtmckee.org>
 # Copyright 2002-2008 Mark Pilgrim
 # All rights reserved.
 #

diff --git a/script.module.feedparser/lib/feedparser/html.py b/script.module.feedparser/lib/feedparser/html.py
@@ -1,4 +1,4 @@
-# Copyright 2010-2020 Kurt McKee <contactme@kurtmckee.org>
+# Copyright 2010-2023 Kurt McKee <contactme@kurtmckee.org>
 # Copyright 2002-2008 Mark Pilgrim
 # All rights reserved.
 #

diff --git a/script.module.feedparser/lib/feedparser/http.py b/script.module.feedparser/lib/feedparser/http.py
@@ -1,4 +1,4 @@
-# Copyright 2010-2020 Kurt McKee <contactme@kurtmckee.org>
+# Copyright 2010-2023 Kurt McKee <contactme@kurtmckee.org>
 # Copyright 2002-2008 Mark Pilgrim
 # All rights reserved.
 #
@@ -53,6 +53,8 @@ def http_error_default(self, req, fp, code, msg, headers):
 
     def http_error_301(self, req, fp, code, msg, hdrs):
         result = urllib.request.HTTPRedirectHandler.http_error_301(self, req, fp, code, msg, hdrs)
+        if not result:
+            return fp
         result.status = code
         result.newurl = result.geturl()
         return result
@@ -78,7 +80,7 @@ def http_error_401(self, req, fp, code, msg, headers):
         host = urllib.parse.urlparse(req.get_full_url())[1]
         if 'Authorization' not in req.headers or 'WWW-Authenticate' not in headers:
             return self.http_error_default(req, fp, code, msg, headers)
-        auth = base64.decodebytes(req.headers['Authorization'].split(' ')[1].encode('utf8'))
+        auth = base64.decodebytes(req.headers['Authorization'].split(' ')[1].encode()).decode()
         user, passw = auth.split(':')
         realm = re.findall('realm="([^"]*)"', headers['WWW-Authenticate'])[0]
         self.add_password(realm, host, user, passw)
@@ -145,12 +147,23 @@ def get(url, etag=None, modified=None, agent=None, referrer=None, handlers=None,
             if url_pieces.port:
                 new_pieces[1] = f'{url_pieces.hostname}:{url_pieces.port}'
             url = urllib.parse.urlunparse(new_pieces)
-            auth = base64.standard_b64encode(f'{url_pieces.username}:{url_pieces.password}').strip()
+            auth = base64.standard_b64encode(f'{url_pieces.username}:{url_pieces.password}'.encode()).decode()
 
     # iri support
     if not isinstance(url, bytes):
         url = convert_to_idn(url)
 
+    # Prevent UnicodeEncodeErrors caused by Unicode characters in the path.
+    bits = []
+    for c in url:
+        try:
+            c.encode('ascii')
+        except UnicodeEncodeError:
+            bits.append(urllib.parse.quote(c))
+        else:
+            bits.append(c)
+    url = ''.join(bits)
+
     # try to open with urllib2 (to use optional headers)
     request = _build_urllib2_request(url, agent, ACCEPT_HEADER, etag, modified, referrer, auth, request_headers)
     opener = urllib.request.build_opener(*tuple(handlers + [_FeedURLHandler()]))
@@ -203,7 +216,7 @@ def get(url, etag=None, modified=None, agent=None, referrer=None, handlers=None,
         result['href'] = f.url.decode('utf-8', 'ignore')
     else:
         result['href'] = f.url
-    result['status'] = getattr(f, 'status', 200)
+    result['status'] = getattr(f, 'status', None) or 200
 
     # Stop processing if the server sent HTTP 304 Not Modified.
     if getattr(f, 'code', 0) == 304:

diff --git a/script.module.feedparser/lib/feedparser/mixin.py b/script.module.feedparser/lib/feedparser/mixin.py
@@ -1,4 +1,4 @@
-# Copyright 2010-2020 Kurt McKee <contactme@kurtmckee.org>
+# Copyright 2010-2023 Kurt McKee <contactme@kurtmckee.org>
 # Copyright 2002-2008 Mark Pilgrim
 # All rights reserved.
 #
@@ -193,6 +193,7 @@ def __init__(self):
         self.svgOK = 0
         self.title_depth = -1
         self.depth = 0
+        self.hasContent = 0
         if self.lang:
             self.feeddata['language'] = self.lang.replace('_', '-')
 
@@ -506,9 +507,7 @@ def pop(self, element, strip_whitespace=1):
         if base64 and self.contentparams.get('base64', 0):
             try:
                 output = base64.decodebytes(output.encode('utf8')).decode('utf8')
-            except binascii.Error:
-                pass
-            except binascii.Incomplete:
+            except (binascii.Error, binascii.Incomplete, UnicodeDecodeError):
                 pass
 
         # resolve relative URIs