Various new Russian and Ukrainian news sources

kovidgoyal · Aug 9, 2023 · 80ed90e · 80ed90e
1 parent cafd71b
commit 80ed90e
Show file tree

Hide file tree

Showing 14 changed files with 244 additions and 22 deletions.
diff --git a/recipes/echo_moskvy.recipe b/recipes/echo_moskvy.recipe
@@ -1,26 +1,34 @@
-# vim:fileencoding=utf-8
+from __future__ import unicode_literals, division, absolute_import, print_function
 from calibre.web.feeds.news import BasicNewsRecipe
 
+class EchoMsk(BasicNewsRecipe):
+    """News recipe built from the echofm.online RSS feeds listed in ``feeds``."""
+    title = '\u042D\u0425\u041E'
+    __author__ = 'bugmen00t'
+    description = ('\u042D\u0425\u041E - \u043A\u0430\u043A \u043D\u0430 \u0441\u0442\u0430\u0440\u043E\u043C'
+                   ' \u0434\u043E\u0431\u0440\u043E\u043C \u0440\u0430\u0434\u0438\u043E')
+    publisher = 'Radio Echo GmbH'
+    category = 'news'
+    cover_url = u'https://echofm.online/logo.png'
+    language = 'ru'
+    no_stylesheets = True
+    remove_javascript = False
+    auto_cleanup = False
+    oldest_article = 7
+    max_articles_per_feed = 50
 
-class AdjectiveSpecies(BasicNewsRecipe):
-    title = u'Эхо Москвы'
-    __author__ = 'bug_me_not'
-    cover_url = u'http://echo.msk.ru/i/logo.png'
-    description = 'Радиостанция Эхо Москвы'
-    publisher = 'Эхо Москвы'
-    category = 'news'
-    language = 'ru'
-    no_stylesheets = True
-    remove_javascript = True
-    oldest_article = 300
-    max_articles_per_feed = 100
+    remove_tags_before = dict(name='article')
 
-    remove_tags_before = dict(name='div', attrs={'class': 'topic'})
-    remove_tags_after = dict(name='div', attrs={'class': 'typical'})
-    remove_tags = [dict(name='div', attrs={'class': 'addInNetBlock'}),
-                   dict(name='div', attrs={'class': 'flash'})]
+    remove_tags_after = dict(name='article')
+
+    remove_tags = [
+        dict(name='span', attrs={'class': 'sc-7b4cbb79-0 guzUFC'}),
+        dict(name='div', attrs={'class': 'sc-f94c4ef5-0 frGiYu'}),
+    ]
 
     feeds = [
-        (u'Интервью и передачи', u'http://echo.msk.ru/interview/rss-fulltext.xml'),
-        (u'Блоги', u'http://echo.msk.ru/blog/rss.xml')
+        ('\u0413\u043B\u0430\u0432\u043D\u043E\u0435', 'https://echofm.online/feed'),
+        ('\u041D\u043E\u0432\u043E\u0441\u0442\u0438', 'https://echofm.online/news/feed'),
+        ('\u041C\u043D\u0435\u043D\u0438\u044F', 'https://echofm.online/opinions/feed'),
+        ('\u0414\u043E\u043A\u0443\u043C\u0435\u043D\u0442\u044B', 'https://echofm.online/documents/feed')
     ]
diff --git a/recipes/footballua.recipe b/recipes/footballua.recipe
@@ -0,0 +1,34 @@
+#!/usr/bin/env python
+# vim:fileencoding=utf-8
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class FootballUA(BasicNewsRecipe):
+    """Recipe for Football.UA, built from the football.ua/rss2.ashx feed."""
+    title = 'Football.UA'
+    description = ('\u0421\u043F\u043E\u0440\u0442\u0438\u0432\u043D\u0438\u0439 \u043F\u043E\u0440\u0442\u0430\u043B'
+                   ' \u0432 \u0423\u043A\u0440\u0430\u0457\u043D\u0456,'
+                   ' \u043F\u0440\u0438\u0441\u0432\u044F\u0447\u0435\u043D\u0438\u0439 \u043B\u0438\u0448\u0435 \u0444\u0443\u0442\u0431\u043E\u043B\u0443.')
+    __author__ = 'bugmen00t'
+    publisher = 'United Media Holding group'
+    category = 'news'
+    language = 'uk'
+    cover_url = u'https://s.ill.in.ua/i/news/570x380/212/212438.jpg'
+
+    feeds = [
+        ('\u041D\u043E\u0432\u0438\u043D\u0438', 'https://football.ua/rss2.ashx'),
+    ]
+    oldest_article = 3
+    max_articles_per_feed = 20
+    remove_empty_feeds = True
+
+    # Cleanup: trim everything outside <article>, then drop the two divs listed in remove_tags.
+    no_stylesheets = False
+    remove_javascript = False
+    auto_cleanup = False
+    remove_tags_before = dict(name='article')
+    remove_tags_after = dict(name='article')
+    remove_tags = [
+        dict(name='div', attrs={'class': 'bottom-info'}),
+        dict(name='div', attrs={'class': 'social-buttons'}),
+    ]
diff --git a/recipes/icons/echo_moskvy.png b/recipes/icons/echo_moskvy.png
diff --git a/recipes/icons/footballua.png b/recipes/icons/footballua.png
diff --git a/recipes/icons/prosleduet.png b/recipes/icons/prosleduet.png
diff --git a/recipes/icons/ua_fooball.png b/recipes/icons/ua_fooball.png
diff --git a/recipes/icons/unian_net_en.png b/recipes/icons/unian_net_en.png
diff --git a/recipes/icons/unian_net_ua.png b/recipes/icons/unian_net_ua.png
diff --git a/recipes/prosleduet.recipe b/recipes/prosleduet.recipe
@@ -0,0 +1,39 @@
+#!/usr/bin/env python
+# vim:fileencoding=utf-8
+
+from __future__ import unicode_literals, division, absolute_import, print_function
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class ProSleduet(BasicNewsRecipe):
+    """Recipe for prosleduet.media, assembled from the site's per-category RSS feeds."""
+    title = '\u041F\u0440\u043E\u0434\u043E\u043B\u0436\u0435\u043D\u0438\u0435 \u0441\u043B\u0435\u0434\u0443\u0435\u0442'
+    description = ('\u0414\u0438\u0434\u0436\u0438\u0442\u0430\u043B-\u043F\u0440\u043E\u0435\u043A\u0442'
+                   ' \u0436\u0443\u0440\u043D\u0430\u043B\u0438\u0441\u0442\u043E\u0432'
+                   ' \u00AB\u041D\u043E\u0432\u043E\u0439 \u0433\u0430\u0437\u0435\u0442\u044B\u00BB')
+    __author__ = 'bugmen00t'
+    publisher = 'Pavel Kanygin, Natalia Zhdanova'
+    category = 'news'
+    language = 'ru'
+    cover_url = u'https://prosleduet.media/wp-content/themes/prosle/assets/img/logo.svg'
+
+    oldest_article = 7
+    max_articles_per_feed = 20
+    feeds = [
+        # ('\u041F\u0440\u043E\u0434\u043E\u043B\u0436\u0435\u043D\u0438\u0435 \u0441\u043B\u0435\u0434\u0443\u0435\u0442', 'https://prosleduet.media/feed/'),
+        ('\u041D\u043E\u0432\u043E\u0441\u0442\u0438', 'https://prosleduet.media/category/news/feed/'),
+        ('\u041B\u044E\u0434\u0438', 'https://prosleduet.media/category/people/feed/'),
+        ('\u0421\u044E\u0436\u0435\u0442\u044B', 'https://prosleduet.media/category/syuzhety/feed/'),
+        ('\u041F\u043E\u0434\u043A\u0430\u0441\u0442\u044B', 'https://prosleduet.media/category/podcasts/feed/'),
+        ('\u0420\u0430\u0437\u0431\u043E\u0440\u044B', 'https://prosleduet.media/category/details/feed/'),
+        ('\u0413\u043B\u0443\u0431\u0438\u043D\u043D\u0430\u044F \u0420\u043E\u0441\u0441\u0438\u044F', 'https://prosleduet.media/category/glubinnaya-rossiya/feed/'),
+    ]
+
+    # Cleanup: trim around div.container, then drop the div listed in remove_tags.
+    no_stylesheets = True
+    remove_javascript = False
+    auto_cleanup = False
+    remove_tags_before = dict(name='div', attrs={'class': 'container'})
+    remove_tags_after = dict(name='div', attrs={'class': 'container'})
+    remove_tags = [
+        dict(name='div', attrs={'class': 'ya-share2 ya-share2_inited'}),
+    ]
diff --git a/recipes/ua_fooball.recipe b/recipes/ua_fooball.recipe
@@ -0,0 +1,52 @@
+#!/usr/bin/env python
+# vim:fileencoding=utf-8
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class UAFootball(BasicNewsRecipe):
+    """Recipe for ua-football.com: the Ukrainian feed is live, a Russian variant is kept commented out."""
+    __author__ = 'bugmen00t'
+    publisher = '1766 TEAM EOOD'
+    category = 'news'
+    cover_url = u'https://yt3.googleusercontent.com/11FSvKeWcjFhzKrO7nXZdc-I__UeZ0mhZwbwyOHtnx_1-q6d0zQ2LbOt2duNCY06JVg2cGXS-g=s900-c-k-c0x00ffffff-no-rj'
+
+    # Ukrainian version (active)
+    title = 'UA-\u0424\u0443\u0442\u0431\u043E\u043B'
+    description = ('\u0410\u043A\u0442\u0443\u0430\u043B\u044C\u043D\u0456 \u0442\u0435\u043C\u0438'
+                   ' \u0444\u0443\u0442\u0431\u043E\u043B\u044C\u043D\u043E\u0433\u043E'
+                   ' \u0436\u0438\u0442\u0442\u044F \u0423\u043A\u0440\u0430\u0457\u043D\u0438 \u0442\u0430'
+                   ' \u0432\u0441\u044C\u043E\u0433\u043E \u0441\u0432\u0456\u0442\u0443.')
+    language = 'uk'
+    feeds = [
+        ('\u041D\u043E\u0432\u0438\u043D\u0438', 'https://www.ua-football.com/ua/rss/all.xml'),
+    ]
+
+    # Russian version, kept commented out as an alternative to the title/language/feeds above:
+    #    title = 'UA-\u0424\u0443\u0442\u0431\u043E\u043B'
+    #    language = 'ru_UK'
+    #    feeds = [
+    #        ('\u041D\u043E\u0432\u043E\u0441\u0442\u0438 \u0444\u0443\u0442\u0431\u043E\u043B\u0430', 'https://www.ua-football.com/rss/all.xml')
+    #        ]
+    # Companion hook for the RU feed: replaces Ukrainian article URLs (/ua/) with the site-root ones.
+    #    def print_version(self, url):
+    #        return url.replace('ua-football.com/ua/', 'ua-football.com/')
+
+    oldest_article = 7
+    max_articles_per_feed = 200
+    remove_empty_feeds = True
+
+    # Cleanup: trim before the first <h1> and after div.show-post, then drop the tags in remove_tags.
+    no_stylesheets = False
+    remove_javascript = False
+    auto_cleanup = False
+    remove_tags_before = dict(name='h1')
+    remove_tags_after = dict(name='div', attrs={'class': 'show-post'})
+
+    remove_tags = [
+        dict(name='form'),
+        dict(name='iframe'),
+        dict(name='div', attrs={'class': 'language'}),
+        dict(name='div', attrs={'class': 'article__read-also'}),
+        dict(name='div', attrs={'class': 'card-player'}),
+        dict(name='div', attrs={'class': 'show-post-socials'}),
+    ]
diff --git a/recipes/unian_net.recipe b/recipes/unian_net.recipe
@@ -11,7 +11,7 @@ class Unian(BasicNewsRecipe):
     publication_type = 'newspaper'
     oldest_article = 7
     max_articles_per_feed = 100
-    language = 'ru'
+    language = 'ru_UK'
     cover_url = 'https://www.unian.net/images/unian-512x512.png'
     auto_cleanup = False
     no_stylesheets = True
@@ -21,9 +21,15 @@ class Unian(BasicNewsRecipe):
     remove_tags = [
         dict(name='span', attrs={'class': 'article__info-item comments'}),
         dict(name='span', attrs={'class': 'article__info-item views'}),
-        dict(name='div', attrs={'class': 'read-also-slider'})
+        dict(name='div', attrs={'class': 'read-also-slider'}),
+        dict(name='div', attrs={'class': 'nts-video-wrapper'})
     ]
 
     feeds = [
     (u'\u0423\u041D\u0418\u0410\u041D', u'https://rss.unian.net/site/news_rus.rss')
     ]
+
+    def preprocess_html(self, soup):  # copy each <img data-src> URL into src, then return the soup
+        for lazy_img in soup.findAll('img', attrs={'data-src': True}):
+            lazy_img['src'] = lazy_img['data-src']
+        return soup
diff --git a/recipes/unian_net_en.recipe b/recipes/unian_net_en.recipe
@@ -0,0 +1,36 @@
+#!/usr/bin/env python
+# vim:fileencoding=utf-8
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Unian(BasicNewsRecipe):
+    """Recipe for the English-language UNIAN feed (rss.unian.net/site/news_eng.rss)."""
+    title = 'UNIAN'
+    description = ('UNIAN (Ukrainian Independent Information Agency of News) is the largest independent news agency,'
+                   ' first in Ukraine, founded in 1993, remaining the leader among the country\'s news media,'
+                   ' being the most cited source of news from across Ukraine.')
+    __author__ = 'bugmen00t'
+    publication_type = 'newspaper'
+    oldest_article = 30
+    max_articles_per_feed = 100
+    language = 'en_UK'  # NOTE(review): Ukraine's ISO 3166 code is UA - confirm 'en_UK' is intended
+    cover_url = 'https://www.unian.info/images/unian-512x512.png'
+    auto_cleanup = False
+    no_stylesheets = True
+
+    remove_tags_before = dict(name='h1')
+    remove_tags_after = dict(name='div', attrs={'class': 'article-text'})
+    remove_tags = [
+        dict(name='span', attrs={'class': 'article__info-item comments'}),
+        dict(name='span', attrs={'class': 'article__info-item views'}),
+        dict(name='div', attrs={'class': 'read-also-slider'}),
+        dict(name='div', attrs={'class': 'nts-video-wrapper'}),
+    ]
+
+    feeds = [(u'News Agency UNIAN', u'https://rss.unian.net/site/news_eng.rss')]
+
+    def preprocess_html(self, soup):
+        """Copy each <img> tag's ``data-src`` URL into ``src`` and return the soup."""
+        for img in soup.findAll('img', attrs={'data-src': True}):
+            img['src'] = img['data-src']
+        return soup
diff --git a/recipes/unian_net_ua.recipe b/recipes/unian_net_ua.recipe
@@ -0,0 +1,46 @@
+#!/usr/bin/env python
+# vim:fileencoding=utf-8
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+
+class Unian(BasicNewsRecipe):
+    """Recipe for the Ukrainian-language UNIAN feed (rss.unian.net/site/news_ukr.rss)."""
+    title = '\u0423\u041D\u0406\u0410\u041D'
+    __author__ = 'bugmen00t'
+    publication_type = 'newspaper'
+    language = 'uk'
+    cover_url = 'https://www.unian.ua/images/unian-512x512.png'
+    description = (
+        '\u0423\u041D\u0406\u0410\u041D (\u0423\u043A\u0440\u0430\u0457\u043D\u0441\u044C\u043A\u0435'
+        ' \u041D\u0435\u0437\u0430\u043B\u0435\u0436\u043D\u0435 \u0406\u043D\u0444\u043E\u0440\u043C\u0430\u0446\u0456\u0439\u043D\u0435'
+        ' \u0410\u0433\u0435\u043D\u0442\u0441\u0442\u0432\u043E \u041D\u043E\u0432\u0438\u043D) -'
+        ' \u043F\u0435\u0440\u0448\u0435 \u0432 \u0423\u043A\u0440\u0430\u0457\u043D\u0456 \u0442\u0430'
+        ' \u043D\u0430\u0439\u0431\u0456\u043B\u044C\u0448\u0435 \u043D\u0435\u0437\u0430\u043B\u0435\u0436\u043D\u0435'
+        ' \u0456\u043D\u0444\u043E\u0440\u043C\u0430\u0446\u0456\u0439\u043D\u0435 \u0430\u0433\u0435\u043D\u0442\u0441\u0442\u0432\u043E,'
+        ' \u0437\u0430\u0441\u043D\u043E\u0432\u0430\u043D\u0435 1993 \u0440\u043E\u043A\u0443, \u043B\u0456\u0434\u0435\u0440'
+        ' \u0441\u0435\u0440\u0435\u0434 \u043D\u043E\u0432\u0438\u043D\u043D\u0438\u0445 \u043C\u0435\u0434\u0456\u0430'
+        ' \u043A\u0440\u0430\u0457\u043D\u0438, \u043D\u0430\u0439\u0431\u0456\u043B\u044C\u0448'
+        ' \u0446\u0438\u0442\u043E\u0432\u0430\u043D\u0435 \u0434\u0436\u0435\u0440\u0435\u043B\u043E'
+        ' \u043D\u043E\u0432\u0438\u043D \u043F\u0440\u043E \u043F\u043E\u0434\u0456\u0457 \u0432 \u043A\u0440\u0430\u0457\u043D\u0456.')
+
+    feeds = [(u'\u0423\u041D\u0406\u0410\u041D', u'https://rss.unian.net/site/news_ukr.rss')]
+    oldest_article = 7
+    max_articles_per_feed = 100
+
+    no_stylesheets = True
+    auto_cleanup = False
+    remove_tags_before = dict(name='h1')
+    remove_tags_after = dict(name='div', attrs={'class': 'article-text'})
+    remove_tags = [
+        dict(name='span', attrs={'class': 'article__info-item comments'}),
+        dict(name='span', attrs={'class': 'article__info-item views'}),
+        dict(name='div', attrs={'class': 'read-also-slider'}),
+        dict(name='div', attrs={'class': 'nts-video-wrapper'}),
+    ]
+
+    def preprocess_html(self, soup):
+        """Give every <img> that carries ``data-src`` a matching ``src``; return the soup."""
+        for tag in soup.findAll('img', attrs={'data-src': True}):
+            tag['src'] = tag['data-src']
+        return soup
diff --git a/src/calibre/gui2/store/stores/virtualo_plugin.py b/src/calibre/gui2/store/stores/virtualo_plugin.py
@@ -74,7 +74,8 @@ def search(self, query, max_results=12, timeout=60):
                 if not id:
                     continue
 
-                price = ''.join(data.xpath('.//div[@class="info"]//div[@class="price"]/div/text()|.//div[@class="info"]//div[@class="price price--no-promo"]/div/text()'))
+                price = ''.join(data.xpath(
+                    './/div[@class="info"]//div[@class="price"]/div/text()|.//div[@class="info"]//div[@class="price price--no-promo"]/div/text()'))
                 cover_url = ''.join(data.xpath('.//img[@class="cover"]/@src'))
                 title = ''.join(data.xpath('.//h3[@class="title"]/a//text()'))
                 author = ', '.join(data.xpath('.//div[@class="info"]//div[@class="authors"]/a//text()'))