-
-
Notifications
You must be signed in to change notification settings - Fork 2.3k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Various new Russian and Ukrainian news sources
- Loading branch information
1 parent
cafd71b
commit 80ed90e
Showing
14 changed files
with
244 additions
and
22 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,26 +1,34 @@ | ||
# vim:fileencoding=utf-8 | ||
from __future__ import unicode_literals, division, absolute_import, print_function | ||
from calibre.web.feeds.news import BasicNewsRecipe | ||
|
||
class EchoMsk(BasicNewsRecipe): | ||
# Recipe subclass whose class attributes declare metadata, cleanup selectors and RSS feeds. | ||
# NOTE(review): this hunk is a rendered diff (@@ -1,26 +1,34 @@) whose +/- markers were lost, so rows | ||
# from two versions of the recipe (echofm.online and echo.msk.ru) are interleaved below - confirm | ||
# against the real file before relying on any single row. | ||
title = '\u042D\u0425\u041E' | ||
__author__ = 'bugmen00t' | ||
description = ('\u042D\u0425\u041E - \u043A\u0430\u043A \u043D\u0430 \u0441\u0442\u0430\u0440\u043E\u043C' | ||
' \u0434\u043E\u0431\u0440\u043E\u043C \u0440\u0430\u0434\u0438\u043E') | ||
publisher = 'Radio Echo GmbH' | ||
category = 'news' | ||
cover_url = u'https://echofm.online/logo.png' | ||
language = 'ru' | ||
no_stylesheets = True | ||
remove_javascript = False | ||
auto_cleanup = False | ||
oldest_article = 7 | ||
max_articles_per_feed = 50 | ||
|
||
# NOTE(review): second class header in the same hunk; presumably the pre-change side of the diff | ||
# (it points at echo.msk.ru rather than echofm.online) - verify. | ||
class AdjectiveSpecies(BasicNewsRecipe): | ||
title = u'Эхо Москвы' | ||
__author__ = 'bug_me_not' | ||
cover_url = u'http://echo.msk.ru/i/logo.png' | ||
description = 'Радиостанция Эхо Москвы' | ||
publisher = 'Эхо Москвы' | ||
category = 'news' | ||
language = 'ru' | ||
no_stylesheets = True | ||
remove_javascript = True | ||
oldest_article = 300 | ||
max_articles_per_feed = 100 | ||
remove_tags_before = dict(name='article') | ||
|
||
# remove_tags_before / remove_tags_after / remove_tags are each assigned twice in this hunk | ||
# (div-class selectors and <article>/styled-component selectors), one set per diff side. | ||
remove_tags_before = dict(name='div', attrs={'class': 'topic'}) | ||
remove_tags_after = dict(name='div', attrs={'class': 'typical'}) | ||
remove_tags = [dict(name='div', attrs={'class': 'addInNetBlock'}), | ||
dict(name='div', attrs={'class': 'flash'})] | ||
remove_tags_after = dict(name='article') | ||
|
||
remove_tags = [ | ||
dict(name='span', attrs={'class': 'sc-7b4cbb79-0 guzUFC'}), | ||
dict(name='div', attrs={'class': 'sc-f94c4ef5-0 frGiYu'}), | ||
# NOTE(review): the next entry is identical to the one above; likely a redundant duplicate. | ||
dict(name='div', attrs={'class': 'sc-f94c4ef5-0 frGiYu'}) | ||
] | ||
|
||
# List of (title, url) feed pairs. The two echo.msk.ru rows are not followed by a comma before the | ||
# echofm.online rows, consistent with them belonging to the other side of the diff. | ||
feeds = [ | ||
(u'Интервью и передачи', u'http://echo.msk.ru/interview/rss-fulltext.xml'), | ||
(u'Блоги', u'http://echo.msk.ru/blog/rss.xml') | ||
('\u0413\u043B\u0430\u0432\u043D\u043E\u0435', 'https://echofm.online/feed'), | ||
('\u041D\u043E\u0432\u043E\u0441\u0442\u0438', 'https://echofm.online/news/feed'), | ||
('\u041C\u043D\u0435\u043D\u0438\u044F', 'https://echofm.online/opinions/feed'), | ||
('\u0414\u043E\u043A\u0443\u043C\u0435\u043D\u0442\u044B', 'https://echofm.online/documents/feed') | ||
] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
#!/usr/bin/env python | ||
# vim:fileencoding=utf-8 | ||
|
||
from calibre.web.feeds.news import BasicNewsRecipe | ||
|
||
class FootballUA(BasicNewsRecipe): | ||
# Recipe for football.ua: metadata, cleanup selectors and a single RSS feed, all as class attributes. | ||
title = 'Football.UA' | ||
__author__ = 'bugmen00t' | ||
description = ('\u0421\u043F\u043E\u0440\u0442\u0438\u0432\u043D\u0438\u0439 \u043F\u043E\u0440\u0442\u0430\u043B' | ||
' \u0432 \u0423\u043A\u0440\u0430\u0457\u043D\u0456,' | ||
' \u043F\u0440\u0438\u0441\u0432\u044F\u0447\u0435\u043D\u0438\u0439 \u043B\u0438\u0448\u0435 \u0444\u0443\u0442\u0431\u043E\u043B\u0443.') | ||
publisher = 'United Media Holding group' | ||
category = 'news' | ||
cover_url = u'https://s.ill.in.ua/i/news/570x380/212/212438.jpg' | ||
language = 'uk' | ||
no_stylesheets = False | ||
remove_javascript = False | ||
auto_cleanup = False | ||
remove_empty_feeds = True | ||
oldest_article = 3 | ||
max_articles_per_feed = 20 | ||
|
||
# Both boundary selectors name the <article> element. | ||
remove_tags_before = dict(name='article') | ||
|
||
remove_tags_after = dict(name='article') | ||
|
||
# Extra <div> blocks selected for removal by CSS class. | ||
remove_tags = [ | ||
dict(name='div', attrs={'class': 'bottom-info'}), | ||
dict(name='div', attrs={'class': 'social-buttons'}) | ||
] | ||
|
||
# Single (title, url) feed pair; the title is \u-escaped Cyrillic text. | ||
feeds = [ | ||
('\u041D\u043E\u0432\u0438\u043D\u0438', 'https://football.ua/rss2.ashx'), | ||
] |
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
#!/usr/bin/env python | ||
# vim:fileencoding=utf-8 | ||
|
||
from __future__ import unicode_literals, division, absolute_import, print_function | ||
from calibre.web.feeds.news import BasicNewsRecipe | ||
|
||
class ProSleduet(BasicNewsRecipe): | ||
# Recipe for prosleduet.media: metadata, cleanup selectors and per-category RSS feeds as class attributes. | ||
title = '\u041F\u0440\u043E\u0434\u043E\u043B\u0436\u0435\u043D\u0438\u0435 \u0441\u043B\u0435\u0434\u0443\u0435\u0442' | ||
__author__ = 'bugmen00t' | ||
description = ('\u0414\u0438\u0434\u0436\u0438\u0442\u0430\u043B-\u043F\u0440\u043E\u0435\u043A\u0442' | ||
' \u0436\u0443\u0440\u043D\u0430\u043B\u0438\u0441\u0442\u043E\u0432' | ||
' \u00AB\u041D\u043E\u0432\u043E\u0439 \u0433\u0430\u0437\u0435\u0442\u044B\u00BB') | ||
publisher = 'Pavel Kanygin, Natalia Zhdanova' | ||
category = 'news' | ||
# NOTE(review): the cover points at an .svg logo - confirm the consumer of cover_url handles SVG. | ||
cover_url = u'https://prosleduet.media/wp-content/themes/prosle/assets/img/logo.svg' | ||
language = 'ru' | ||
no_stylesheets = True | ||
remove_javascript = False | ||
auto_cleanup = False | ||
oldest_article = 7 | ||
max_articles_per_feed = 20 | ||
|
||
# Both boundary selectors name <div class="container">. | ||
remove_tags_before = dict(name='div', attrs={'class': 'container'}) | ||
|
||
remove_tags_after = dict(name='div', attrs={'class': 'container'}) | ||
|
||
# The class value is the two-class string exactly as written here. | ||
remove_tags = [ | ||
dict(name='div', attrs={'class': 'ya-share2 ya-share2_inited'}) | ||
] | ||
|
||
# (title, url) feed pairs, one per site category; the site-wide feed in the first row is left disabled. | ||
feeds = [ | ||
# ('\u041F\u0440\u043E\u0434\u043E\u043B\u0436\u0435\u043D\u0438\u0435 \u0441\u043B\u0435\u0434\u0443\u0435\u0442', 'https://prosleduet.media/feed/'), | ||
('\u041D\u043E\u0432\u043E\u0441\u0442\u0438', 'https://prosleduet.media/category/news/feed/'), | ||
('\u041B\u044E\u0434\u0438', 'https://prosleduet.media/category/people/feed/'), | ||
('\u0421\u044E\u0436\u0435\u0442\u044B', 'https://prosleduet.media/category/syuzhety/feed/'), | ||
('\u041F\u043E\u0434\u043A\u0430\u0441\u0442\u044B', 'https://prosleduet.media/category/podcasts/feed/'), | ||
('\u0420\u0430\u0437\u0431\u043E\u0440\u044B', 'https://prosleduet.media/category/details/feed/'), | ||
('\u0413\u043B\u0443\u0431\u0438\u043D\u043D\u0430\u044F \u0420\u043E\u0441\u0441\u0438\u044F', 'https://prosleduet.media/category/glubinnaya-rossiya/feed/') | ||
] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
#!/usr/bin/env python | ||
# vim:fileencoding=utf-8 | ||
|
||
from calibre.web.feeds.news import BasicNewsRecipe | ||
|
||
class UAFootball(BasicNewsRecipe): | ||
# Recipe for ua-football.com. The Ukrainian configuration is active; an alternative Russian | ||
# configuration (title, language, feed and a print_version hook) is kept commented out. | ||
|
||
#Russian version | ||
# title = 'UA-\u0424\u0443\u0442\u0431\u043E\u043B' | ||
# language = 'ru_UK' | ||
# feeds = [ | ||
# ('\u041D\u043E\u0432\u043E\u0441\u0442\u0438 \u0444\u0443\u0442\u0431\u043E\u043B\u0430', 'https://www.ua-football.com/rss/all.xml') | ||
# ] | ||
|
||
#Ukrainian version | ||
title = 'UA-\u0424\u0443\u0442\u0431\u043E\u043B' | ||
description = ('\u0410\u043A\u0442\u0443\u0430\u043B\u044C\u043D\u0456 \u0442\u0435\u043C\u0438' | ||
' \u0444\u0443\u0442\u0431\u043E\u043B\u044C\u043D\u043E\u0433\u043E' | ||
' \u0436\u0438\u0442\u0442\u044F \u0423\u043A\u0440\u0430\u0457\u043D\u0438 \u0442\u0430' | ||
' \u0432\u0441\u044C\u043E\u0433\u043E \u0441\u0432\u0456\u0442\u0443.') | ||
language = 'uk' | ||
feeds = [ | ||
('\u041D\u043E\u0432\u0438\u043D\u0438', 'https://www.ua-football.com/ua/rss/all.xml') | ||
] | ||
|
||
# Settings shared by both language variants. | ||
__author__ = 'bugmen00t' | ||
publisher = '1766 TEAM EOOD' | ||
category = 'news' | ||
cover_url = u'https://yt3.googleusercontent.com/11FSvKeWcjFhzKrO7nXZdc-I__UeZ0mhZwbwyOHtnx_1-q6d0zQ2LbOt2duNCY06JVg2cGXS-g=s900-c-k-c0x00ffffff-no-rj' | ||
no_stylesheets = False | ||
remove_javascript = False | ||
auto_cleanup = False | ||
remove_empty_feeds = True | ||
oldest_article = 7 | ||
max_articles_per_feed = 200 | ||
|
||
# Boundary selectors: the <h1> element and <div class="show-post">. | ||
remove_tags_before = dict(name='h1') | ||
|
||
remove_tags_after = dict(name='div', attrs={'class': 'show-post'}) | ||
|
||
# Elements selected for removal: all <form> and <iframe> tags plus the listed <div> classes. | ||
remove_tags = [ | ||
dict(name='form'), | ||
dict(name='iframe'), | ||
dict(name='div', attrs={'class': 'language'}), | ||
dict(name='div', attrs={'class': 'article__read-also'}), | ||
dict(name='div', attrs={'class': 'card-player'}), | ||
dict(name='div', attrs={'class': 'show-post-socials'}) | ||
] | ||
|
||
# Replacing articles in Ukrainian for RU-feed | ||
# (disabled hook: it would drop the '/ua/' path segment from each article URL) | ||
# def print_version(self, url): | ||
# return url.replace('ua-football.com/ua/', 'ua-football.com/') |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
#!/usr/bin/env python | ||
# vim:fileencoding=utf-8 | ||
|
||
from calibre.web.feeds.news import BasicNewsRecipe | ||
|
||
class Unian(BasicNewsRecipe): | ||
# Recipe for the English-language UNIAN feed (news_eng.rss): metadata, cleanup selectors, | ||
# one RSS feed and an HTML preprocessing hook for images. | ||
title = 'UNIAN' | ||
description = ('UNIAN (Ukrainian Independent News Agency of News) is the largest independent news agency,' | ||
' first in Ukraine, founded in 1993, remaining the leader among the country\'s news media,' | ||
' being the most cited source of news from across Ukraine.') | ||
__author__ = 'bugmen00t' | ||
publication_type = 'newspaper' | ||
oldest_article = 30 | ||
max_articles_per_feed = 100 | ||
language = 'en_UK' | ||
cover_url = 'https://www.unian.info/images/unian-512x512.png' | ||
auto_cleanup = False | ||
no_stylesheets = True | ||
|
||
# Boundary selectors: the <h1> element and <div class="article-text">. | ||
remove_tags_before = dict(name='h1') | ||
remove_tags_after = dict(name='div', attrs={'class': 'article-text'}) | ||
# Elements selected for removal, matched by tag name and class string. | ||
remove_tags = [ | ||
dict(name='span', attrs={'class': 'article__info-item comments'}), | ||
dict(name='span', attrs={'class': 'article__info-item views'}), | ||
dict(name='div', attrs={'class': 'read-also-slider'}), | ||
dict(name='div', attrs={'class': 'nts-video-wrapper'}) | ||
] | ||
|
||
# Single (title, url) feed pair. | ||
feeds = [ | ||
(u'News Agency UNIAN', u'https://rss.unian.net/site/news_eng.rss') | ||
] | ||
|
||
# Copies each <img>'s data-src attribute into src and returns the same soup object. | ||
# Presumably the site lazy-loads images via data-src - confirm against the live markup. | ||
def preprocess_html(self, soup): | ||
for img in soup.findAll('img', attrs={'data-src': True}): | ||
img['src'] = img['data-src'] | ||
return soup |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
#!/usr/bin/env python | ||
# vim:fileencoding=utf-8 | ||
|
||
from calibre.web.feeds.news import BasicNewsRecipe | ||
|
||
|
||
class Unian(BasicNewsRecipe): | ||
# Recipe for the Ukrainian-language UNIAN feed (news_ukr.rss): metadata, cleanup selectors, | ||
# one RSS feed and an HTML preprocessing hook for images. | ||
title = '\u0423\u041D\u0406\u0410\u041D' | ||
description = ( | ||
'\u0423\u041D\u0406\u0410\u041D (\u0423\u043A\u0440\u0430\u0457\u043D\u0441\u044C\u043A\u0435' | ||
' \u041D\u0435\u0437\u0430\u043B\u0435\u0436\u043D\u0435 \u0406\u043D\u0444\u043E\u0440\u043C\u0430\u0446\u0456\u0439\u043D\u0435' | ||
' \u0410\u0433\u0435\u043D\u0442\u0441\u0442\u0432\u043E \u041D\u043E\u0432\u0438\u043D) -' | ||
' \u043F\u0435\u0440\u0448\u0435 \u0432 \u0423\u043A\u0440\u0430\u0457\u043D\u0456 \u0442\u0430' | ||
' \u043D\u0430\u0439\u0431\u0456\u043B\u044C\u0448\u0435 \u043D\u0435\u0437\u0430\u043B\u0435\u0436\u043D\u0435' | ||
' \u0456\u043D\u0444\u043E\u0440\u043C\u0430\u0446\u0456\u0439\u043D\u0435 \u0430\u0433\u0435\u043D\u0442\u0441\u0442\u0432\u043E,' | ||
' \u0437\u0430\u0441\u043D\u043E\u0432\u0430\u043D\u0435 1993 \u0440\u043E\u043A\u0443, \u043B\u0456\u0434\u0435\u0440' | ||
' \u0441\u0435\u0440\u0435\u0434 \u043D\u043E\u0432\u0438\u043D\u043D\u0438\u0445 \u043C\u0435\u0434\u0456\u0430' | ||
' \u043A\u0440\u0430\u0457\u043D\u0438, \u043D\u0430\u0439\u0431\u0456\u043B\u044C\u0448' | ||
' \u0446\u0438\u0442\u043E\u0432\u0430\u043D\u0435 \u0434\u0436\u0435\u0440\u0435\u043B\u043E' | ||
' \u043D\u043E\u0432\u0438\u043D \u043F\u0440\u043E \u043F\u043E\u0434\u0456\u0457 \u0432 \u043A\u0440\u0430\u0457\u043D\u0456.') | ||
__author__ = 'bugmen00t' | ||
publication_type = 'newspaper' | ||
oldest_article = 7 | ||
max_articles_per_feed = 100 | ||
language = 'uk' | ||
cover_url = 'https://www.unian.ua/images/unian-512x512.png' | ||
auto_cleanup = False | ||
no_stylesheets = True | ||
|
||
# Boundary selectors: the <h1> element and <div class="article-text">. | ||
remove_tags_before = dict(name='h1') | ||
remove_tags_after = dict(name='div', attrs={'class': 'article-text'}) | ||
# Elements selected for removal, matched by tag name and class string. | ||
remove_tags = [ | ||
dict(name='span', attrs={'class': 'article__info-item comments'}), | ||
dict(name='span', attrs={'class': 'article__info-item views'}), | ||
dict(name='div', attrs={'class': 'read-also-slider'}), | ||
dict(name='div', attrs={'class': 'nts-video-wrapper'}) | ||
] | ||
|
||
# Single (title, url) feed pair. | ||
feeds = [ | ||
(u'\u0423\u041D\u0406\u0410\u041D', u'https://rss.unian.net/site/news_ukr.rss') | ||
] | ||
|
||
# Copies each <img>'s data-src attribute into src and returns the same soup object. | ||
# Presumably the site lazy-loads images via data-src - confirm against the live markup. | ||
def preprocess_html(self, soup): | ||
for img in soup.findAll('img', attrs={'data-src': True}): | ||
img['src'] = img['data-src'] | ||
return soup |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters