forked from vectara/vectara-ingest
-
Notifications
You must be signed in to change notification settings - Fork 0
/
news-npr.yaml
20 lines (18 loc) · 844 Bytes
/
news-npr.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
# Ingest configuration: RSS crawl of NPR feeds into a Vectara corpus.
# NOTE(review): the section nesting below is reconstructed from the key
# names (the reviewed copy had lost its indentation) — confirm against the
# schema the consuming tool expects.
vectara:
  corpus_id: 155
  customer_id: 1526022105
  # Presumably controls whether already-indexed documents are re-ingested;
  # TODO confirm the exact semantics with the consumer.
  reindex: false

crawling:
  # Presumably pairs with the `rss_crawler` section below — TODO confirm.
  crawler_type: rss

rss_crawler:
  source: npr
  # One feed URL per entry so additions and removals diff cleanly.
  # NOTE(review): the first two entries both reference feed id 1001 and may
  # resolve to the same feed — verify before removing either.
  rss_pages:
    - "https://feeds.npr.org/1001/rss.xml"
    - "https://www.npr.org/rss/rss.php?id=1001"
    - "https://www.npr.org/rss/rss.php?id=1032"
    - "https://www.npr.org/rss/rss.php?id=1006"
    - "https://www.npr.org/rss/rss.php?id=1128"
    - "https://www.npr.org/rss/rss.php?id=1007"
    - "https://www.npr.org/rss/rss.php?id=1019"
    - "https://www.npr.org/rss/rss.php?id=1004"
    - "https://www.npr.org/rss/rss.php?id=1003"
    - "https://www.npr.org/rss/rss.php?id=1014"
    - "https://www.npr.org/rss/rss.php?id=1013"
    - "https://www.npr.org/rss/rss.php?id=1008"
    - "https://www.npr.org/rss/rss.php?id=1101"
    - "https://www.npr.org/rss/rss.php?id=1045"
  # Presumably a look-back window in days — TODO confirm.
  days_past: 30
  # Presumably a pause between requests; units are not shown here —
  # TODO confirm.
  delay: 1
  extraction: playwright  # pdf or playwright