forked from vectara/vectara-ingest
-
Notifications
You must be signed in to change notification settings - Fork 0
/
news-cnbc.yaml
22 lines (20 loc) · 989 Bytes
/
news-cnbc.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
# Ingest configuration: crawl CNBC RSS feeds into a Vectara corpus.
# NOTE(review): the nesting below was reconstructed from a copy whose
# indentation had been stripped; confirm against the upstream file.

# Identifiers of the destination corpus and the owning customer account.
vectara:
  corpus_id: 154
  customer_id: 1526022105
  # NOTE(review): presumably controls whether already-indexed documents are
  # indexed again; confirm against the ingest tool's documentation.
  reindex: false

# NOTE(review): crawler_type presumably selects the matching
# "<type>_crawler" section below (here: rss_crawler); confirm.
crawling:
  crawler_type: rss

rss_crawler:
  source: cnbc
  # One CNBC RSS feed URL per entry.
  rss_pages:
    - "https://www.cnbc.com/id/100003114/device/rss/rss.html"
    - "https://www.cnbc.com/id/105545213/device/rss/rss.html"
    - "https://www.cnbc.com/id/10000664/device/rss/rss.html"
    - "https://www.cnbc.com/id/10000744/device/rss/rss.html"
    - "https://www.cnbc.com/id/10000108/device/rss/rss.html"
    - "https://www.cnbc.com/id/10000115/device/rss/rss.html"
    - "https://www.cnbc.com/id/10000113/device/rss/rss.html"
    - "https://www.cnbc.com/id/10000114/device/rss/rss.html"
    - "https://www.cnbc.com/id/10000116/device/rss/rss.html"
    - "https://www.cnbc.com/id/19836768/device/rss/rss.html"
    - "https://www.cnbc.com/id/10000110/device/rss/rss.html"
    - "https://www.cnbc.com/id/10000739/device/rss/rss.html"
    - "https://www.cnbc.com/id/10000117/device/rss/rss.html"
  # NOTE(review): presumably only feed items from the last N days are
  # ingested; confirm the unit and semantics with the crawler code.
  days_past: 30
  # NOTE(review): presumably a pause between requests (unit unverified).
  delay: 1
  extraction: playwright  # pdf or playwright