-
Notifications
You must be signed in to change notification settings - Fork 2
/
exampleconfig.yaml
79 lines (67 loc) · 1.97 KB
/
exampleconfig.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
# GLOBAL

# log level
log-level: 'debug'

# formatter used for log output
log-formatter: 'logfmt'

# NOTE(review): presumably toggles including the calling method in log
# entries - confirm against the consumer of this option
log-method: false
# SERVER

# port of the server
port: 8880

# prefix applied to server endpoint paths
path-prefix: ''

# whether server requests are logged
log-requests: true
# INDEX

# index format (index): "cdxj", "cdxpb", "badger", "tikv" or "toc"
# index format (serve/reset): "badger" or "tikv"
index-format: cdxj

# index source: "file" or "kafka"
index-source: file

# include only files matching regular expressions
# (dots are escaped so they match a literal "."; single quotes keep the
# backslashes literal, which YAML double quotes would not)
index-include:
- '\.warc\.gz$'

# exclude files matching regular expressions
index-exclude: []

# number of index workers
index-workers: 8
# FILE TRAVERSAL INDEX SOURCE

# warc files, or directories in which to search for warc files
file-paths:
- './testdata'

# maximum number of directory recursions
file-max-depth: 4
# KAFKA INDEX SOURCE

# broker addresses used to connect to the kafka cluster
kafka-brokers:
- '127.0.0.1:3939'

# consumer group id (optional)
kafka-group-id: 'my-group-id'

# topic from which messages are read
kafka-topic: 'my-topic'

# minimum batch size the consumer will accept, as indicated to the broker
kafka-min-bytes: 0

# maximum batch size the consumer will accept, as indicated to the broker
kafka-max-bytes: 0

# maximum time to wait for new data when fetching batches of messages
# from kafka
kafka-max-wait: 0
# BADGER

# location of the badger database
badger-dir: './warcdb'

# run badger in read-only mode
badger-read-only: false

# maximum transaction batch size
badger-batch-max-size: 1000

# maximum wait before batched records are flushed, regardless of the
# maximum batch size
badger-batch-max-wait: '5s'

# compression algorithm for badger
badger-compression: 'snappy'

# which badger database to use
badger-database: ''
# TIKV

# address of the tikv placement driver
tikv-pd-addr: '127.0.0.1:2379'

# maximum transaction batch size
tikv-batch-max-size: 1000

# maximum wait before batched records are flushed, regardless of the
# maximum batch size
tikv-batch-max-wait: '5s'

# which tikv database to use
tikv-database: ''
# TOC

# estimated capacity of the bloom filter
toc-bloom-capacity: 10000

# NOTE(review): presumably the bloom filter's false-positive rate - confirm
toc-bloom-fp: 0.1