-
Notifications
You must be signed in to change notification settings - Fork 78
/
Copy pathconfig.json
87 lines (87 loc) · 2.51 KB
/
config.json
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
{
"$schema": "../../config.schema.json",
"experiment_name": "baseline",
"job_name": "baseline_job",
"job_description": "",
"data_formats": ["*"],
"main_instruction": "",
"use_checkpoints": true,
"path": {},
"index": {
"index_name_prefix": "ci",
"ef_construction": [400],
"ef_search": [400],
"chunking": {
"preprocess": false,
"chunk_size": [1000],
"overlap_size": [200],
"generate_title": false,
"generate_summary": false,
"override_content_with_summary": false,
"chunking_strategy": "basic",
"azure_document_intelligence_model": "prebuilt-read"
},
"embedding_model": [
{
"type": "sentence-transformer",
"model_name": "all-mpnet-base-v2"
}
],
"sampling": {
"sample_data": false,
"percentage": 5,
"optimum_k": "auto",
"min_cluster": 2,
"max_cluster": 30
}
},
"language": {
"analyzer": {
"analyzer_name": "en.microsoft",
"index_analyzer_name": "",
"search_analyzer_name": "",
"char_filters": [],
"tokenizers": [],
"token_filters": []
},
"query_language": "en-us"
},
"rerank": {
"enabled": true,
"type": "cross_encoder",
"llm_rerank_threshold": 3,
"cross_encoder_at_k": 4,
"cross_encoder_model": "cross-encoder/stsb-roberta-base"
},
"search": {
"retrieve_num_of_documents": 5,
"search_type": [
"search_for_manual_hybrid",
"search_for_match_Hybrid_multi",
"search_for_match_semantic"
],
"search_relevancy_threshold": 0.8
},
"query_expansion": {
"query_expansion": true,
"hyde": "generated_hypothetical_answer",
"min_query_expansion_related_question_similarity_score": 90,
"expand_to_multiple_questions": true
},
"openai": {
"azure_oai_chat_deployment_name": "gpt-35-turbo",
"azure_oai_eval_deployment_name": "gpt-35-turbo",
"temperature": 0
},
"eval": {
"metric_types": [
"fuzzy_score",
"cosine_ochiai",
"rouge2_recall",
"bert_all_MiniLM_L6_v2",
"bert_distilbert_base_nli_stsb_mean_tokens",
"llm_answer_relevance",
"llm_context_precision"
]
}
}