-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #2 from pbansal5/debugging
Debugging
- Loading branch information
Showing
21 changed files
with
306 additions
and
249 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1,2 @@ | ||
src/.ipynb_checkpoints | ||
src/.ipynb_checkpoints | ||
jsons/ |
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
**************** Benchmark Params **************** | ||
auth_token None | ||
cache_dir None | ||
dataset_name wikitext-103-v1 | ||
dataset_path wikitext | ||
dataset_split validation | ||
load_from hf | ||
max_length 32 | ||
model_layer None | ||
model_name gpt2 | ||
model_parallelism False | ||
normalization_level word | ||
num_docs_to_rank -1 | ||
output_dir logs/baseline | ||
ranking_logprob_past_tokens 16 | ||
ranking_strategy first | ||
retrieved_file None | ||
retrieved_max_length 256 | ||
stride 4 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
{"eval_perplexity": 1.0391204357147217} |
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
**************** Benchmark Params **************** auth_token None | ||
cache_dir None | ||
dataset_name wikitext-103-v1 | ||
dataset_path wikitext | ||
dataset_split validation | ||
load_from hf | ||
max_length 32 | ||
model_layer 12 | ||
model_name gpt2 | ||
model_parallelism False | ||
normalization_level word | ||
num_docs_to_rank 16 | ||
output_dir logs/results-gp2-bert-base | ||
ranking_logprob_past_tokens 16 | ||
ranking_strategy colbert | ||
retrieved_file jsons/bert_reranked_wikitext_rql_32_rs_4_topK_16.json | ||
retrieved_max_length 32 | ||
stride 4 | ||
|
||
|
||
|
||
**************** BM25 Logging INFO **************** data_dir | ||
forbidden_titles jsons/wikitext_forbidden_titles.txt | ||
output_file jsons/wikitext_rql_32_rs_4_topK_100.json | ||
query_corpus wikitext | ||
retrieval_corpus wikipedia-dpr-100w | ||
retrieval_query_length 32 | ||
retrieval_stride 4 | ||
tokenizer gpt2 | ||
topK 100 | ||
|
||
|
||
|
||
**************** Reranking Logging INFO **************** bm25_file jsons/wikitext_rql_32_rs_4_topK_100.json | ||
data_dir | ||
max_length 256 | ||
rerank_model bert-base-uncased | ||
reranked_file jsons/bert_reranked_wikitext_rql_32_rs_4_topK_16.json | ||
retrieval_corpus wikipedia-dpr-100w | ||
topK 16 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
{"eval_perplexity": 1.1255316734313965, "num_input_no_retrieval": 0} |
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
export MODEL_NAME='gpt2' | ||
export OUTPUT_DIR='logs/baseline' | ||
|
||
python3 -m benchmark.eval_lm \ | ||
--model_name $MODEL_NAME \ | ||
--dataset_path wikitext \ | ||
--dataset_name wikitext-103-v1 \ | ||
--dataset_split 'validation' \ | ||
--output_dir $OUTPUT_DIR \ | ||
--stride 4 \ | ||
--max_length 32 \ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
export MODEL_NAME='gpt2' | ||
export OUTPUT_DIR='logs/results-gp2-bert-base' | ||
export RETRIEVAL_FILE='jsons/bert_reranked_wikitext_rql_32_rs_4_topK_16.json' | ||
|
||
python3 -m benchmark.eval_lm \ | ||
--model_name $MODEL_NAME \ | ||
--dataset_path wikitext \ | ||
--dataset_name wikitext-103-v1 \ | ||
--dataset_split 'validation' \ | ||
--output_dir $OUTPUT_DIR \ | ||
--stride 4 \ | ||
--max_length 32 \ | ||
--retrieved_file $RETRIEVAL_FILE \ | ||
--ranking_strategy 'colbert' \ | ||
--num_docs_to_rank 16 \ | ||
--ranking_logprob_past_tokens 16 \ | ||
--retrieved_max_length 32 \ | ||
--model_layer 12 |
Binary file not shown.
Binary file not shown.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Oops, something went wrong.