Skip to content

Commit

Permalink
Merge pull request #2 from rloganiv/no-peeking
Browse files: browse the repository at this point in the history
No peeking
  • Loading branch information
rloganiv authored Oct 16, 2019
2 parents 2ceed77 + b933b89 commit e915b77
Show file tree
Hide file tree
Showing 17 changed files with 601 additions and 109 deletions.
21 changes: 17 additions & 4 deletions experiments/entity_disc_conll2012.jsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"vocabulary": {
"type": "extended",
"extend": false,
"directory_path": "results/entity-nlm-conll2012-fixed/vocabulary"
"directory_path": "data/vocabulary"
},
"dataset_reader": {
"type": "conll2012_jsonl",
Expand Down Expand Up @@ -36,7 +36,7 @@
},
"iterator": {
"type": "fancy",
"batch_size": 16,
"batch_size": 343,
"split_size": 30,
"splitting_keys": [
"source",
Expand All @@ -45,13 +45,26 @@
"mention_lengths"
],
},
"validation_iterator": {
"type": "fancy",
"batch_size": 343,
"split_size": 128,
"splitting_keys": [
"source",
"entity_types",
"entity_ids",
"mention_lengths"
],
"truncate": false
},
"trainer": {
"type": "lm",
"num_epochs": 40,
"num_epochs": 400,
"cuda_device": 0,
"optimizer": {
"type": "adam",
"lr": 1e-3
}
},
"validation_metric": "+eid_acc"
}
}
70 changes: 70 additions & 0 deletions experiments/entity_disc_conll2012_mini.jsonnet
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
// "Mini" entity-discriminator experiment on CoNLL-2012: trains on
// train-mini.jsonl, validates on dev-mini.jsonl, and points the vocabulary
// at data/vocabulary-mini.

// Embedding width; the same value is given to the token embedder and to
// the model's own "embedding_dim".
local embedding_dim = 256;

// Batching settings for training. The validation iterator reuses them
// unchanged and only adds "truncate": false.
local train_iterator = {
  "type": "fancy",
  "batch_size": 16,
  "split_size": 15,
  "splitting_keys": ["source", "entity_types", "entity_ids", "mention_lengths"],
};

{
  "vocabulary": {
    "type": "extended",
    "extend": false,
    "directory_path": "data/vocabulary-mini",
  },

  "dataset_reader": {
    "type": "conll2012_jsonl",
    "token_indexers": {
      "tokens": { "type": "single_id", "lowercase_tokens": true },
    },
  },

  "train_data_path": "data/conll-2012/processed/train-mini.jsonl",
  "validation_data_path": "data/conll-2012/processed/dev-mini.jsonl",

  "model": {
    "type": "entitydisc",
    "text_field_embedder": {
      "token_embedders": {
        "tokens": {
          "type": "embedding",
          "embedding_dim": embedding_dim,
          "trainable": true,
        },
      },
    },
    "embedding_dim": embedding_dim,
    "hidden_size": 256,
    "num_layers": 1,
    "max_mention_length": 50,
    "max_embeddings": 10,
    "dropout_rate": 0.4,
    "variational_dropout_rate": 0.1,
  },

  "iterator": train_iterator,
  "validation_iterator": train_iterator + { "truncate": false },

  "trainer": {
    "type": "lm",
    "num_epochs": 400,
    "cuda_device": 0,
    "optimizer": { "type": "adam", "lr": 1e-3 },
    // NOTE(review): the leading "+" presumably follows the AllenNLP
    // convention for a metric that should increase; confirm that the
    // custom "lm" trainer honors it.
    "validation_metric": "+eid_acc",
  },
}
71 changes: 71 additions & 0 deletions experiments/entity_disc_conll2012_no_peeking.jsonnet
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
// "No peeking" entity-discriminator experiment on the full CoNLL-2012
// splits (train.jsonl / dev.jsonl), with the vocabulary pointed at
// data/vocabulary.

// Embedding width; the same value is given to the token embedder and to
// the model's own "embedding_dim".
local embedding_dim = 128;

// Batching settings for training. The validation iterator reuses them
// unchanged and only adds "truncate": false.
local train_iterator = {
  "type": "fancy",
  "batch_size": 16,
  "split_size": 15,
  "splitting_keys": ["source", "entity_types", "entity_ids", "mention_lengths"],
};

{
  "vocabulary": {
    "type": "extended",
    "extend": false,
    "directory_path": "data/vocabulary",
  },

  "dataset_reader": {
    "type": "conll2012_jsonl",
    // NOTE(review): "offset" is presumably what implements "no peeking"
    // in the reader; verify against the conll2012_jsonl reader code.
    "offset": 1,
    "token_indexers": {
      "tokens": { "type": "single_id", "lowercase_tokens": true },
    },
  },

  "train_data_path": "data/conll-2012/processed/train.jsonl",
  "validation_data_path": "data/conll-2012/processed/dev.jsonl",

  "model": {
    "type": "entitydisc",
    "text_field_embedder": {
      "token_embedders": {
        "tokens": {
          "type": "embedding",
          "embedding_dim": embedding_dim,
          "trainable": true,
        },
      },
    },
    "embedding_dim": embedding_dim,
    "hidden_size": 128,
    "num_layers": 1,
    "max_mention_length": 100,
    "max_embeddings": 100,
    "dropout_rate": 0.4,
    "variational_dropout_rate": 0.1,
  },

  "iterator": train_iterator,
  "validation_iterator": train_iterator + { "truncate": false },

  "trainer": {
    "type": "lm",
    "num_epochs": 400,
    "cuda_device": 0,
    "optimizer": { "type": "adam", "lr": 1e-4 },
    // NOTE(review): the leading "+" presumably follows the AllenNLP
    // convention for a metric that should increase; confirm that the
    // custom "lm" trainer honors it.
    "validation_metric": "+eid_acc",
  },
}
71 changes: 71 additions & 0 deletions experiments/entity_disc_conll2012_no_peeking_mini.jsonnet
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
// "No peeking" entity-discriminator experiment on the mini CoNLL-2012
// splits (train-mini.jsonl / dev-mini.jsonl), with the vocabulary pointed
// at data/vocabulary-mini.

// Embedding width; the same value is given to the token embedder and to
// the model's own "embedding_dim".
local embedding_dim = 128;

// Batching settings for training. The validation iterator reuses them
// unchanged and only adds "truncate": false.
local train_iterator = {
  "type": "fancy",
  "batch_size": 16,
  "split_size": 15,
  "splitting_keys": ["source", "entity_types", "entity_ids", "mention_lengths"],
};

{
  "vocabulary": {
    "type": "extended",
    "extend": false,
    "directory_path": "data/vocabulary-mini",
  },

  "dataset_reader": {
    "type": "conll2012_jsonl",
    // NOTE(review): "offset" is presumably what implements "no peeking"
    // in the reader; verify against the conll2012_jsonl reader code.
    "offset": 1,
    "token_indexers": {
      "tokens": { "type": "single_id", "lowercase_tokens": true },
    },
  },

  "train_data_path": "data/conll-2012/processed/train-mini.jsonl",
  "validation_data_path": "data/conll-2012/processed/dev-mini.jsonl",

  "model": {
    "type": "entitydisc",
    "text_field_embedder": {
      "token_embedders": {
        "tokens": {
          "type": "embedding",
          "embedding_dim": embedding_dim,
          "trainable": true,
        },
      },
    },
    "embedding_dim": embedding_dim,
    "hidden_size": 128,
    "num_layers": 1,
    "max_mention_length": 50,
    "max_embeddings": 10,
    "dropout_rate": 0.4,
    "variational_dropout_rate": 0.1,
  },

  "iterator": train_iterator,
  "validation_iterator": train_iterator + { "truncate": false },

  "trainer": {
    "type": "lm",
    "num_epochs": 400,
    "cuda_device": 0,
    "optimizer": { "type": "adam", "lr": 1e-4 },
    // NOTE(review): the leading "+" presumably follows the AllenNLP
    // convention for a metric that should increase; confirm that the
    // custom "lm" trainer honors it.
    "validation_metric": "+eid_acc",
  },
}
4 changes: 2 additions & 2 deletions experiments/entity_disc_test.jsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"vocabulary": {
"type": "extended",
"extend": false,
"directory_path": "./results/entity-nlm-wt2.fixed-vocab.dropout.2/vocabulary"
"directory_path": "data/vocabulary"
},
"dataset_reader": {
"type": "enhanced-wikitext",
Expand Down Expand Up @@ -59,4 +59,4 @@
"lr": 1e-3
}
}
}
}
14 changes: 13 additions & 1 deletion experiments/entity_nlm.jsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
},
"iterator": {
"type": "fancy",
"batch_size": 60,
"batch_size": 30,
"split_size": 70,
"splitting_keys": [
"source",
Expand All @@ -13,6 +13,18 @@
"mention_lengths"
]
},
"validation_iterator": {
"type": "fancy",
"batch_size": 30,
"split_size": 70,
"splitting_keys": [
"source",
"entity_types",
"entity_ids",
"mention_lengths"
],
"truncate": false
},
"model": {
"type": "entitynlm",
"dropout_rate": 0.5,
Expand Down
23 changes: 16 additions & 7 deletions experiments/entity_nlm_conll2012.jsonnet
Original file line number Diff line number Diff line change
@@ -1,11 +1,8 @@
{
"vocabulary": {
"type": "extended",
"max_vocab_size": {
// This does not count the @@UNKNOWN@@ token, which
// ends up being our 10,000th token.
"tokens": 9999
}
"extend": false,
"directory_path": "data/vocabulary"
},
"dataset_reader": {
"type": "conll2012_jsonl",
Expand Down Expand Up @@ -41,14 +38,26 @@
},
"iterator": {
"type": "fancy",
"batch_size": 16,
"split_size": 30,
"batch_size": 512,
"split_size": 15,
"splitting_keys": [
"source",
"entity_types",
"entity_ids",
"mention_lengths"
],
},
"validation_iterator": {
"type": "fancy",
"batch_size": 512,
"split_size": 15,
"splitting_keys": [
"source",
"entity_types",
"entity_ids",
"mention_lengths"
],
"truncate": false
},
"trainer": {
"type": "lm",
Expand Down
Loading

0 comments on commit e915b77

Please sign in to comment.