Commit
Merge pull request #4 from rloganiv/beamsum
Beamsum
rloganiv authored Jul 13, 2020
2 parents e915b77 + ef6b352 commit 003ffd8
Showing 37 changed files with 2,039 additions and 653 deletions.
.dockerignore (5 changes: 3 additions & 2 deletions)
@@ -5,10 +5,11 @@ Dockerfile
 old-results/
 .git/
 .gitignore
+.ipynb_checkpoints/
 .mypy_cache
 .pytest_cache/
-results
-scripts
+results/
+scripts/
 .travis.yml
 .venv
 .vscode
.gitignore (7 changes: 6 additions & 1 deletion)
@@ -3,6 +3,7 @@
 *.pyc
 __pycache__
 .venv/
+.ipynb_checkpoints
 
 
 # Testing
@@ -14,5 +15,9 @@ __pycache__
 
 .vscode/
 
-# AlleNLP
+# Experiment-related
 results/
+data/
+*.yaml
+*.npy
+*.jinja2
Dockerfile (47 changes: 21 additions & 26 deletions)
@@ -1,33 +1,28 @@
-FROM nvidia/cuda:10.0-cudnn7-devel-ubuntu16.04
-RUN echo "deb-src http://archive.ubuntu.com/ubuntu/ xenial main" | tee -a /etc/apt/sources.list
-RUN apt-get update && apt-get install -y --no-install-recommends \
-    build-essential \
-    cmake \
-    git \
-    curl \
-    vim \
-    ca-certificates \
-    libjpeg-dev \
-    libpng-dev &&\
-    rm -rf /var/lib/apy/lists/*
-# FROM python:3.6.8-jessie
+FROM pytorch/pytorch:1.1.0-cuda10.0-cudnn7.5-runtime
 ENV LC_ALL=C.UTF-8
 ENV LANG=C.UTF-8
 
-RUN curl -o ~/miniconda.sh -O https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh && \
-    chmod +x ~/miniconda.sh && \
-    ~/miniconda.sh -b -p /opt/conda && \
-    rm ~/miniconda.sh && \
-    /opt/conda/bin/conda install -y python=$PYTHON_VERSION numpy pyyaml scipy ipython mkl mkl-include cython typing && \
-    /opt/conda/bin/conda install -y -c pytorch magma-cuda100 && \
-    /opt/conda/bin/conda clean -ya
-ENV PATH /opt/conda/bin:$PATH
+ENV PATH /usr/local/nvidia/bin/:$PATH
+ENV LD_LIBRARY_PATH /usr/local/nvidia/lib:/usr/local/nvidia/lib64
+
+# Tell nvidia-docker the driver spec that we need as well as to
+# use all available devices, which are mounted at /usr/local/nvidia.
+# The LABEL supports an older version of nvidia-docker, the env
+# variables a newer one.
+ENV NVIDIA_VISIBLE_DEVICES all
+ENV NVIDIA_DRIVER_CAPABILITIES compute,utility
+LABEL com.nvidia.volumes.needed="nvidia_driver"
 
 WORKDIR /workspace
-RUN chmod -R a+w /workspace
-
-COPY requirements.txt .
-RUN pip install --upgrade pip
-RUN pip install -r requirements.txt
-
-COPY experiments/ experiments/
-COPY kglm/ kglm/
+COPY .pylintrc .pylintrc
+COPY pytest.ini pytest.ini
+COPY README.md README.md
+COPY requirements.txt .
+
+RUN pip install -r requirements.txt
+RUN chmod -R a+w /workspace
+COPY kglm/ kglm/
+COPY experiments/ experiments/
experiments/conll_2012_vocab.jsonnet (17 changes: 17 additions & 0 deletions)
@@ -0,0 +1,17 @@
+{
+    "vocabulary": {
+        "type": "extended",
+        "max_vocab_size": {"tokens": 10000}
+    },
+    "datasets_for_vocab_creation": ["train"],
+    "dataset_reader": {
+        "type": "conll2012_jsonl",
+        "token_indexers": {
+            "tokens": {
+                "type": "single_id",
+                "lowercase_tokens": false
+            }
+        }
+    },
+    "train_data_path": "data/conll-2012/processed/train.jsonl",
+}
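This new config defines only a vocabulary, a dataset reader, and a training path, so it is meant for AllenNLP's vocabulary-creation step rather than a full training run. A minimal sketch of the equivalent Python, assuming the AllenNLP 0.x API this repo targets; the output directory is an assumption, the stock Vocabulary stands in for the package's "extended" type, and import_submodules("kglm") is assumed to be what registers the "conll2012_jsonl" reader:

from allennlp.common.params import Params
from allennlp.common.util import import_submodules
from allennlp.data.dataset_readers import DatasetReader
from allennlp.data.vocabulary import Vocabulary

import_submodules("kglm")  # assumed: registers this repo's "conll2012_jsonl" reader

params = Params.from_file("experiments/conll_2012_vocab.jsonnet")
reader = DatasetReader.from_params(params.pop("dataset_reader"))
instances = reader.read(params.pop("train_data_path"))

# Cap the "tokens" namespace at 10k entries, mirroring "max_vocab_size" above.
vocab = Vocabulary.from_instances(instances, max_vocab_size={"tokens": 10000})
vocab.save_to_files("data/vocabulary")  # assumed output location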
experiments/entity_disc.jsonnet (4 changes: 2 additions & 2 deletions)
@@ -37,7 +37,7 @@
     "trainer": {
         "type": "lm",
         "cuda_device": 0,
-        "num_epochs": 750,
+        "num_epochs": 13,
         "optimizer": {
             "type": "adam",
             "lr": 0.0003
@@ -48,4 +48,4 @@
         "directory_path": "data/enhanced-wikitext-2/vocab",
         "extend": false
     }
-}
+}
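Configs like this one are consumed by AllenNLP's training entry point. A hedged sketch of that call, assuming the standard AllenNLP 0.x API rather than any custom command this repo may ship; the serialization directory is an arbitrary choice:

from allennlp.common.util import import_submodules
from allennlp.commands.train import train_model_from_file

import_submodules("kglm")  # assumed: exposes the registered "lm" trainer and models

train_model_from_file(
    parameter_filename="experiments/entity_disc.jsonnet",
    serialization_dir="results/entity_disc",  # arbitrary output directory
)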
experiments/entity_disc_conll2012.jsonnet (6 changes: 0 additions & 6 deletions)
@@ -6,12 +6,6 @@
     },
     "dataset_reader": {
         "type": "conll2012_jsonl",
-        "token_indexers": {
-            "tokens": {
-                "type": "single_id",
-                "lowercase_tokens": true
-            }
-        }
     },
     "train_data_path": "data/conll-2012/processed/train.jsonl",
     "validation_data_path": "data/conll-2012/processed/dev.jsonl",
experiments/entity_disc_conll2012_no_peeking.jsonnet (24 changes: 9 additions & 15 deletions)
@@ -6,13 +6,7 @@
     },
     "dataset_reader": {
         "type": "conll2012_jsonl",
-        "offset": 1,
-        "token_indexers": {
-            "tokens": {
-                "type": "single_id",
-                "lowercase_tokens": true
-            }
-        }
+        "offset": 1
     },
     "train_data_path": "data/conll-2012/processed/train.jsonl",
     "validation_data_path": "data/conll-2012/processed/dev.jsonl",
@@ -22,13 +16,13 @@
         "token_embedders": {
             "tokens": {
                 "type": "embedding",
-                "embedding_dim": 128,
+                "embedding_dim": 256,
                 "trainable": true
             },
         },
     },
-    "embedding_dim": 128,
-    "hidden_size": 128,
+    "embedding_dim": 256,
+    "hidden_size": 256,
     "num_layers": 1,
     "max_mention_length": 100,
     "max_embeddings": 100,
@@ -37,8 +31,8 @@
     },
     "iterator": {
         "type": "fancy",
-        "batch_size": 16,
-        "split_size": 15,
+        "batch_size": 343,
+        "split_size": 30,
         "splitting_keys": [
             "source",
             "entity_types",
@@ -48,8 +42,8 @@
     },
     "validation_iterator": {
         "type": "fancy",
-        "batch_size": 16,
-        "split_size": 15,
+        "batch_size": 343,
+        "split_size": 128,
         "splitting_keys": [
             "source",
             "entity_types",
@@ -64,7 +58,7 @@
         "cuda_device": 0,
         "optimizer": {
             "type": "adam",
-            "lr": 1e-4
+            "lr": 1e-3
         },
         "validation_metric": "+eid_acc"
     }
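The batch_size/split_size changes above feed the "fancy" iterator, which chops each of the splitting_keys along the time dimension into chunks of split_size tokens so that long documents can be trained with truncated backpropagation through time, carrying hidden state across chunks. An illustrative sketch of that splitting step in plain Python (not the repository's actual iterator):

def split_batch(batch, splitting_keys, split_size):
    """Yield sub-batches whose listed keys are chunked along the time axis."""
    total_length = len(batch[splitting_keys[0]])
    for start in range(0, total_length, split_size):
        yield {
            key: value[start:start + split_size] if key in splitting_keys else value
            for key, value in batch.items()
        }

# A 70-step "document" becomes chunks of 30, 30, and 10 steps.
batch = {"source": list(range(70)), "entity_types": list(range(70)), "doc_id": 7}
chunks = list(split_batch(batch, ["source", "entity_types"], split_size=30))
assert [len(c["source"]) for c in chunks] == [30, 30, 10]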
experiments/entity_disc_conll2012_prp.jsonnet (64 changes: 64 additions & 0 deletions)
@@ -0,0 +1,64 @@
+{
+    "vocabulary": {
+        "type": "extended",
+        "extend": false,
+        "directory_path": "/kermit/rlogan/entity-nlm/data/vocabulary"
+    },
+    "dataset_reader": {
+        "type": "conll2012_jsonl",
+    },
+    "train_data_path": "/kermit/rlogan/entity-nlm/data/conll-2012/processed/train.jsonl",
+    "validation_data_path": "/kermit/rlogan/entity-nlm/data/conll-2012/processed/dev.jsonl",
+    "model": {
+        "type": "entitydisc",
+        "text_field_embedder": {
+            "token_embedders": {
+                "tokens": {
+                    "type": "embedding",
+                    "embedding_dim": 256,
+                    "trainable": true
+                },
+            },
+        },
+        "embedding_dim": 256,
+        "hidden_size": 256,
+        "num_layers": 1,
+        "max_mention_length": 100,
+        "max_embeddings": 100,
+        "dropout_rate": 0.4,
+        "variational_dropout_rate": 0.1
+    },
+    "iterator": {
+        "type": "fancy",
+        "batch_size": 343,
+        "split_size": 30,
+        "splitting_keys": [
+            "source",
+            "entity_types",
+            "entity_ids",
+            "mention_lengths"
+        ],
+    },
+    "validation_iterator": {
+        "type": "fancy",
+        "batch_size": 343,
+        "split_size": 128,
+        "splitting_keys": [
+            "source",
+            "entity_types",
+            "entity_ids",
+            "mention_lengths"
+        ],
+        "truncate": false
+    },
+    "trainer": {
+        "type": "lm",
+        "num_epochs": 400,
+        "cuda_device": 0,
+        "optimizer": {
+            "type": "adam",
+            "lr": 1e-3
+        },
+        "validation_metric": "+eid_acc"
+    }
+}
experiments/entity_nlm.jsonnet (2 changes: 1 addition & 1 deletion)
@@ -53,7 +53,7 @@
         "num_epochs": 750,
         "optimizer": {
             "type": "adam",
-            "lr": 0.0003
+            "lr": 0.0001
         }
     },
     "vocabulary": {
experiments/entity_nlm_conll2012.jsonnet (34 changes: 14 additions & 20 deletions)
@@ -1,17 +1,11 @@
 {
     "vocabulary": {
         "type": "extended",
-        "extend": false,
-        "directory_path": "data/vocabulary"
+        "directory_path": "data/vocabulary",
+        "extend": false
     },
     "dataset_reader": {
         "type": "conll2012_jsonl",
-        "token_indexers": {
-            "tokens": {
-                "type": "single_id",
-                "lowercase_tokens": true
-            }
-        }
     },
     "train_data_path": "data/conll-2012/processed/train.jsonl",
     "validation_data_path": "data/conll-2012/processed/dev.jsonl",
@@ -22,24 +16,24 @@
         "token_embedders": {
             "tokens": {
                 "type": "embedding",
-                "embedding_dim": 256,
+                "embedding_dim": 300,
                 "trainable": true
             },
         },
     },
-    "embedding_dim": 256,
-    "hidden_size": 256,
+    "embedding_dim": 300,
+    "hidden_size": 300,
     "num_layers": 1,
     "max_mention_length": 100,
     "max_embeddings": 100,
-    "tie_weights": true,
-    "dropout_rate": 0.4,
-    "variational_dropout_rate": 0.1
+    "tie_weights": false,
+    "dropout_rate": 0.1,
+    "variational_dropout_rate": 0.2
     },
     "iterator": {
         "type": "fancy",
-        "batch_size": 512,
-        "split_size": 15,
+        "batch_size": 256,
+        "split_size": 120,
         "splitting_keys": [
             "source",
             "entity_types",
@@ -49,8 +43,8 @@
     },
     "validation_iterator": {
         "type": "fancy",
-        "batch_size": 512,
-        "split_size": 15,
+        "batch_size": 343,
+        "split_size": 128,
         "splitting_keys": [
             "source",
             "entity_types",
@@ -61,11 +55,11 @@
     },
     "trainer": {
         "type": "lm",
-        "num_epochs": 40,
+        "num_epochs": 400,
         "cuda_device": 0,
         "optimizer": {
             "type": "adam",
-            "lr": 1e-3
+            "lr": 1e-3,
         }
     }
 }
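One notable switch above is "tie_weights" going from true to false, meaning the model no longer shares its input embedding matrix with the output projection. A small PyTorch sketch of what such a flag conventionally controls (an illustration, not this repository's EntityNLM implementation; tying requires embedding_dim == hidden_size, which the config keeps equal at 300):

import torch.nn as nn

class TinyLM(nn.Module):
    """Skeleton language model head illustrating embedding/output weight tying."""

    def __init__(self, vocab_size: int, dim: int, tie_weights: bool = False):
        super().__init__()
        self.embed = nn.Embedding(vocab_size, dim)
        self.rnn = nn.LSTM(dim, dim, num_layers=1, batch_first=True)
        self.out = nn.Linear(dim, vocab_size)
        if tie_weights:
            # Share one (vocab_size x dim) matrix between input and output.
            self.out.weight = self.embed.weight

    def forward(self, tokens):
        hidden, _ = self.rnn(self.embed(tokens))
        return self.out(hidden)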