From 1faa08a2d2f2602f7320dba1e20d62f185d3cc57 Mon Sep 17 00:00:00 2001 From: rloganiv Date: Thu, 17 Oct 2019 13:04:20 -0700 Subject: [PATCH 01/35] * Added WeightDropLstm module --- kglm/modules/__init__.py | 2 +- kglm/modules/weight_drop.py | 77 +++++++++++++++++++++++++++++++++++-- 2 files changed, 74 insertions(+), 5 deletions(-) diff --git a/kglm/modules/__init__.py b/kglm/modules/__init__.py index 13ab4d2..9aa110a 100644 --- a/kglm/modules/__init__.py +++ b/kglm/modules/__init__.py @@ -4,4 +4,4 @@ from .locked_dropout import LockedDropout from .recent_entities import RecentEntities from .splitcross import SplitCrossEntropyLoss -from .weight_drop import WeightDrop +from .weight_drop import WeightDrop, WeightDroppedLstm diff --git a/kglm/modules/weight_drop.py b/kglm/modules/weight_drop.py index 0370711..3b42c93 100644 --- a/kglm/modules/weight_drop.py +++ b/kglm/modules/weight_drop.py @@ -1,10 +1,18 @@ +from typing import Dict, List, Optional, Tuple + +from overrides import overrides import torch from torch.nn import Parameter import torch.nn.functional as F +LstmState = Tuple[torch.FloatTensor, torch.FloatTensor] +StateDict = Dict[str, LstmState] + + class WeightDrop(torch.nn.Module): "A module that warps another layer in which some weights will be replaced by 0 during training." + # pylint: disable=protected-access def __init__(self, module, weights, dropout=0): super().__init__() @@ -13,9 +21,9 @@ def __init__(self, module, weights, dropout=0): self.dropout = dropout for weight in self.weights: #Makes a copy of the weights of the selected layers. - w = getattr(self.module, weight) - self.register_parameter(f'{weight}_raw', Parameter(w.data)) - self.module._parameters[weight] = F.dropout(w, p=self.dropout, training=False) + raw_w = getattr(self.module, weight) + self.register_parameter(f'{weight}_raw', Parameter(raw_w.data)) + self.module._parameters[weight] = F.dropout(raw_w, p=self.dropout, training=False) def _setweights(self): "Apply dropout to the raw weights." @@ -31,5 +39,66 @@ def reset(self): for weight in self.weights: raw_w = getattr(self, f'{weight}_raw') self.module._parameters[weight] = F.dropout(raw_w, p=self.dropout, training=False) - if hasattr(self.module, 'reset'): self.module.reset() + if hasattr(self.module, 'reset'): + self.module.reset() + + +class WeightDroppedLstm(torch.nn.Module): + def __init__(self, + num_layers: int, + embedding_dim: int, + hidden_size: int, + dropout: float) -> None: + super().__init__() + + self._num_layers = num_layers + self._embedding_dim = embedding_dim + self._hidden_size = hidden_size + self._dropout = dropout + self._state: Optional[StateDict] = None + + # Create an LSTM for each layer and apply weight drop. + rnns: List[torch.nn.Module] = [] + for i in range(num_layers): + if i == 0: + input_size = embedding_dim + else: + input_size = hidden_size + if i == num_layers - 1: + output_size = embedding_dim + else: + output_size = hidden_size + rnns.append(torch.nn.LSTM(input_size, output_size, batch_first=True)) + rnns = [WeightDrop(rnn, ['weight_hh_l0'], dropout=dropout) for rnn in rnns] + + self._rnns = torch.nn.ModuleList(rnns) + + @overrides + def forward(self, embeddings: torch.FloatTensor) -> torch.FloatTensor: # pylint: disable=arguments-differ + current_input = embeddings + hidden_list = [] + for layer, rnn in enumerate(self._rnns): + # Retrieve previous hidden state for layer. Weird syntax in order to appease MyPy. 
+ prev_hidden: Optional[LstmState] = None + if self._state is not None: + prev_hidden = self._state['layer_%i' % layer] + # Forward-pass. + output, hidden = rnn(current_input, prev_hidden) + output = output.contiguous() + # Update hidden state for layer. + hidden = tuple(h.detach() for h in hidden) + hidden_list.append(hidden) + current_input = output + self._state = {'layer_%i' % i: h for i, h in enumerate(hidden_list)} + return current_input + def reset(self, reset: torch.ByteTensor) -> None: + """Resets the internal hidden states""" + # pylint: disable=invalid-name + if self._state is None: + return + for layer in range(self._num_layers): + h, c = self._state['layer_%i' % layer] + h[:, reset, :] = torch.zeros_like(h[:, reset, :]) + c[:, reset, :] = torch.zeros_like(c[:, reset, :]) + self._state['layer_%i' % layer] = (h, c) From 793ecc7fd5d0cd2e0f8f850a199f30b3eaa1f621 Mon Sep 17 00:00:00 2001 From: rloganiv Date: Thu, 17 Oct 2019 13:20:35 -0700 Subject: [PATCH 02/35] * Updated EntityNLMDiscriminator to used WeightDroppedLstm --- kglm/models/entity_disc.py | 132 ++++++++++++++++++------------------- 1 file changed, 64 insertions(+), 68 deletions(-) diff --git a/kglm/models/entity_disc.py b/kglm/models/entity_disc.py index 692becd..3156868 100644 --- a/kglm/models/entity_disc.py +++ b/kglm/models/entity_disc.py @@ -2,7 +2,7 @@ Discriminative version of EntityNLM for importance sampling. """ import logging -from typing import Dict, List, Optional, Union +from typing import Any, Dict, List, Optional, Tuple, Union from allennlp.nn.util import get_text_field_mask from allennlp.data.vocabulary import Vocabulary @@ -17,7 +17,7 @@ import torch import torch.nn.functional as F -from kglm.modules import DynamicEmbedding, WeightDrop +from kglm.modules import DynamicEmbedding, WeightDroppedLstm from kglm.nn.util import sample_from_logp logger = logging.getLogger(__name__) @@ -76,22 +76,7 @@ def __init__(self, self._max_embeddings = max_embeddings self._sos_token = self.vocab.get_token_index('@@START@@', 'tokens') self._eos_token = self.vocab.get_token_index('@@END@@', 'tokens') - - # Rnn Encoders. 
- rnns: List[torch.nn.Module] = [] - for i in range(num_layers): - if i == 0: - input_size = embedding_dim - else: - input_size = hidden_size - if (i == num_layers - 1): - output_size = embedding_dim - else: - output_size = hidden_size - rnns.append(torch.nn.LSTM(input_size, output_size, batch_first=True)) - rnns = [WeightDrop(rnn, ['weight_hh_l0'], dropout=variational_dropout_rate) for rnn in rnns] - self.rnns = torch.nn.ModuleList(rnns) - + self._rnn = WeightDroppedLstm(num_layers, embedding_dim, hidden_size, variational_dropout_rate) self._state: Optional[StateDict] = None # Input variational dropout @@ -109,6 +94,7 @@ def __init__(self, self._mention_length_projection = torch.nn.Linear(in_features=2*embedding_dim, out_features=max_mention_length) + # Metrics self._entity_type_accuracy = CategoricalAccuracy() self._entity_id_accuracy = CategoricalAccuracy() self._mention_length_accuracy = CategoricalAccuracy() @@ -167,7 +153,7 @@ def forward(self, # pylint: disable=arguments-differ return output_dict - def sample(self, + def sample(self, # pylint: disable=unused-argument source: Dict[str, torch.Tensor], reset: torch.ByteTensor = None, temperature: float = 1.0, @@ -228,24 +214,7 @@ def sample(self, mask = mask.byte() & eos_mask embeddings = self._text_field_embedder(source) - current_input = embeddings - hidden_list = [] - for layer, rnn in enumerate(self.rnns): - # Retrieve previous hidden state for layer. - if self._state is not None: - prev_hidden = self._state['layer_%i' % layer] - else: - prev_hidden = None - # Forward-pass. - output, hidden = rnn(current_input, prev_hidden) - output = output.contiguous() - # Update hidden state for layer. - hidden = tuple(h.detach() for h in hidden) - hidden_list.append(hidden) - current_input = output - hidden = current_input - - self._state = {'layer_%i' % i: h for i, h in enumerate(hidden_list)} + hidden = self._rnn(embeddings) # Initialize outputs logp = hidden.new_zeros(batch_size) # Track total logp for **each** generated sample @@ -344,6 +313,54 @@ def sample(self, } } + def _beam_step_fn(self, + candidates: torch.Tensor, + prev_state: Dict[str, Any]) -> Tuple[torch.Tensor, torch.Tensor]: + pass + + + def predict_top_k(self, + source: Dict[str, torch.Tensor], + reset: torch.ByteTensor, + k: int) -> Tuple[torch.Tensor, torch.Tensor]: + """ + Obtain the top-k (approximately) most likely predictions from the model using beam + search. Unlike typical beam search all of the beam states are returned instead of just + the most likely. + + The returned candidates are intended to be marginalized over to obtain an upper bound for + the token-level perplexity of the EntityNLM. + + Parameters + ========== + source : ``Dict[str, torch.Tensor]`` + A tensor of shape ``(batch_size, sequence_length)`` containing the sequence of + tokens. + reset : ``torch.ByteTensor`` + Whether or not to reset the model's state. This should be done at the start of each + new sequence. + k : ``int`` + Number of predictions to return. + + Returns + ======= + predictions : ``torch.Tensor`` + A tensor of shape ``(batch_size * k, sequence_length)`` containing the top-k + predictions. + logp : ``torch.Tensor`` + The log-probabilities of each prediction. WARNING: These are returned purely for + diagnostic purposes and should not be factored in the the perplexity calculation. + """ + batch_size = source['tokens'].shape[0] + + # Reset + if reset is not None: + self.reset_states(reset) + + # Figure out WTF to do... 
+ + + def _forward_loop(self, tokens: Dict[str, torch.Tensor], entity_types: torch.Tensor, @@ -391,24 +408,7 @@ def _forward_loop(self, mask = get_text_field_mask(tokens) embeddings = self._text_field_embedder(tokens) embeddings = self._variational_dropout(embeddings) - current_input = embeddings - hidden_list = [] - for layer, rnn in enumerate(self.rnns): - # Retrieve previous hidden state for layer. - if self._state is not None: - prev_hidden = self._state['layer_%i' % layer] - else: - prev_hidden = None - # Forward-pass. - output, hidden = rnn(current_input, prev_hidden) - output = output.contiguous() - # Update hidden state for layer. - hidden = tuple(h.detach() for h in hidden) - hidden_list.append(hidden) - current_input = output - hidden = current_input - - self._state = {'layer_%i' % i: h for i, h in enumerate(hidden_list)} + hidden = self._rnn(embeddings) # Initialize losses entity_type_loss = torch.tensor(0.0, requires_grad=True, device=hidden.device) @@ -498,9 +498,10 @@ def _forward_loop(self, 'loss': total_loss } - # Update state # Update the model state - self._state['prev_mention_lengths'] = mention_lengths[:, -1].detach() + self._state = { + 'prev_mention_lengths': mention_lengths[:, -1].detach() + } return output_dict @@ -509,15 +510,10 @@ def reset_states(self, reset: torch.ByteTensor) -> None: if reset.any() and (self._state is not None): # Zero out any previous elements self._state['prev_mention_lengths'][reset] = 1 - # Zero out the hidden state - for layer in range(self._num_layers): - h, c = self._state['layer_%i' % layer] - h[:, reset, :] = torch.zeros_like(h[:, reset, :]) - c[:, reset, :] = torch.zeros_like(c[:, reset, :]) - self._state['layer_%i' % layer] = (h, c) - - # Reset the dynamic embeddings + + # Reset the dynamic embeddings and lstm self._dynamic_embeddings.reset_states(reset) + self._rnn.reset(reset) def detach_states(self): """Detaches the model's state to enforce truncated backpropagation.""" @@ -526,7 +522,7 @@ def detach_states(self): @overrides def get_metrics(self, reset: bool = False) -> Dict[str, float]: return { - 'et_acc': self._entity_type_accuracy.get_metric(reset), - 'eid_acc': self._entity_id_accuracy.get_metric(reset), - 'ml_acc': self._mention_length_accuracy.get_metric(reset) + 'et_acc': self._entity_type_accuracy.get_metric(reset), + 'eid_acc': self._entity_id_accuracy.get_metric(reset), + 'ml_acc': self._mention_length_accuracy.get_metric(reset) } From 9c4665f2952c468e9490a499d9566d1d0f4b8336 Mon Sep 17 00:00:00 2001 From: rloganiv Date: Thu, 17 Oct 2019 19:13:10 -0700 Subject: [PATCH 03/35] * Top-k predictions worked out --- kglm/models/entity_disc.py | 129 +++++++++++++++++++++++++-- kglm/modules/dynamic_embeddings.py | 1 - kglm/tests/models/entity_nlm_test.py | 12 +++ 3 files changed, 133 insertions(+), 9 deletions(-) diff --git a/kglm/models/entity_disc.py b/kglm/models/entity_disc.py index 3156868..a5b34d0 100644 --- a/kglm/models/entity_disc.py +++ b/kglm/models/entity_disc.py @@ -314,9 +314,100 @@ def sample(self, # pylint: disable=unused-argument } def _beam_step_fn(self, - candidates: torch.Tensor, - prev_state: Dict[str, Any]) -> Tuple[torch.Tensor, torch.Tensor]: - pass + hidden: torch.FloatTensor, + timestep: int, + k: int, + last_predictions: Optional[Dict[str, torch.Tensor]] = None, + state: Optional[Dict[str, Any]] = None) -> Tuple[torch.Tensor, torch.Tensor]: + """ + This function computes the top-k most likely next states for the beam. 
+ + Parameters + ========== + + Returns + ======= + A tuple comprised of: + next + """ + batch_size, hidden_dim = hidden.shape + + # Initial predictions are a special case since we only have to consider batch_size inputs + # and don't need to deal with ongoing mentions. + if last_predictions is None: + # Step 1 - Compute type of next token + entity_type_logits = self._entity_type_projection(hidden) + + # shape: (batch_size, 2) + entity_type_logp = F.log_softmax(entity_type_logits, -1) + + # Step 2 - Compute probabilities for each entity + # TODO: Take this shit out - its obvious... + entity_id_prediction_outputs = self._dynamic_embeddings(hidden=hidden, + timestep=timestep) + entity_id_logits = entity_id_prediction_outputs['logits'] + + # shape: (batch_size, num_entities) + entity_id_logp = F.log_softmax(entity_id_logits, -1) # Technically should mask + + # Step 3 - Compute mention length probabilities for each entity + + # shape: (batch_size, num_entities, embedding_dim) + embeddings = self._dynamic_embeddings.embeddings[:, :1] # Only one entity right now + + concatenated = torch.cat((hidden.unsqueeze(1), embeddings), dim=-1) # Unsqueeze? + mention_length_logits = self._mention_length_projection(concatenated) + + # shape: (batch_size, num_entities, max_mention_length) + mention_length_logp = F.log_softmax(mention_length_logits, -1) + + # Step 4 - Combine log probabilities + + # Use meshgrid to get the Cartesian product of entity id and mention length options. + # shape: (num_entities, max_mention_length) + # TODO: Can mention_length = 0? + entity_ids, mention_lengths = torch.meshgrid(torch.arange(1, dtype=torch.int64), + torch.arange(self._max_mention_length, dtype=torch.int64)) + entity_ids = entity_ids.view(-1) + mention_lengths = mention_lengths.view(-1) + entity_types = torch.ones_like(entity_ids, dtype=torch.uint8) + + # When the type is not a mention there is only one possible annotation. Add it. + entity_types = torch.cat((torch.ByteTensor([0]), entity_types)) + entity_ids = torch.cat((torch.LongTensor([0]), entity_ids)) + mention_lengths = torch.cat((torch.LongTensor([1]), mention_lengths)) + num_annotations = entity_types.shape[0] + + logp = torch.zeros(size=(batch_size, num_annotations), + dtype=torch.float32, + device=hidden.device) + logp[:, 0] = entity_type_logp[:, 0] + logp[:, 1:] = entity_type_logp[:, 1].unsqueeze(-1) + logp[:, 1:] += entity_id_logp.view(batch_size, -1) + logp[:, 1:] += mention_length_logp.view(batch_size, -1) + + top_logp, top_indices = logp.topk(k, dim=-1) + output_dict = { + 'entity_types': torch.take(entity_types, top_indices), + 'entity_ids': torch.take(entity_ids, top_indices) + 'mention_lengths': torch.take(mention_lengths, top_indices) + } + return top_logp, output_dict + + + + + + + + + + + + + + + def predict_top_k(self, @@ -351,15 +442,37 @@ def predict_top_k(self, The log-probabilities of each prediction. WARNING: These are returned purely for diagnostic purposes and should not be factored in the the perplexity calculation. """ - batch_size = source['tokens'].shape[0] + batch_size, sequence_length = source['tokens'].shape - # Reset - if reset is not None: - self.reset_states(reset) + # Reset the model's internal state. + if not reset.all(): + raise RuntimeError('Detecting that not all states are being `reset` (e.g., that input ' + 'sequences have been split). Cannot predict top-K annotations in ' + 'this setting!') + self.reset_states(reset) + prev_mention_lengths = source['tokens'].new_ones(batch_size) - # Figure out WTF to do... 
+ # Embed and encode the tokens up front. + embeddings = self._text_field_embedder(source['tokens']) + hidden = self._rnn(embeddings) + + # The rest of the code will be a modified version of: + # https://github.com/allenai/allennlp/blob/master/allennlp/nn/beam_search.py + + # List of predictions. One for each time step. Unlike in allennlp's beam search, here each + # element is a dictionary of (batch_size, beam_size) tensors whose keys are the annotation + # variables. + predictions: List[Dict[str, torch.Tensor]] = [] + # List of (batch_size, beam_size) tensors. One for each time step. Stores the index n of + # the parent prediction it came from. + backpointers: List[torch.LongTensor] = [] + + # To keep things simple, we are going to store the beam states + for timestep in range(sequence_length): + pass + # Trace backpointers to get output. def _forward_loop(self, tokens: Dict[str, torch.Tensor], diff --git a/kglm/modules/dynamic_embeddings.py b/kglm/modules/dynamic_embeddings.py index 3578fd3..79bdf19 100644 --- a/kglm/modules/dynamic_embeddings.py +++ b/kglm/modules/dynamic_embeddings.py @@ -241,4 +241,3 @@ def forward(self, # pylint: disable=arguments-differ out['loss'] = loss return out - diff --git a/kglm/tests/models/entity_nlm_test.py b/kglm/tests/models/entity_nlm_test.py index 4b5e9d4..778834b 100644 --- a/kglm/tests/models/entity_nlm_test.py +++ b/kglm/tests/models/entity_nlm_test.py @@ -32,3 +32,15 @@ def setUp(self): def test_model_can_train_save_and_load(self): self.ensure_model_can_train_save_and_load(self.param_file, gradients_to_ignore=['_dummy_context_embedding']) + + def test_beam_step_fn(self): + batch_size = 2 + + # Need to reset the states + reset = torch.ByteTensor([1] * batch_size) + self.model.reset_states(reset) + + # First time step + hidden = torch.randn(batch_size, self.model._embedding_dim) + self.model._beam_step_fn(hidden, 0, k=10) + From f555ccb3d59360dbf6c6affb789ed4ae903c60c6 Mon Sep 17 00:00:00 2001 From: rloganiv Date: Fri, 18 Oct 2019 16:47:46 -0700 Subject: [PATCH 04/35] Partial --- kglm/models/entity_disc.py | 112 ++++++++++++++++++++++----- kglm/tests/models/entity_nlm_test.py | 17 +++- 2 files changed, 107 insertions(+), 22 deletions(-) diff --git a/kglm/models/entity_disc.py b/kglm/models/entity_disc.py index a5b34d0..60f93b0 100644 --- a/kglm/models/entity_disc.py +++ b/kglm/models/entity_disc.py @@ -313,6 +313,96 @@ def sample(self, # pylint: disable=unused-argument } } + @property + def num_possible_annotations(self): + # Number of ways to annotate an entity mention + 1 way to annotate a non-entity mention. + return self._max_embeddings * self._max_mention_length + 1 + + @property + def entity_type_lookup(self): + entity_type_lookup = [0] + [1] * self._max_embeddings * self._max_mention_length + return torch.ByteTensor(entity_type_lookup) + + @property + def entity_id_lookup(self): + entity_id_lookup = [0] + list(range(self._max_embeddings) * self._max_mention_length) + return torch.LongTensor(entity_id_lookup) + + @property + def mention_length_lookup(self): + mention_length_lookup = [1] + list([i] * self._max_embeddings for i in range(self._max_mention_length)) + return torch.LongTensor(mention_length_lookup) + + def _annotation_logp(self, + hidden: torch.FloatTensor, + timestep: int, + state_dict: Dict[str, Any] = None) -> torch.Tensor: + """Computes the log-probability of all possible annotations for a single beam state. 
+ + Parameters + ========== + TODO: Fill in + + Returns + ======= + A tensor of log-probabilities for the possible annotations of shape + (batch_size, num_annotations). + """ + batch_size, hidden_dim = hidden.shape + if state_dict is not None: + self._dynamic_embeddings.load_state_dict(state_dict['dynamic_embeddings_state_dict']) + elif timestep > 0: + raise RuntimeError('Dynamic embedding state_dict required.') + + # Entity type log probabilities: (batch_size, 2) + entity_type_logits = self._entity_type_projection(hidden) + entity_type_logp = F.log_softmax(entity_type_logits, -1) + + # Entity id log probabilities: (batch_size, max_embeddings) + entity_id_logits = self._dynamic_embeddings(hidden, timestep)['logits'] + entity_id_logp = F.log_softmax(entity_id_logits, -1) + + # Mention length log probabilites: (batch_size, max_embeddings x max_mention_lengths) + # NOTE: Entity id is guaranteed to be zero at initialization + embeddings = self._dynamic_embeddings.embeddings + concatenated = torch.cat((hidden.unsqueeze(1).expand_as(embeddings), embeddings), dim=-1) + mention_length_logits = self._mention_length_projection(concatenated) + mention_length_logp = F.log_softmax(mention_length_logits, -1).view(batch_size, -1) + + # Add together log probabilities + logp = torch.zeros((batch_size, self.num_possible_annotations)) + logp[:, 0] += entity_type_logp[:, 0] + logp[:, 1:] += entity_type_logp[:, 1:] + logp[:, 1:] += entity_id_logp.repeat((1, self._max_mention_length)) + logp[:, 1:] += mention_length_logp + + return logp + + def _top_k_annotations(self, logp: torch.FloatTensor, k: int): + """Extracts the top-k annotations. + + Parameters + ========== + logp : torch.Tensor + A (batch_size, beam_width, num_annotations tensor) + """ + batch_size = logp.shape[0] + # Get the top canidates from each beam (makes math much eaiser) + top_logp, top_indices = logp.topk(k, dim=-1) + + # Next flatten + flat_logp = top_logp.view(batch_size, -1) + flat_indices = top_indices.view(batch_size, -1) + + # Next get the top-k overall + top_logp = flat_logp.topk(k, dim=-1) + output_dict = { + 'entity_types': self.entity_type_lookup.take(top_indices), + 'entity_ids': self.entity_id_lookup.take(top_indices), + 'mention_lengths': self.mention_length_lookup.take(top_indices) + } + return top_logp, output_dict + def _beam_step_fn(self, hidden: torch.FloatTensor, timestep: int, @@ -355,7 +445,7 @@ def _beam_step_fn(self, # shape: (batch_size, num_entities, embedding_dim) embeddings = self._dynamic_embeddings.embeddings[:, :1] # Only one entity right now - concatenated = torch.cat((hidden.unsqueeze(1), embeddings), dim=-1) # Unsqueeze? + concatenated = torch.cat((hidden.unsqueeze(1), embeddings), dim=-1) mention_length_logits = self._mention_length_projection(concatenated) # shape: (batch_size, num_entities, max_mention_length) @@ -365,7 +455,6 @@ def _beam_step_fn(self, # Use meshgrid to get the Cartesian product of entity id and mention length options. # shape: (num_entities, max_mention_length) - # TODO: Can mention_length = 0? 
entity_ids, mention_lengths = torch.meshgrid(torch.arange(1, dtype=torch.int64), torch.arange(self._max_mention_length, dtype=torch.int64)) entity_ids = entity_ids.view(-1) @@ -389,27 +478,11 @@ def _beam_step_fn(self, top_logp, top_indices = logp.topk(k, dim=-1) output_dict = { 'entity_types': torch.take(entity_types, top_indices), - 'entity_ids': torch.take(entity_ids, top_indices) + 'entity_ids': torch.take(entity_ids, top_indices), 'mention_lengths': torch.take(mention_lengths, top_indices) } return top_logp, output_dict - - - - - - - - - - - - - - - - def predict_top_k(self, source: Dict[str, torch.Tensor], reset: torch.ByteTensor, @@ -618,6 +691,7 @@ def _forward_loop(self, return output_dict + def reset_states(self, reset: torch.ByteTensor) -> None: """Resets the model's internals. Should be called at the start of a new batch.""" if reset.any() and (self._state is not None): diff --git a/kglm/tests/models/entity_nlm_test.py b/kglm/tests/models/entity_nlm_test.py index 778834b..69016ee 100644 --- a/kglm/tests/models/entity_nlm_test.py +++ b/kglm/tests/models/entity_nlm_test.py @@ -33,14 +33,25 @@ def test_model_can_train_save_and_load(self): self.ensure_model_can_train_save_and_load(self.param_file, gradients_to_ignore=['_dummy_context_embedding']) - def test_beam_step_fn(self): + def test_annotation_logp(self): batch_size = 2 # Need to reset the states reset = torch.ByteTensor([1] * batch_size) self.model.reset_states(reset) - # First time step + # Apply to random hidden state hidden = torch.randn(batch_size, self.model._embedding_dim) - self.model._beam_step_fn(hidden, 0, k=10) + logp = self.model._annotation_logp(hidden, timestep=0) + # Check that output has correct shape + assert tuple(logp.shape) == (batch_size, self.model.num_possible_annotations) + + # Check that state dict can be fed to function... 
+ state_dict = { + 'dynamic_embeddings_state_dict': self.model._dynamic_embeddings.state_dict() + } + logp_prime = self.model._annotation_logp(hidden, timestep=0, state_dict=state_dict) + + # ...and that output is the same as before + assert torch.allclose(logp, logp_prime) From d78d765beb20ee6abdd52d4e2a27c9ba3192a595 Mon Sep 17 00:00:00 2001 From: Robert Logan Date: Tue, 22 Oct 2019 22:59:57 -0700 Subject: [PATCH 05/35] Most of the beam search logic implemented --- kglm/models/entity_disc.py | 279 ++++++++++++++------------- kglm/tests/models/entity_nlm_test.py | 55 +++++- 2 files changed, 197 insertions(+), 137 deletions(-) diff --git a/kglm/models/entity_disc.py b/kglm/models/entity_disc.py index 60f93b0..e715cb2 100644 --- a/kglm/models/entity_disc.py +++ b/kglm/models/entity_disc.py @@ -279,7 +279,6 @@ def sample(self, # pylint: disable=unused-argument # Add / update entity embeddings new_entities = entity_ids[:, timestep] == self._dynamic_embeddings.num_embeddings self._dynamic_embeddings.add_embeddings(timestep, new_entities) - self._dynamic_embeddings.update_embeddings(hidden=current_hidden, update_indices=entity_ids[:, timestep], timestep=timestep, @@ -325,18 +324,18 @@ def entity_type_lookup(self): @property def entity_id_lookup(self): - entity_id_lookup = [0] + list(range(self._max_embeddings) * self._max_mention_length) + entity_id_lookup = [0] + list(range(self._max_embeddings)) * self._max_mention_length return torch.LongTensor(entity_id_lookup) @property def mention_length_lookup(self): - mention_length_lookup = [1] + list([i] * self._max_embeddings for i in range(self._max_mention_length)) + mention_length_lookup = [1] + [i for i in range(self._max_mention_length) for _ in range(self._max_embeddings)] return torch.LongTensor(mention_length_lookup) def _annotation_logp(self, hidden: torch.FloatTensor, timestep: int, - state_dict: Dict[str, Any] = None) -> torch.Tensor: + beam_states: List[Dict[str, Any]]) -> torch.Tensor: """Computes the log-probability of all possible annotations for a single beam state. Parameters @@ -348,33 +347,55 @@ def _annotation_logp(self, A tensor of log-probabilities for the possible annotations of shape (batch_size, num_annotations). 
""" - batch_size, hidden_dim = hidden.shape - if state_dict is not None: - self._dynamic_embeddings.load_state_dict(state_dict['dynamic_embeddings_state_dict']) - elif timestep > 0: - raise RuntimeError('Dynamic embedding state_dict required.') - - # Entity type log probabilities: (batch_size, 2) - entity_type_logits = self._entity_type_projection(hidden) - entity_type_logp = F.log_softmax(entity_type_logits, -1) - - # Entity id log probabilities: (batch_size, max_embeddings) - entity_id_logits = self._dynamic_embeddings(hidden, timestep)['logits'] - entity_id_logp = F.log_softmax(entity_id_logits, -1) - - # Mention length log probabilites: (batch_size, max_embeddings x max_mention_lengths) - # NOTE: Entity id is guaranteed to be zero at initialization - embeddings = self._dynamic_embeddings.embeddings - concatenated = torch.cat((hidden.unsqueeze(1).expand_as(embeddings), embeddings), dim=-1) - mention_length_logits = self._mention_length_projection(concatenated) - mention_length_logp = F.log_softmax(mention_length_logits, -1).view(batch_size, -1) - - # Add together log probabilities - logp = torch.zeros((batch_size, self.num_possible_annotations)) - logp[:, 0] += entity_type_logp[:, 0] - logp[:, 1:] += entity_type_logp[:, 1:] - logp[:, 1:] += entity_id_logp.repeat((1, self._max_mention_length)) - logp[:, 1:] += mention_length_logp + batch_size, hidden_dim = hidden.shape + logp = hidden.new_zeros((batch_size, len(beam_states), self.num_possible_annotations)) + + for i, beam_state in enumerate(beam_states): + self._dynamic_embeddings.load_state_dict(beam_state) + + # Entity type log probabilities: (batch_size, 2) + entity_type_logits = self._entity_type_projection(hidden) + entity_type_logp = F.log_softmax(entity_type_logits, -1) + + # Entity id log probabilities: (batch_size, max_embeddings) + entity_id_logits = self._dynamic_embeddings(hidden, timestep)['logits'] + entity_id_logp = F.log_softmax(entity_id_logits, -1) + + # Mention length log probabilites: (batch_size, max_embeddings x max_mention_lengths) + # NOTE: Entity id is guaranteed to be zero at initialization + embeddings = self._dynamic_embeddings.embeddings + concatenated = torch.cat((hidden.unsqueeze(1).expand_as(embeddings), embeddings), dim=-1) + mention_length_logits = self._mention_length_projection(concatenated) + mention_length_logp = F.log_softmax(mention_length_logits, -1).view(batch_size, -1) + + logp[:, i, 0] += entity_type_logp[:, 0] + logp[:, i, 1:] += entity_type_logp[:, 1:] + logp[:, i, 1:] += entity_id_logp.repeat((1, self._max_mention_length)) + logp[:, i ,1:] += mention_length_logp + + return logp + + def _adjust_for_ongoing_mentions(self, + logp: torch.FloatTensor, + output: Dict[str, torch.FloatTensor] = None) -> torch.FloatTensor: + """Fixes logp so that ongoing mentions are deterministic.""" + if output is None: + return logp + + mention_lengths = output['mention_lengths'] + entity_ids = output['entity_ids'] + + # Find ongoing mentions. + ongoing = mention_lengths > 1 + + # Make probability zero for all ongoing entries... 
+ logp[ongoing] = -float('inf') + + # ...except the deterministic output + new_lengths = mention_lengths[ongoing] - 1 + entity_ids = entity_ids[ongoing] + annotation_idx = 1 + entity_ids + new_lengths * self._max_embeddings + logp[ongoing, annotation_idx] = 0 return logp @@ -387,106 +408,98 @@ def _top_k_annotations(self, logp: torch.FloatTensor, k: int): A (batch_size, beam_width, num_annotations tensor) """ batch_size = logp.shape[0] - # Get the top canidates from each beam (makes math much eaiser) + # Get the top canidates from each beam + # (batch_size, beam_width, k) top_logp, top_indices = logp.topk(k, dim=-1) # Next flatten + # (batch_size, beam_width * k) flat_logp = top_logp.view(batch_size, -1) flat_indices = top_indices.view(batch_size, -1) - # Next get the top-k overall - top_logp = flat_logp.topk(k, dim=-1) - output_dict = { - 'entity_types': self.entity_type_lookup.take(top_indices), - 'entity_ids': self.entity_id_lookup.take(top_indices), - 'mention_lengths': self.mention_length_lookup.take(top_indices) - } - return top_logp, output_dict - - def _beam_step_fn(self, - hidden: torch.FloatTensor, - timestep: int, - k: int, - last_predictions: Optional[Dict[str, torch.Tensor]] = None, - state: Optional[Dict[str, Any]] = None) -> Tuple[torch.Tensor, torch.Tensor]: - """ - This function computes the top-k most likely next states for the beam. + # Get the true top k + # (batch_size, k) + top_logp, top_indices = flat_logp.topk(k, dim=-1) - Parameters - ========== + # Retrieve backpointers from the indices + # (batch_size, k) + backpointers = top_indices // self.num_possible_annotations - Returns - ======= - A tuple comprised of: - next - """ - batch_size, hidden_dim = hidden.shape + # Also need to index correctly into the lookup + lookup_indices = flat_indices.gather(-1, top_indices) - # Initial predictions are a special case since we only have to consider batch_size inputs - # and don't need to deal with ongoing mentions. - if last_predictions is None: - # Step 1 - Compute type of next token - entity_type_logits = self._entity_type_projection(hidden) + # Use lookup indices to get the top annotation variables + top_entity_types = self.entity_type_lookup.take(lookup_indices) + top_entity_ids = self.entity_id_lookup.take(lookup_indices) + top_mention_lengths = self.mention_length_lookup.take(lookup_indices) - # shape: (batch_size, 2) - entity_type_logp = F.log_softmax(entity_type_logits, -1) + output = { + 'logp': top_logp, + 'backpointers': backpointers, + 'entity_types': self.entity_type_lookup.take(lookup_indices), + 'entity_ids': self.entity_id_lookup.take(lookup_indices), + 'mention_lengths': self.mention_length_lookup.take(lookup_indices) + } + return output - # Step 2 - Compute probabilities for each entity - # TODO: Take this shit out - its obvious... - entity_id_prediction_outputs = self._dynamic_embeddings(hidden=hidden, - timestep=timestep) - entity_id_logits = entity_id_prediction_outputs['logits'] + def _update_beam_states(self, + hidden: torch.FloatTensor, + timestep: int, + beam_states: List[Dict[str, Any]], + output: Dict[str, torch.Tensor]) -> List[Dict[str, Any]]: + """ + Given the new beam predictions we need to add/update entity embeddings. Before we can do + this we need to follow the backpointers to assemble correct tensors of the entity embeddings. 
- # shape: (batch_size, num_entities) - entity_id_logp = F.log_softmax(entity_id_logits, -1) # Technically should mask + """ - # Step 3 - Compute mention length probabilities for each entity + logp = output['logp'] + backpointers = output['backpointers'] + batch_size, k = logp.shape + + # Concat all the dynamic entity embeddings and trace backpointers to make sure the proper + # embeddings are loaded for each beam. + all_prev_entity_embeddings = logp.new_zeros(batch_size, len(beam_states), self._max_embeddings, self._embedding_dim) + all_prev_num_entities = backpointers.new_zeros(batch_size, len(beam_states)) + all_prev_last_seen = backpointers.new_zeros(batch_size, len(beam_states), self._max_embeddings) + for i, beam_state in enumerate(beam_states): + self._dynamic_embeddings.load_state_dict(beam_state) + all_prev_entity_embeddings[:, i] = self._dynamic_embeddings.embeddings + all_prev_num_entities[:, i] = self._dynamic_embeddings.num_embeddings + all_prev_last_seen[:, i] = self._dynamic_embeddings.last_seen + + new_beam_states: List[Dict[str, Any]] = [] + for i in range(k): + # Trace backpointers to get correct params + self._dynamic_embeddings.embeddings = all_prev_entity_embeddings[torch.arange(batch_size), backpointers[:, i]] + self._dynamic_embeddings.num_entities = all_prev_num_entities[torch.arange(batch_size), backpointers[:, i]] + self._dynamic_embeddings.last_seen = all_prev_last_seen[torch.arange(batch_size), backpointers[:, i]] + + # Add and update embeddings + entity_ids = output['entity_ids'][:, i] + entity_types = output['entity_types'][:, i] + new_entities = entity_ids == self._dynamic_embeddings.num_embeddings + self._dynamic_embeddings.add_embeddings(timestep, new_entities) + self._dynamic_embeddings.update_embeddings(hidden=hidden, + update_indices=entity_ids, + timestep=timestep, + mask=entity_types) - # shape: (batch_size, num_entities, embedding_dim) - embeddings = self._dynamic_embeddings.embeddings[:, :1] # Only one entity right now + new_beam_states.append(self._dynamic_embeddings.state_dict()) - concatenated = torch.cat((hidden.unsqueeze(1), embeddings), dim=-1) - mention_length_logits = self._mention_length_projection(concatenated) + return new_beam_states + + @staticmethod + def _trace_backpointers(source: Dict[str, torch.Tensor], + reset: torch.ByteTensor, + k: int, + predictions: List[Dict[str, torch.Tensor]]) -> Dict[str, Any]: + pass - # shape: (batch_size, num_entities, max_mention_length) - mention_length_logp = F.log_softmax(mention_length_logits, -1) - - # Step 4 - Combine log probabilities - - # Use meshgrid to get the Cartesian product of entity id and mention length options. - # shape: (num_entities, max_mention_length) - entity_ids, mention_lengths = torch.meshgrid(torch.arange(1, dtype=torch.int64), - torch.arange(self._max_mention_length, dtype=torch.int64)) - entity_ids = entity_ids.view(-1) - mention_lengths = mention_lengths.view(-1) - entity_types = torch.ones_like(entity_ids, dtype=torch.uint8) - - # When the type is not a mention there is only one possible annotation. Add it. 
- entity_types = torch.cat((torch.ByteTensor([0]), entity_types)) - entity_ids = torch.cat((torch.LongTensor([0]), entity_ids)) - mention_lengths = torch.cat((torch.LongTensor([1]), mention_lengths)) - num_annotations = entity_types.shape[0] - - logp = torch.zeros(size=(batch_size, num_annotations), - dtype=torch.float32, - device=hidden.device) - logp[:, 0] = entity_type_logp[:, 0] - logp[:, 1:] = entity_type_logp[:, 1].unsqueeze(-1) - logp[:, 1:] += entity_id_logp.view(batch_size, -1) - logp[:, 1:] += mention_length_logp.view(batch_size, -1) - - top_logp, top_indices = logp.topk(k, dim=-1) - output_dict = { - 'entity_types': torch.take(entity_types, top_indices), - 'entity_ids': torch.take(entity_ids, top_indices), - 'mention_lengths': torch.take(mention_lengths, top_indices) - } - return top_logp, output_dict - - def predict_top_k(self, - source: Dict[str, torch.Tensor], - reset: torch.ByteTensor, - k: int) -> Tuple[torch.Tensor, torch.Tensor]: + def beam_search(self, + source: Dict[str, torch.Tensor], + reset: torch.ByteTensor, + k: int) -> Tuple[torch.Tensor, torch.Tensor]: """ Obtain the top-k (approximately) most likely predictions from the model using beam search. Unlike typical beam search all of the beam states are returned instead of just @@ -526,26 +539,34 @@ def predict_top_k(self, prev_mention_lengths = source['tokens'].new_ones(batch_size) # Embed and encode the tokens up front. - embeddings = self._text_field_embedder(source['tokens']) + embeddings = self._text_field_embedder(source) hidden = self._rnn(embeddings) - # The rest of the code will be a modified version of: - # https://github.com/allenai/allennlp/blob/master/allennlp/nn/beam_search.py - - # List of predictions. One for each time step. Unlike in allennlp's beam search, here each - # element is a dictionary of (batch_size, beam_size) tensors whose keys are the annotation - # variables. + # Beam search logic predictions: List[Dict[str, torch.Tensor]] = [] + beam_states = [self._dynamic_embeddings.state_dict()] + output = None + for timestep in range(1, sequence_length): + # Get log probabilities of annotations + # (batch_size, k, num_annotations) + logp = self._annotation_logp(hidden[:, timestep], timestep, beam_states) - # List of (batch_size, beam_size) tensors. One for each time step. Stores the index n of - # the parent prediction it came from. - backpointers: List[torch.LongTensor] = [] + # Add to cumulative log probabilities of beams (which have shape (batch_size, k)) + if output: + logp += output['logp'].unsqueeze(-1) - # To keep things simple, we are going to store the beam states - for timestep in range(sequence_length): - pass + # Accout for ongoing mentions + logp = self._adjust_for_ongoing_mentions(logp, output) + + output = self._top_k_annotations(logp, k) + beam_states = self._update_beam_states(hidden[:, timestep], timestep, beam_states, output) + predictions.append(output) + + # Trace backpointers to get annotation. + annotation = self._trace_backpointers(source, reset, k, output) + + return annotation - # Trace backpointers to get output. 
def _forward_loop(self, tokens: Dict[str, torch.Tensor], diff --git a/kglm/tests/models/entity_nlm_test.py b/kglm/tests/models/entity_nlm_test.py index 69016ee..4f098e6 100644 --- a/kglm/tests/models/entity_nlm_test.py +++ b/kglm/tests/models/entity_nlm_test.py @@ -42,16 +42,55 @@ def test_annotation_logp(self): # Apply to random hidden state hidden = torch.randn(batch_size, self.model._embedding_dim) - logp = self.model._annotation_logp(hidden, timestep=0) + beam_states = [self.model._dynamic_embeddings.state_dict()] + logp = self.model._annotation_logp(hidden, timestep=0, beam_states=beam_states) # Check that output has correct shape - assert tuple(logp.shape) == (batch_size, self.model.num_possible_annotations) + assert tuple(logp.shape) == (batch_size, 1, self.model.num_possible_annotations) - # Check that state dict can be fed to function... - state_dict = { - 'dynamic_embeddings_state_dict': self.model._dynamic_embeddings.state_dict() + def test_adjust_for_ongoing_mentions(self): + batch_size = 2 + k = 3 + + # Construct an example where the top-beam state for the second sequence in the batch is an ongoing mention + logp = torch.zeros(batch_size, k, self.model.num_possible_annotations) + output = { + 'entity_ids': torch.LongTensor([[0, 0, 0], [3, 0, 0]]), + 'mention_lengths': torch.LongTensor([[1, 1, 1],[3, 1, 1]]) } - logp_prime = self.model._annotation_logp(hidden, timestep=0, state_dict=state_dict) - # ...and that output is the same as before - assert torch.allclose(logp, logp_prime) + # See that adjustment works + logp = self.model._adjust_for_ongoing_mentions(logp, output) + assert logp[0, 0, 0] == 0.0 # Should be unaffected + assert logp[1, 0, 0] == -float('inf') # Should be affected + + # Only element with probability should have entity id == 3 and mention length == 2 + pred = logp[1, 0].argmax() + assert self.model.entity_id_lookup[pred] == 3 + assert self.model.mention_length_lookup[pred] == 2 + + def test_top_k_annotations(self): + batch_size = 2 + k = 3 + + # Check works correctly at start (e.g. if beam size is 1) + logp = torch.randn(batch_size, 1, self.model.num_possible_annotations) + annotations = self.model._top_k_annotations(logp, k) + + assert tuple(annotations['logp'].shape) == (batch_size, k) + assert torch.allclose(annotations['backpointers'], torch.zeros(batch_size, k, dtype=torch.int64)) + + # Check works correctly for other timesteps (e.g. 
previous beam size is k) + logp = torch.randn(batch_size, k, self.model.num_possible_annotations) + annotations = self.model._top_k_annotations(logp, k) + + assert tuple(annotations['logp'].shape) == (batch_size, k) + + def test_beam_search(self): + batch_size = 2 + seq_len = 10 + k = 3 + vocab_size = self.model.vocab.get_vocab_size('tokens') + source = {'tokens': torch.randint(vocab_size, size=(batch_size, seq_len))} + reset = torch.ones(batch_size, dtype=torch.uint8) + self.model.beam_search(source, reset, k) From 495d5b3cd5cf9bdf91e738214fcefb77c9def4b8 Mon Sep 17 00:00:00 2001 From: Robert Logan Date: Wed, 23 Oct 2019 06:54:43 -0700 Subject: [PATCH 06/35] Seemingly working beam search - needs more rigorous testing --- kglm/models/entity_disc.py | 49 +++++++++++++++++++++++++++++++------- 1 file changed, 40 insertions(+), 9 deletions(-) diff --git a/kglm/models/entity_disc.py b/kglm/models/entity_disc.py index e715cb2..ab9e768 100644 --- a/kglm/models/entity_disc.py +++ b/kglm/models/entity_disc.py @@ -429,16 +429,16 @@ def _top_k_annotations(self, logp: torch.FloatTensor, k: int): lookup_indices = flat_indices.gather(-1, top_indices) # Use lookup indices to get the top annotation variables - top_entity_types = self.entity_type_lookup.take(lookup_indices) - top_entity_ids = self.entity_id_lookup.take(lookup_indices) - top_mention_lengths = self.mention_length_lookup.take(lookup_indices) + entity_types = self.entity_type_lookup.take(lookup_indices) + entity_ids = self.entity_id_lookup.take(lookup_indices) + mention_lengths = self.mention_length_lookup.take(lookup_indices) output = { 'logp': top_logp, 'backpointers': backpointers, - 'entity_types': self.entity_type_lookup.take(lookup_indices), - 'entity_ids': self.entity_id_lookup.take(lookup_indices), - 'mention_lengths': self.mention_length_lookup.take(lookup_indices) + 'entity_types': entity_types, + 'entity_ids': entity_ids, + 'mention_lengths': mention_lengths } return output @@ -452,7 +452,6 @@ def _update_beam_states(self, this we need to follow the backpointers to assemble correct tensors of the entity embeddings. 
""" - logp = output['logp'] backpointers = output['backpointers'] batch_size, k = logp.shape @@ -494,7 +493,39 @@ def _trace_backpointers(source: Dict[str, torch.Tensor], reset: torch.ByteTensor, k: int, predictions: List[Dict[str, torch.Tensor]]) -> Dict[str, Any]: - pass + batch_size, seq_length = source['tokens'].shape + + new_reset = reset.repeat(1, k).view(batch_size * k) + new_source = {key: value.repeat(1, k, 1).view(batch_size * k, -1) for key, value in source.items()} + + entity_types = [] + entity_ids = [] + mention_lengths = [] + backpointer = None + + for prediction in reversed(predictions): + if backpointer is None: + entity_types.append(prediction['entity_types']) + entity_ids.append(prediction['entity_ids']) + mention_lengths.append(prediction['mention_lengths']) + else: + entity_types.append(prediction['entity_types'].gather(1, backpointer)) + entity_ids.append(prediction['entity_ids'].gather(1, backpointer)) + mention_lengths.append(prediction['mention_lengths'].gather(1, backpointer)) + backpointer = prediction['backpointers'] + + entity_types = torch.stack(entity_types[::-1], dim=-1).view(batch_size * k, -1) + entity_ids = torch.stack(entity_ids[::-1], dim=-1).view(batch_size * k, -1) + mention_lengths = torch.stack(mention_lengths[::-1], dim=-1).view(batch_size * k , -1) + + return { + 'reset': new_reset, + 'source': new_source, + 'entity_types': entity_types, + 'entity_ids': entity_ids, + 'mention_lengths': mention_lengths + } + def beam_search(self, source: Dict[str, torch.Tensor], @@ -563,7 +594,7 @@ def beam_search(self, predictions.append(output) # Trace backpointers to get annotation. - annotation = self._trace_backpointers(source, reset, k, output) + annotation = self._trace_backpointers(source, reset, k, predictions) return annotation From 3791641bd7f0f254c37ddace33a6b1bffd59cc03 Mon Sep 17 00:00:00 2001 From: Robert Logan Date: Wed, 23 Oct 2019 07:08:01 -0700 Subject: [PATCH 07/35] Added beam-sum command --- kglm/commands/__init__.py | 1 + kglm/commands/beamsum.py | 196 +++++++++++++++++++++++++++++++++++++ kglm/models/entity_disc.py | 2 - kglm/run.py | 2 + 4 files changed, 199 insertions(+), 2 deletions(-) create mode 100644 kglm/commands/beamsum.py diff --git a/kglm/commands/__init__.py b/kglm/commands/__init__.py index 2d5dd9c..d4e0ae9 100644 --- a/kglm/commands/__init__.py +++ b/kglm/commands/__init__.py @@ -1,2 +1,3 @@ from .evaluate_perplexity import EvaluatePerplexity from .complete_the_sentence import CompleteTheSentence +from .beamsum import BeamSum diff --git a/kglm/commands/beamsum.py b/kglm/commands/beamsum.py new file mode 100644 index 0000000..b979e74 --- /dev/null +++ b/kglm/commands/beamsum.py @@ -0,0 +1,196 @@ +import argparse +import json +import logging +import math +from typing import Any, Dict, Iterator + +from allennlp.commands.subcommand import Subcommand +from allennlp.common.util import prepare_environment +from allennlp.common.checks import check_for_gpu +from allennlp.common.tqdm import Tqdm +from allennlp.data import Instance +from allennlp.data.dataset_readers.dataset_reader import DatasetReader +from allennlp.data.iterators import BasicIterator, DataIterator +from allennlp.models import Model +from allennlp.models.archival import load_archive +from allennlp.nn import util +import numpy as np +import torch + +logger = logging.getLogger(__name__) + + +class BeamSum(Subcommand): + def add_subparser(self, name: str, parser: argparse._SubParsersAction) -> argparse.ArgumentParser: + # pylint: disable=protected-access + description = 
'''Upper bound the specified model perplexity using beam search''' + subparser = parser.add_parser(name, description=description, + help='Evaluate the specified module using importance sampling') + + subparser.add_argument('model_archive_file', type=str, help='path to an archived trained model') + + subparser.add_argument('sampler_archive_file', type=str, + help='path to an archived trained model for generating samples') + + subparser.add_argument('input_file', type=str, help='path to the file containing the evaluation data') + + subparser.add_argument('--output-file', type=str, help='path to output file') + + subparser.add_argument('--weights-file', + type=str, + help='a path that overrides which weights file to use') + + cuda_device = subparser.add_mutually_exclusive_group(required=False) + cuda_device.add_argument('--cuda-device', + type=int, + default=-1, + help='id of GPU to use (if any)') + + subparser.add_argument('-o', '--overrides', + type=str, + default="", + help='a JSON structure used to override the experiment configuration') + + subparser.add_argument('--batch-size', + type=int, + default=None, + help='Batch size (default: whatever iterator was set to)') + + subparser.add_argument('--split-size', + type=int, + default=None, + help='Split size (default: whatever iterator was set to)') + + subparser.add_argument('-k', '--beam-width', + type=int, + default=None, + help='Beam width') + + return subparser + + +def logsumexp(prev: torch.FloatTensor, + current: torch.FloatTensor, + i: int, + samples_per_batch: int): + # NOTE: n is number of samples + current_avg = current.view(samples_per_batch, -1).sum(dim=-1).logsumexp(dim=0) - np.log(samples_per_batch).item() + if prev is None: + return current_avg + a = torch.max(prev, current_avg) + sumexp = torch.exp(prev - a) * i / (i + 1) + torch.exp(current_avg - a) / (i + 1) + return a + torch.log(sumexp) + + +def evaluate_perplexity(model: Model, + sampler: Model, + num_samples: int, + instances: Iterator[Instance], + data_iterator: DataIterator, + cuda_device: int, + beam_width: int) -> Dict[str, Any]: + check_for_gpu(cuda_device) + + logger.info('Iterating over dataset') + + weight = None + + iterator = data_iterator(instances, num_epochs=1, shuffle=False) + generator_tqdm = Tqdm.tqdm(iterator, total=0) + + model.eval() + sampler.eval() + sampler._state = None + + summand = None + denom = None + #summand = torch.tensor(0.0) + # penalized_summand = torch.tensor(0.0) + + held_over_data = None + + for batch, _ in generator_tqdm: + + # We need sequence length to help compute perplexity + n_tokens = util.get_text_field_mask(batch['source']).float().sum(dim=-1) + if denom is None: + denom = n_tokens + else: + denom += n_tokens + + summand = util.move_to_device(summand, cuda_device) + batch = util.move_to_device(batch, cuda_device) + + # Draw a sample + with torch.no_grad(): + sample = sampler.beam_search(batch['source'], + batch['reset'], + beam_width) + + # Evaluate on sample + with torch.no_grad(): + model_output = model(**sample) + + model_logp = model_output['logp'] + + print(torch.exp(-model_logp / n_tokens)) + + if summand is None: + summand = model_logp + else: + summand += model_logp + + ppl = torch.exp(-summand / denom) + + metrics = { + 'ppl': ppl + } + return metrics + +def evaluate_from_args(args: argparse.Namespace) -> Dict[str, Any]: + # Disable some of the more verbose logging statements + logging.getLogger('allennlp.common.params').disabled = True + logging.getLogger('allennlp.nn.initializers').disabled = True + 
logging.getLogger('allennlp.modules.token_embedders.embedding').setLevel(logging.INFO) + + # Load model from archive + model_archive = load_archive(args.model_archive_file, args.cuda_device, args.overrides, args.weights_file) + config = model_archive.config + prepare_environment(config) + model = model_archive.model + model.eval() + + # Load sampler + sampler_archive = load_archive(args.sampler_archive_file, args.cuda_device, args.overrides, args.weights_file) + sampler = sampler_archive.model + sampler.eval() + + # Load the evaluation data. NOTE: We are using the model's reader! + validation_dataset_reader_params = config.pop('validation_dataset_reader', None) + if validation_dataset_reader_params is not None: + dataset_reader = DatasetReader.from_params(validation_dataset_reader_params) + else: + dataset_reader = DatasetReader.from_params(config.pop('dataset_reader')) + evaluation_data_path = args.input_file + logger.info('Reading evaluation data from: %s', evaluation_data_path) + instances = dataset_reader.read(evaluation_data_path) + + # To avoid hairy issues with splitting, we opt to use a basic iterator so that we can + # generate samples for entire sequences. + iterator_params = config.pop('iterator', 'None') + if args.batch_size is not None: + iterator_params['batch_size'] = args.batch_size + if args.split_size is not None: + iterator_params['split_size'] = args.split_size + iterator_params['truncate'] = False + iterator = DataIterator.from_params(iterator_params) + iterator.index_with(model.vocab) + metrics = evaluate_perplexity(model, sampler, args.num_samples, instances, + iterator, args.cuda_device, args.beam_width) + + logger.info('Finished evaluating.') + logger.info('Metrics:') + for key, metric in metrics.items(): + logger.info('%s: %s', key, metric) + + return metrics diff --git a/kglm/models/entity_disc.py b/kglm/models/entity_disc.py index ab9e768..bcd6a15 100644 --- a/kglm/models/entity_disc.py +++ b/kglm/models/entity_disc.py @@ -526,7 +526,6 @@ def _trace_backpointers(source: Dict[str, torch.Tensor], 'mention_lengths': mention_lengths } - def beam_search(self, source: Dict[str, torch.Tensor], reset: torch.ByteTensor, @@ -598,7 +597,6 @@ def beam_search(self, return annotation - def _forward_loop(self, tokens: Dict[str, torch.Tensor], entity_types: torch.Tensor, diff --git a/kglm/run.py b/kglm/run.py index 708b16a..e961cba 100644 --- a/kglm/run.py +++ b/kglm/run.py @@ -16,10 +16,12 @@ from allennlp.commands import main from kglm.commands import EvaluatePerplexity from kglm.commands import CompleteTheSentence +from kglm.commands import BeamSum if __name__ == "__main__": main(prog="allennlp", subcommand_overrides={ 'evaluate-perplexity': EvaluatePerplexity(), 'complete-the-sentence': CompleteTheSentence(), + 'beam-sum': BeamSum() }) From 9ef58eee9a9248acf39ba0a72203fa5cc1690a15 Mon Sep 17 00:00:00 2001 From: rloganiv Date: Mon, 28 Oct 2019 16:25:18 -0700 Subject: [PATCH 08/35] Fixed logp calculations / annotation offsets --- kglm/models/entity_disc.py | 40 +++++++++++++++++++++----------------- 1 file changed, 22 insertions(+), 18 deletions(-) diff --git a/kglm/models/entity_disc.py b/kglm/models/entity_disc.py index bcd6a15..51cd4eb 100644 --- a/kglm/models/entity_disc.py +++ b/kglm/models/entity_disc.py @@ -17,7 +17,7 @@ import torch import torch.nn.functional as F -from kglm.modules import DynamicEmbedding, WeightDroppedLstm +from kglm.modules import DynamicEmbedding, WeightDroppedLstm from kglm.nn.util import sample_from_logp logger = logging.getLogger(__name__) @@ 
-324,12 +324,12 @@ def entity_type_lookup(self): @property def entity_id_lookup(self): - entity_id_lookup = [0] + list(range(self._max_embeddings)) * self._max_mention_length + entity_id_lookup = [0] + [i for i in range(self._max_embeddings) for _ in range(self._max_mention_length)] return torch.LongTensor(entity_id_lookup) @property def mention_length_lookup(self): - mention_length_lookup = [1] + [i for i in range(self._max_mention_length) for _ in range(self._max_embeddings)] + mention_length_lookup = [1] + list(range(self._max_mention_length)) * self._max_embeddings return torch.LongTensor(mention_length_lookup) def _annotation_logp(self, @@ -345,7 +345,7 @@ def _annotation_logp(self, Returns ======= A tensor of log-probabilities for the possible annotations of shape - (batch_size, num_annotations). + (batch_size, sequence_length, num_annotations). """ batch_size, hidden_dim = hidden.shape logp = hidden.new_zeros((batch_size, len(beam_states), self.num_possible_annotations)) @@ -368,9 +368,12 @@ def _annotation_logp(self, mention_length_logits = self._mention_length_projection(concatenated) mention_length_logp = F.log_softmax(mention_length_logits, -1).view(batch_size, -1) + # Lastly, we need to tile entity id log probs properly. + entity_id_logp = entity_id_logp.unsqueeze(-1).repeat(1, 1, self._max_mention_length).view(batch_size, -1) + logp[:, i, 0] += entity_type_logp[:, 0] logp[:, i, 1:] += entity_type_logp[:, 1:] - logp[:, i, 1:] += entity_id_logp.repeat((1, self._max_mention_length)) + logp[:, i, 1:] += entity_id_logp logp[:, i ,1:] += mention_length_logp return logp @@ -394,7 +397,7 @@ def _adjust_for_ongoing_mentions(self, # ...except the deterministic output new_lengths = mention_lengths[ongoing] - 1 entity_ids = entity_ids[ongoing] - annotation_idx = 1 + entity_ids + new_lengths * self._max_embeddings + annotation_idx = 1 + entity_ids * self._max_mention_length + new_lengths logp[ongoing, annotation_idx] = 0 return logp @@ -423,15 +426,15 @@ def _top_k_annotations(self, logp: torch.FloatTensor, k: int): # Retrieve backpointers from the indices # (batch_size, k) - backpointers = top_indices // self.num_possible_annotations + backpointers = top_indices // k # Also need to index correctly into the lookup lookup_indices = flat_indices.gather(-1, top_indices) # Use lookup indices to get the top annotation variables - entity_types = self.entity_type_lookup.take(lookup_indices) - entity_ids = self.entity_id_lookup.take(lookup_indices) - mention_lengths = self.mention_length_lookup.take(lookup_indices) + entity_types = self.entity_type_lookup.to(device=lookup_indices.device).take(lookup_indices) + entity_ids = self.entity_id_lookup.to(device=lookup_indices.device).take(lookup_indices) + mention_lengths = self.mention_length_lookup.to(device=lookup_indices.device).take(lookup_indices) output = { 'logp': top_logp, @@ -495,8 +498,8 @@ def _trace_backpointers(source: Dict[str, torch.Tensor], predictions: List[Dict[str, torch.Tensor]]) -> Dict[str, Any]: batch_size, seq_length = source['tokens'].shape - new_reset = reset.repeat(1, k).view(batch_size * k) - new_source = {key: value.repeat(1, k, 1).view(batch_size * k, -1) for key, value in source.items()} + new_reset = reset.unsqueeze(1).repeat(1, k).view(batch_size * k) + new_source = {key: value.unsqueeze(1).repeat(1, k, 1).view(batch_size * k, -1) for key, value in source.items()} entity_types = [] entity_ids = [] @@ -512,7 +515,10 @@ def _trace_backpointers(source: Dict[str, torch.Tensor], 
entity_types.append(prediction['entity_types'].gather(1, backpointer))
             entity_ids.append(prediction['entity_ids'].gather(1, backpointer))
             mention_lengths.append(prediction['mention_lengths'].gather(1, backpointer))
-            backpointer = prediction['backpointers']
+            if backpointer is None:
+                backpointer = prediction['backpointers']
+            else:
+                backpointer = prediction['backpointers'].gather(1, backpointer)
 
         entity_types = torch.stack(entity_types[::-1], dim=-1).view(batch_size * k, -1)
         entity_ids = torch.stack(entity_ids[::-1], dim=-1).view(batch_size * k, -1)
@@ -576,18 +582,16 @@ def beam_search(self,
         predictions: List[Dict[str, torch.Tensor]] = []
         beam_states = [self._dynamic_embeddings.state_dict()]
         output = None
-        for timestep in range(1, sequence_length):
+        for timestep in range(sequence_length):
 
             # Get log probabilities of annotations
             # (batch_size, k, num_annotations)
             logp = self._annotation_logp(hidden[:, timestep], timestep, beam_states)
-
+            # Account for ongoing mentions
+            logp = self._adjust_for_ongoing_mentions(logp, output)
             # Add to cumulative log probabilities of beams (which have shape (batch_size, k))
             if output:
                 logp += output['logp'].unsqueeze(-1)
 
-            # Accout for ongoing mentions
-            logp = self._adjust_for_ongoing_mentions(logp, output)
-
             output = self._top_k_annotations(logp, k)
             beam_states = self._update_beam_states(hidden[:, timestep], timestep, beam_states, output)
             predictions.append(output)

From 42407e6a4ce19acf803aa84b1dc5b9c8e27dc3ea Mon Sep 17 00:00:00 2001
From: rloganiv
Date: Mon, 28 Oct 2019 16:28:54 -0700
Subject: [PATCH 09/35] Fixed mention lengths

---
 kglm/data/dataset_readers/conll2012.py         |  8 ++++----
 kglm/data/dataset_readers/enhanced_wikitext.py |  4 ++--
 kglm/models/entity_disc.py                     | 14 +++++++-------
 kglm/tests/dataset_readers/conll2012_test.py   | 12 ++++++------
 .../dataset_readers/enhanced_wikitext_test.py  |  4 ++--
 kglm/tests/models/entity_nlm_test.py           |  6 +++---
 6 files changed, 24 insertions(+), 24 deletions(-)

diff --git a/kglm/data/dataset_readers/conll2012.py b/kglm/data/dataset_readers/conll2012.py
index f5e58e7..2463666 100644
--- a/kglm/data/dataset_readers/conll2012.py
+++ b/kglm/data/dataset_readers/conll2012.py
@@ -188,7 +188,7 @@ def text_to_instance(self,  # type: ignore
 
         # Initialize fields.
entity_types = np.zeros(shape=(len(tokens),)) entity_ids = np.zeros(shape=(len(tokens),)) - mention_lengths = np.ones(shape=(len(tokens),)) + mention_lengths = np.zeros(shape=(len(tokens),)) if cluster_dict: for cluster, entity_id in cluster_dict.items(): @@ -199,7 +199,7 @@ def text_to_instance(self, # type: ignore entity_ids[cluster[0] + 1:cluster[1] + 1 + 1] = entity_id entity_length = (cluster[1] + 1) - cluster[0] # Fill in mention length - mention_lengths[cluster[0] + 1:cluster[1] + 1 + 1] = np.arange(entity_length, 0, step=-1) + mention_lengths[cluster[0] + 1:cluster[1] + 1 + 1] = np.arange(entity_length, 0, step=-1) - 1 fields['entity_ids'] = SequentialArrayField(entity_ids, dtype=np.int64) fields['mention_lengths'] = SequentialArrayField(mention_lengths, dtype=np.int64) @@ -239,14 +239,14 @@ def text_to_instance(self, entity_types = np.zeros(shape=(len(tokens),)) entity_ids = np.zeros(shape=(len(tokens),)) - mention_lengths = np.ones(shape=(len(tokens),)) + mention_lengths = np.zeros(shape=(len(tokens),)) for i, cluster in enumerate(clusters.values()): for span in cluster: start, end = span entity_types[(start + 1 - self._offset):(end + 1 - self._offset)] = 1 entity_ids[(start + 1 - self._offset):(end + 1 - self._offset)] = i + 1 - mention_lengths[(start + 1 - self._offset):(end + 1 - self._offset)] = np.arange(end - start, 0, step=-1) + mention_lengths[(start + 1 - self._offset):(end + 1 - self._offset)] = np.arange(end - start, 0, step=-1) - 1 fields['entity_types'] = SequentialArrayField(entity_types, dtype=np.uint8) fields['entity_ids'] = SequentialArrayField(entity_ids, dtype=np.int64) diff --git a/kglm/data/dataset_readers/enhanced_wikitext.py b/kglm/data/dataset_readers/enhanced_wikitext.py index 3577d14..dd5d1ec 100644 --- a/kglm/data/dataset_readers/enhanced_wikitext.py +++ b/kglm/data/dataset_readers/enhanced_wikitext.py @@ -91,7 +91,7 @@ def text_to_instance(self, data: Dict[str, Any]) -> Instance: # pylint: disable seen_entities: Set[str] = set() entity_types = np.zeros(shape=(len(tokens),)) entity_ids = np.zeros(shape=(len(tokens),)) - mention_lengths = np.ones(shape=(len(tokens),)) + mention_lengths = np.zeros(shape=(len(tokens),)) # Process annotations for annotation in data['annotations']: @@ -105,7 +105,7 @@ def text_to_instance(self, data: Dict[str, Any]) -> Instance: # pylint: disable # Note: +1 offset to account for start token. entity_types[i] = 1 entity_ids[i] = len(seen_entities) - mention_lengths[i] = length + mention_lengths[i] = length - 1 length -= 1 fields['entity_types'] = SequentialArrayField(entity_types, dtype=np.uint8) diff --git a/kglm/models/entity_disc.py b/kglm/models/entity_disc.py index 51cd4eb..f9e5e01 100644 --- a/kglm/models/entity_disc.py +++ b/kglm/models/entity_disc.py @@ -227,7 +227,7 @@ def sample(self, # pylint: disable=unused-argument current_hidden = hidden[:, timestep] # We only predict types / ids / lengths if the previous mention is terminated. - predict_mask = prev_mention_lengths == 1 + predict_mask = prev_mention_lengths == 0 predict_mask = predict_mask & mask[:, timestep].byte() if predict_mask.any(): @@ -288,7 +288,7 @@ def sample(self, # pylint: disable=unused-argument # lengths decrease by 1, all other outputs are copied from the previous timestep. Do # not need to add anything to logp since these 'predictions' have probability 1 under # the model. 
- deterministic_mask = prev_mention_lengths > 1 + deterministic_mask = prev_mention_lengths > 0 deterministic_mask = deterministic_mask & mask[:, timestep].byte() if deterministic_mask.any(): entity_types[deterministic_mask, timestep] = entity_types[deterministic_mask, timestep - 1] @@ -389,7 +389,7 @@ def _adjust_for_ongoing_mentions(self, entity_ids = output['entity_ids'] # Find ongoing mentions. - ongoing = mention_lengths > 1 + ongoing = mention_lengths > 0 # Make probability zero for all ongoing entries... logp[ongoing] = -float('inf') @@ -572,7 +572,7 @@ def beam_search(self, 'sequences have been split). Cannot predict top-K annotations in ' 'this setting!') self.reset_states(reset) - prev_mention_lengths = source['tokens'].new_ones(batch_size) + prev_mention_lengths = source['tokens'].new_zeros(batch_size) # Embed and encode the tokens up front. embeddings = self._text_field_embedder(source) @@ -640,7 +640,7 @@ def _forward_loop(self, # Need to track previous mention lengths in order to know when to measure loss. if self._state is None: - prev_mention_lengths = mention_lengths.new_ones(batch_size) + prev_mention_lengths = mention_lengths.new_zeros(batch_size) else: prev_mention_lengths = self._state['prev_mention_lengths'] @@ -666,7 +666,7 @@ def _forward_loop(self, # We only predict types / ids / lengths if we are not currently in the process of # generating a mention (e.g. if the previous remaining mention length is 1). Indexing / # masking with ``predict_all`` makes it possible to do this in batch. - predict_all = prev_mention_lengths == 1 + predict_all = prev_mention_lengths == 0 predict_all = predict_all & mask[:, timestep].byte() if predict_all.any(): @@ -750,7 +750,7 @@ def reset_states(self, reset: torch.ByteTensor) -> None: """Resets the model's internals. 
Should be called at the start of a new batch.""" if reset.any() and (self._state is not None): # Zero out any previous elements - self._state['prev_mention_lengths'][reset] = 1 + self._state['prev_mention_lengths'][reset] = 0 # Reset the dynamic embeddings and lstm self._dynamic_embeddings.reset_states(reset) diff --git a/kglm/tests/dataset_readers/conll2012_test.py b/kglm/tests/dataset_readers/conll2012_test.py index 9a8156b..2d84435 100644 --- a/kglm/tests/dataset_readers/conll2012_test.py +++ b/kglm/tests/dataset_readers/conll2012_test.py @@ -54,11 +54,11 @@ def test_read_from_file(self, lazy): 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]) np.testing.assert_allclose(instances[0]["mention_lengths"].array, - [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, - 1, 1, 1, 6, 5, 4, 3, 2, 1, 1, 1, 1, 2, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1]) + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, + 0, 0, 0, 5, 4, 3, 2, 1, 0, 0, 0, 0, 1, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0]) class TestConll2012JsonlReader: @pytest.mark.parametrize('lazy', (True, False)) @@ -86,4 +86,4 @@ def test_read_from_file(self, lazy, offset): np.testing.assert_allclose(second_instance_entity_ids[(30 - offset):(32 - offset)], np.array([1, 1], dtype=np.int64)) np.testing.assert_allclose(second_instance_mention_lengths[(30 - offset):(32 - offset)], - np.array([2, 1], dtype=np.int64)) \ No newline at end of file + np.array([1, 0], dtype=np.int64)) \ No newline at end of file diff --git a/kglm/tests/dataset_readers/enhanced_wikitext_test.py b/kglm/tests/dataset_readers/enhanced_wikitext_test.py index a3e0c9a..c284673 100644 --- a/kglm/tests/dataset_readers/enhanced_wikitext_test.py +++ b/kglm/tests/dataset_readers/enhanced_wikitext_test.py @@ -23,9 +23,9 @@ def test_read_from_file(self, lazy): np.testing.assert_allclose(instances[1]["entity_ids"].array[:5], [0, 0, 1, 1, 1]) np.testing.assert_allclose(instances[1]["entity_ids"].array[-5:], [0, 0, 0, 0, 0]) np.testing.assert_allclose(instances[1]["mention_lengths"].array[:5], - [1, 1, 5, 4, 3]) + [0, 0, 4, 3, 2]) np.testing.assert_allclose(instances[1]["mention_lengths"].array[-5:], - [1, 1, 1, 1, 1]) + [0, 0, 0, 0, 0]) class TestEnhancedWikitextKglmReader: diff --git a/kglm/tests/models/entity_nlm_test.py b/kglm/tests/models/entity_nlm_test.py index 4f098e6..bc6dadf 100644 --- a/kglm/tests/models/entity_nlm_test.py +++ b/kglm/tests/models/entity_nlm_test.py @@ -56,7 +56,7 @@ def test_adjust_for_ongoing_mentions(self): logp = torch.zeros(batch_size, k, self.model.num_possible_annotations) output = { 'entity_ids': torch.LongTensor([[0, 0, 0], [3, 0, 0]]), - 'mention_lengths': torch.LongTensor([[1, 1, 1],[3, 1, 1]]) + 'mention_lengths': torch.LongTensor([[0, 0, 0], [2, 0, 0]]) } # See that adjustment works @@ -67,7 +67,7 @@ def test_adjust_for_ongoing_mentions(self): # Only element with probability should have entity id == 3 and mention length == 2 pred = logp[1, 0].argmax() assert self.model.entity_id_lookup[pred] == 3 - assert self.model.mention_length_lookup[pred] == 2 + assert self.model.mention_length_lookup[pred] == 1 def test_top_k_annotations(self): batch_size = 2 @@ -93,4 +93,4 @@ def test_beam_search(self): vocab_size = self.model.vocab.get_vocab_size('tokens') source = {'tokens': torch.randint(vocab_size, size=(batch_size, seq_len))} reset = torch.ones(batch_size, dtype=torch.uint8) - 
self.model.beam_search(source, reset, k) + out = self.model.beam_search(source, reset, k) From 4bd2d19234ce700d8a432464ff1d9c4163248f31 Mon Sep 17 00:00:00 2001 From: rloganiv Date: Mon, 28 Oct 2019 17:08:00 -0700 Subject: [PATCH 10/35] Updated EntityNlm for new mention lengths --- kglm/models/entity_nlm.py | 67 ++++++++------------------------------- 1 file changed, 14 insertions(+), 53 deletions(-) diff --git a/kglm/models/entity_nlm.py b/kglm/models/entity_nlm.py index 63b7ffd..f860e74 100644 --- a/kglm/models/entity_nlm.py +++ b/kglm/models/entity_nlm.py @@ -17,7 +17,7 @@ from torch.nn import Parameter import torch.nn.functional as F -from kglm.modules import DynamicEmbedding, WeightDrop +from kglm.modules import DynamicEmbedding, WeightDroppedLstm from kglm.training.metrics import Ppl # from kglm.training.metrics import Perplexity, UnknownPenalizedPerplexity @@ -81,22 +81,7 @@ def __init__(self, self._tie_weights = tie_weights self._variational_dropout_rate = variational_dropout_rate self._dropout_rate = dropout_rate - - # Rnn Encoders. - rnns: List[torch.nn.Module] = [] - for i in range(num_layers): - if i == 0: - input_size = embedding_dim - else: - input_size = hidden_size - if (i == num_layers - 1): - output_size = embedding_dim - else: - output_size = hidden_size - rnns.append(torch.nn.LSTM(input_size, output_size, batch_first=True)) - rnns = [WeightDrop(rnn, ['weight_hh_l0'], dropout=variational_dropout_rate) for rnn in rnns] - self.rnns = torch.nn.ModuleList(rnns) - + self._rnn = WeightDroppedLstm(num_layers, embedding_dim, hidden_size, variational_dropout_rate) self._state: Optional[StateDict] = None # Input variational dropout @@ -134,9 +119,6 @@ def __init__(self, self._mention_length_accuracy = CategoricalAccuracy() self._perplexity = Ppl() - if tie_weights: - self._vocab_projection.weight = self._text_field_embedder._token_embedders['tokens'].weight # pylint: disable=W0212 - initializer(self) @overrides @@ -248,25 +230,7 @@ def _forward_loop(self, mask = get_text_field_mask(tokens).byte() embeddings = self._text_field_embedder(tokens) embeddings = self._variational_dropout(embeddings) - - current_input = embeddings - hidden_list = [] - for layer, rnn in enumerate(self.rnns): - # Retrieve previous hidden state for layer. - if self._state is not None: - prev_hidden = self._state['layer_%i' % layer] - else: - prev_hidden = None - # Forward-pass. - output, hidden = rnn(current_input, prev_hidden) - output = output.contiguous() - # Update hidden state for layer. - hidden = tuple(h.detach() for h in hidden) - hidden_list.append(hidden) - current_input = output - hidden = current_input - - self._state = {'layer_%i' % i: h for i, h in enumerate(hidden_list)} + hidden = self._rnn(embeddings) # Initialize losses entity_type_loss = 0.0 @@ -314,11 +278,10 @@ def _forward_loop(self, # require access to the **next** hidden state, which does not exist during generation). next_entity_ids = next_entity_ids.clone() # This prevents mutating the source data. next_entity_ids[next_entity_ids == self._dynamic_embeddings.num_embeddings] = 0 - # We only predict the types / ids / lengths of the next mention if we are not currently # in the process of generating it (e.g. if the current remaining mention length is 1). # Indexing / masking with ``predict_all`` makes it possible to do this in batch. - predict_all = (current_mention_lengths == 1) & next_mask + predict_all = (current_mention_lengths == 0) & next_mask if predict_all.any(): # Equation 3 in the paper. 
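
# Illustrative aside, not part of this diff: a worked example of the mention-length
# convention behind the `== 1` -> `== 0` comparisons above. Lengths now count the
# tokens *remaining* in a mention after the current one, so the final token of a
# mention (and every non-mention token) carries 0 rather than 1. The dataset readers
# in PATCH 09 implement this by subtracting one from the old countdown:
import numpy as np

entity_length = 3                               # a three-token mention
old = np.arange(entity_length, 0, step=-1)      # [3, 2, 1]; 1 marked the last token
new = np.arange(entity_length, 0, step=-1) - 1  # [2, 1, 0]; 0 marks the last token
assert new.tolist() == [2, 1, 0]
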
@@ -432,12 +395,13 @@ def _forward_loop(self, 'penalized_logp': -total_loss * mask.sum() } - # Update the model state - self._state['prev_tokens'] = {field: tokens[field][:, -1].unsqueeze(1).detach() for field in tokens} - self._state['prev_entity_types'] = entity_types[:, -1].unsqueeze(1).detach() - self._state['prev_entity_ids'] = entity_ids[:, -1].unsqueeze(1).detach() - self._state['prev_mention_lengths'] = mention_lengths[:, -1].unsqueeze(1).detach() - self._state['prev_contexts'] = contexts.detach() + self._state = { + 'prev_tokens': {field: tokens[field][:, -1].unsqueeze(1).detach() for field in tokens}, + 'prev_entity_types': entity_types[:, -1].unsqueeze(1).detach(), + 'prev_entity_ids': entity_ids[:, -1].unsqueeze(1).detach(), + 'prev_mention_lengths': mention_lengths[:, -1].unsqueeze(1).detach(), + 'prev_contexts': contexts.detach() + } return output_dict @@ -449,15 +413,10 @@ def reset_states(self, reset: torch.ByteTensor) -> None: self._state['prev_entity_ids'][reset].zero_() self._state['prev_mention_lengths'][reset].zero_() self._state['prev_contexts'][reset].zero_() - # Zero out the hidden state - for layer in range(self._num_layers): - h, c = self._state['layer_%i' % layer] - h[:, reset, :] = torch.zeros_like(h[:, reset, :]) - c[:, reset, :] = torch.zeros_like(c[:, reset, :]) - self._state['layer_%i' % layer] = (h, c) # Reset the dynamic embeddings self._dynamic_embeddings.reset_states(reset) + self._rnn.reset(reset) def detach_states(self): """Detaches the model's state to enforce truncated backpropagation.""" @@ -488,3 +447,5 @@ def get_metrics(self, reset: bool = False) -> Dict[str, float]: 'ml_acc': self._mention_length_accuracy.get_metric(reset), 'ppl': self._perplexity.get_metric(reset) } + + hidden = self._rnn(embeddings) \ No newline at end of file From eeb16fa2c096e83f0222683aebf52de2438583b3 Mon Sep 17 00:00:00 2001 From: rloganiv Date: Wed, 20 Nov 2019 13:26:23 -0800 Subject: [PATCH 11/35] Fixed copy-related bugs and cleaned up command implementations --- kglm/commands/beamsum.py | 38 +++++++++++----------------- kglm/commands/evaluate_perplexity.py | 9 +++---- kglm/models/entity_disc.py | 27 +++++++++++++------- kglm/models/entity_nlm.py | 21 +++++++++------ kglm/modules/dynamic_embeddings.py | 9 ++++--- 5 files changed, 55 insertions(+), 49 deletions(-) diff --git a/kglm/commands/beamsum.py b/kglm/commands/beamsum.py index b979e74..d4c5c61 100644 --- a/kglm/commands/beamsum.py +++ b/kglm/commands/beamsum.py @@ -61,30 +61,18 @@ def add_subparser(self, name: str, parser: argparse._SubParsersAction) -> argpar default=None, help='Split size (default: whatever iterator was set to)') - subparser.add_argument('-k', '--beam-width', + subparser.add_argument('--beam-width', type=int, - default=None, + default=2, help='Beam width') - return subparser - + subparser.set_defaults(func=evaluate_from_args) -def logsumexp(prev: torch.FloatTensor, - current: torch.FloatTensor, - i: int, - samples_per_batch: int): - # NOTE: n is number of samples - current_avg = current.view(samples_per_batch, -1).sum(dim=-1).logsumexp(dim=0) - np.log(samples_per_batch).item() - if prev is None: - return current_avg - a = torch.max(prev, current_avg) - sumexp = torch.exp(prev - a) * i / (i + 1) + torch.exp(current_avg - a) / (i + 1) - return a + torch.log(sumexp) + return subparser def evaluate_perplexity(model: Model, sampler: Model, - num_samples: int, instances: Iterator[Instance], data_iterator: DataIterator, cuda_device: int, @@ -112,11 +100,12 @@ def evaluate_perplexity(model: Model, for 
batch, _ in generator_tqdm: # We need sequence length to help compute perplexity + batch_size, _ = batch['source']['tokens'].shape n_tokens = util.get_text_field_mask(batch['source']).float().sum(dim=-1) if denom is None: - denom = n_tokens + denom = n_tokens.sum() else: - denom += n_tokens + denom += n_tokens.sum() summand = util.move_to_device(summand, cuda_device) batch = util.move_to_device(batch, cuda_device) @@ -132,13 +121,15 @@ def evaluate_perplexity(model: Model, model_output = model(**sample) model_logp = model_output['logp'] + model_logp = model_logp.view(batch_size, beam_width) + model_logp = torch.logsumexp(model_logp, -1) - print(torch.exp(-model_logp / n_tokens)) + print(torch.exp(-model_logp.sum() / n_tokens.sum())) if summand is None: - summand = model_logp + summand = model_logp.sum() else: - summand += model_logp + summand += model_logp.sum() ppl = torch.exp(-summand / denom) @@ -165,6 +156,7 @@ def evaluate_from_args(args: argparse.Namespace) -> Dict[str, Any]: sampler = sampler_archive.model sampler.eval() + # Load the evaluation data. NOTE: We are using the model's reader! validation_dataset_reader_params = config.pop('validation_dataset_reader', None) if validation_dataset_reader_params is not None: @@ -185,8 +177,8 @@ def evaluate_from_args(args: argparse.Namespace) -> Dict[str, Any]: iterator_params['truncate'] = False iterator = DataIterator.from_params(iterator_params) iterator.index_with(model.vocab) - metrics = evaluate_perplexity(model, sampler, args.num_samples, instances, - iterator, args.cuda_device, args.beam_width) + metrics = evaluate_perplexity(model, sampler, instances, iterator, + args.cuda_device, args.beam_width) logger.info('Finished evaluating.') logger.info('Metrics:') diff --git a/kglm/commands/evaluate_perplexity.py b/kglm/commands/evaluate_perplexity.py index 21dfeeb..d609a08 100644 --- a/kglm/commands/evaluate_perplexity.py +++ b/kglm/commands/evaluate_perplexity.py @@ -142,8 +142,6 @@ def evaluate_perplexity(model: Model, # penalized_summands = [] trajectory = np.zeros(num_samples // samples_per_batch) individual_estimates = np.zeros(num_samples // samples_per_batch) - s_probs = np.zeros((348, num_samples // samples_per_batch)) - weight = None @@ -207,6 +205,8 @@ def evaluate_perplexity(model: Model, current_avg = summand.view(samples_per_batch, -1).sum(dim=-1).logsumexp(dim=0) - np.log(samples_per_batch).item() instance_ppl = torch.exp(-current_avg.sum() / denom.sum()) + print(denom.sum()) + weight = logsumexp(weight, summand, i, samples_per_batch) ppl = torch.exp(-weight / denom.sum()) @@ -214,7 +214,6 @@ def evaluate_perplexity(model: Model, individual_estimates[i] = instance_ppl.item() trajectory[i] = ppl.item() - s_probs[:, i] = torch.exp(-summand.cpu() / denom.cpu()).numpy() # summands.append(summand) # # penalized_summands.append(penalized_summand) # # if i == 0: @@ -241,8 +240,7 @@ def evaluate_perplexity(model: Model, 'ppl': ppl, # 'upp': upp, 'trajectory': trajectory, - 'individual_estimates': individual_estimates, - 's_probs': s_probs + 'individual_estimates': individual_estimates } return metrics @@ -297,6 +295,5 @@ def evaluate_from_args(args: argparse.Namespace) -> Dict[str, Any]: if output_file: np.save(output_file + '.trajectory.npy', metrics['trajectory']) np.save(output_file + '.individual_estimates.npy', metrics['individual_estimates']) - np.save(output_file + '.s_probs.npy', metrics['s_probs']) return metrics diff --git a/kglm/models/entity_disc.py b/kglm/models/entity_disc.py index f9e5e01..6878d71 100644 --- 
a/kglm/models/entity_disc.py +++ b/kglm/models/entity_disc.py @@ -1,6 +1,7 @@ """ Discriminative version of EntityNLM for importance sampling. """ +from copy import deepcopy import logging from typing import Any, Dict, List, Optional, Tuple, Union @@ -299,7 +300,7 @@ def sample(self, # pylint: disable=unused-argument prev_mention_lengths = mention_lengths[:, timestep] # Update state - self._state['prev_mention_lengths'] = prev_mention_lengths.detach() + self._state = {'prev_mention_lengths': prev_mention_lengths.detach()} return { 'logp': logp, @@ -329,7 +330,7 @@ def entity_id_lookup(self): @property def mention_length_lookup(self): - mention_length_lookup = [1] + list(range(self._max_mention_length)) * self._max_embeddings + mention_length_lookup = [0] + list(range(self._max_mention_length)) * self._max_embeddings return torch.LongTensor(mention_length_lookup) def _annotation_logp(self, @@ -402,7 +403,9 @@ def _adjust_for_ongoing_mentions(self, return logp - def _top_k_annotations(self, logp: torch.FloatTensor, k: int): + def _top_k_annotations(self, + logp: torch.FloatTensor, + k: int): """Extracts the top-k annotations. Parameters @@ -462,32 +465,39 @@ def _update_beam_states(self, # Concat all the dynamic entity embeddings and trace backpointers to make sure the proper # embeddings are loaded for each beam. all_prev_entity_embeddings = logp.new_zeros(batch_size, len(beam_states), self._max_embeddings, self._embedding_dim) - all_prev_num_entities = backpointers.new_zeros(batch_size, len(beam_states)) + all_prev_num_embeddings = backpointers.new_zeros(batch_size, len(beam_states)) all_prev_last_seen = backpointers.new_zeros(batch_size, len(beam_states), self._max_embeddings) for i, beam_state in enumerate(beam_states): self._dynamic_embeddings.load_state_dict(beam_state) all_prev_entity_embeddings[:, i] = self._dynamic_embeddings.embeddings - all_prev_num_entities[:, i] = self._dynamic_embeddings.num_embeddings + all_prev_num_embeddings[:, i] = self._dynamic_embeddings.num_embeddings all_prev_last_seen[:, i] = self._dynamic_embeddings.last_seen new_beam_states: List[Dict[str, Any]] = [] for i in range(k): # Trace backpointers to get correct params self._dynamic_embeddings.embeddings = all_prev_entity_embeddings[torch.arange(batch_size), backpointers[:, i]] - self._dynamic_embeddings.num_entities = all_prev_num_entities[torch.arange(batch_size), backpointers[:, i]] + self._dynamic_embeddings.num_embeddings = all_prev_num_embeddings[torch.arange(batch_size), backpointers[:, i]] self._dynamic_embeddings.last_seen = all_prev_last_seen[torch.arange(batch_size), backpointers[:, i]] # Add and update embeddings entity_ids = output['entity_ids'][:, i] entity_types = output['entity_types'][:, i] - new_entities = entity_ids == self._dynamic_embeddings.num_embeddings + new_entities = (entity_ids == 0) & entity_types + + # Gotta make the output handle new entities correctly + # TODO: Be a better programmer + entity_ids[new_entities] = self._dynamic_embeddings.num_embeddings[new_entities] + output['entity_ids'][:, i] = entity_ids + + # Now do this right... 
self._dynamic_embeddings.add_embeddings(timestep, new_entities) self._dynamic_embeddings.update_embeddings(hidden=hidden, update_indices=entity_ids, timestep=timestep, mask=entity_types) - new_beam_states.append(self._dynamic_embeddings.state_dict()) + new_beam_states.append(deepcopy(self._dynamic_embeddings.state_dict())) return new_beam_states @@ -745,7 +755,6 @@ def _forward_loop(self, return output_dict - def reset_states(self, reset: torch.ByteTensor) -> None: """Resets the model's internals. Should be called at the start of a new batch.""" if reset.any() and (self._state is not None): diff --git a/kglm/models/entity_nlm.py b/kglm/models/entity_nlm.py index f860e74..0d1d413 100644 --- a/kglm/models/entity_nlm.py +++ b/kglm/models/entity_nlm.py @@ -249,6 +249,7 @@ def _forward_loop(self, current_entity_ids = entity_ids[:, timestep] current_mention_lengths = mention_lengths[:, timestep] current_hidden = self._dropout(hidden[:, timestep]) + current_mask = mask[:, timestep] next_entity_types = entity_types[:, timestep + 1] next_entity_ids = entity_ids[:, timestep + 1] @@ -281,7 +282,7 @@ def _forward_loop(self, # We only predict the types / ids / lengths of the next mention if we are not currently # in the process of generating it (e.g. if the current remaining mention length is 1). # Indexing / masking with ``predict_all`` makes it possible to do this in batch. - predict_all = (current_mention_lengths == 0) & next_mask + predict_all = (current_mention_lengths == 0) & next_mask & current_mask if predict_all.any(): # Equation 3 in the paper. @@ -350,10 +351,10 @@ def _forward_loop(self, vocab_features[next_entity_types] = vocab_features[next_entity_types] + entity_embeddings if (~next_entity_types).any(): vocab_features[~next_entity_types] = vocab_features[~next_entity_types] + context_embeddings - vocab_logits = self._vocab_projection(vocab_features[next_mask]) + vocab_logits = self._vocab_projection(vocab_features[next_mask & current_mask]) vocab_logp = F.log_softmax(vocab_logits, -1) - _vocab_loss = -vocab_logp.gather(-1, next_tokens[next_mask].unsqueeze(-1)) - logp[next_mask] += -_vocab_loss.squeeze() + _vocab_loss = -vocab_logp.gather(-1, next_tokens[next_mask & current_mask].unsqueeze(-1)) + logp[next_mask & current_mask] += -_vocab_loss.squeeze() # _vocab_loss = F.cross_entropy(vocab_logits, next_tokens, reduction='none') # _vocab_loss = _vocab_loss * next_mask.float() @@ -407,12 +408,16 @@ def _forward_loop(self, def reset_states(self, reset: torch.ByteTensor) -> None: """Resets the model's internals. 
Should be called at the start of a new batch.""" + if reset.all(): + self._state = None if reset.any() and (self._state is not None): # Zero out any previous elements - self._state['prev_entity_types'][reset].zero_() - self._state['prev_entity_ids'][reset].zero_() - self._state['prev_mention_lengths'][reset].zero_() - self._state['prev_contexts'][reset].zero_() + for field in self._state['prev_tokens']: + self._state['prev_tokens'][field][reset] = 0 + self._state['prev_entity_types'][reset] = 0 + self._state['prev_entity_ids'][reset] = 0 + self._state['prev_mention_lengths'][reset] = 0 + self._state['prev_contexts'][reset] = 0 # Reset the dynamic embeddings self._dynamic_embeddings.reset_states(reset) diff --git a/kglm/modules/dynamic_embeddings.py b/kglm/modules/dynamic_embeddings.py index 79bdf19..ac4ec74 100644 --- a/kglm/modules/dynamic_embeddings.py +++ b/kglm/modules/dynamic_embeddings.py @@ -45,9 +45,12 @@ def __init__(self, out_features=embedding_dim, bias=False) - self.embeddings: torch.Tensor = None # Storage for embeddings - self.num_embeddings: torch.Tensor = None # Tracks how many embeddings are in use - self.last_seen: torch.Tensor = None # Tracks last time embedding was seen + self.register_buffer('embeddings', None) + self.register_buffer('num_embeddings', None) + self.register_buffer('last_seen', None) + # self.embeddings: torch.Tensor = None # Storage for embeddings + # self.num_embeddings: torch.Tensor = None # Tracks how many embeddings are in use + # self.last_seen: torch.Tensor = None # Tracks last time embedding was seen def reset_states(self, reset: torch.ByteTensor) -> None: """ From 2385d44744a8d04a3ccfe6c283d286b016536475 Mon Sep 17 00:00:00 2001 From: rloganiv Date: Wed, 20 Nov 2019 13:46:27 -0800 Subject: [PATCH 12/35] Reordered KglmDisc methods --- kglm/models/kglm_disc.py | 88 ++++++++++++++++++++-------------------- 1 file changed, 44 insertions(+), 44 deletions(-) diff --git a/kglm/models/kglm_disc.py b/kglm/models/kglm_disc.py index f714cc6..d79823a 100644 --- a/kglm/models/kglm_disc.py +++ b/kglm/models/kglm_disc.py @@ -128,6 +128,50 @@ def __init__(self, initializer(self) + @overrides + def forward(self, # pylint: disable=arguments-differ + source: Dict[str, torch.Tensor], + reset: torch.Tensor, + metadata: List[Dict[str, Any]], + mention_type: torch.Tensor = None, + raw_entity_ids: Dict[str, torch.Tensor] = None, + entity_ids: Dict[str, torch.Tensor] = None, + parent_ids: Dict[str, torch.Tensor] = None, + relations: Dict[str, torch.Tensor] = None, + shortlist: Dict[str, torch.Tensor] = None, + shortlist_inds: torch.Tensor = None) -> Dict[str, torch.Tensor]: + + # Tensorize the alias_database - this will only perform the operation once. 
+        alias_database = metadata[0]['alias_database']
+        alias_database.tensorize(vocab=self.vocab)
+
+        # Reset the model if needed
+        if reset.any() and (self._state is not None):
+            for layer in range(self._num_layers):
+                h, c = self._state['layer_%i' % layer]
+                h[:, reset, :] = torch.zeros_like(h[:, reset, :])
+                c[:, reset, :] = torch.zeros_like(c[:, reset, :])
+                self._state['layer_%i' % layer] = (h, c)
+        self._recent_entities.reset(reset)
+
+        if entity_ids is not None:
+            output_dict = self._forward_loop(
+                source=source,
+                alias_database=alias_database,
+                mention_type=mention_type,
+                raw_entity_ids=raw_entity_ids,
+                entity_ids=entity_ids,
+                parent_ids=parent_ids,
+                relations=relations,
+                shortlist=shortlist,
+                shortlist_inds=shortlist_inds)
+        else:
+            # TODO: Figure out what we want here - probably to do some kind of inference on
+            # entities / mention types.
+            output_dict = {}
+
+        return output_dict
+
     def sample(self,
                source: Dict[str, torch.Tensor],
                target: Dict[str, torch.Tensor],
@@ -329,50 +373,6 @@ def sample(self,
         logp = mention_logp + new_entity_logp + derived_entity_logp
         return {'sample': sample, 'logp': logp}
 
-    @overrides
-    def forward(self,  # pylint: disable=arguments-differ
-                source: Dict[str, torch.Tensor],
-                reset: torch.Tensor,
-                metadata: List[Dict[str, Any]],
-                mention_type: torch.Tensor = None,
-                raw_entity_ids: Dict[str, torch.Tensor] = None,
-                entity_ids: Dict[str, torch.Tensor] = None,
-                parent_ids: Dict[str, torch.Tensor] = None,
-                relations: Dict[str, torch.Tensor] = None,
-                shortlist: Dict[str, torch.Tensor] = None,
-                shortlist_inds: torch.Tensor = None) -> Dict[str, torch.Tensor]:
-
-        # Tensorize the alias_database - this will only perform the operation once.
-        alias_database = metadata[0]['alias_database']
-        alias_database.tensorize(vocab=self.vocab)
-
-        # Reset the model if needed
-        if reset.any() and (self._state is not None):
-            for layer in range(self._num_layers):
-                h, c = self._state['layer_%i' % layer]
-                h[:, reset, :] = torch.zeros_like(h[:, reset, :])
-                c[:, reset, :] = torch.zeros_like(c[:, reset, :])
-                self._state['layer_%i' % layer] = (h, c)
-        self._recent_entities.reset(reset)
-
-        if entity_ids is not None:
-            output_dict = self._forward_loop(
-                source=source,
-                alias_database=alias_database,
-                mention_type=mention_type,
-                raw_entity_ids=raw_entity_ids,
-                entity_ids=entity_ids,
-                parent_ids=parent_ids,
-                relations=relations,
-                shortlist=shortlist,
-                shortlist_inds=shortlist_inds)
-        else:
-            # TODO: Figure out what we want here - probably to do some king of inference on
-            # entities / mention types.
-            output_dict = {}
-
-        return output_dict
-
     def _encode_source(self, source: Dict[str, torch.Tensor]) -> torch.Tensor:
 
         # Extract and embed source tokens.
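
The hidden-state bookkeeping shuffled around in the diff above is the same pattern that
WeightDroppedLstm centralizes: carry (h, c) across batch splits, detach it to truncate
backpropagation, and zero it per-sequence on reset. A minimal sketch of that pattern,
assuming a single vanilla torch.nn.LSTM and a hypothetical StatefulLstm wrapper rather
than the stacked, weight-dropped layers the models actually use:

import torch
from typing import Optional, Tuple

LstmState = Tuple[torch.FloatTensor, torch.FloatTensor]


class StatefulLstm(torch.nn.Module):
    """Sketch: carries LSTM state across batch splits, with per-sequence resets."""
    def __init__(self, input_size: int, hidden_size: int) -> None:
        super().__init__()
        self._lstm = torch.nn.LSTM(input_size, hidden_size, batch_first=True)
        self._state: Optional[LstmState] = None

    def forward(self, inputs: torch.FloatTensor) -> torch.FloatTensor:  # pylint: disable=arguments-differ
        output, state = self._lstm(inputs, self._state)
        # Detach so that backpropagation is truncated at split boundaries.
        self._state = (state[0].detach(), state[1].detach())
        return output

    def reset(self, reset: torch.ByteTensor = None) -> None:
        # No argument: drop the state entirely (e.g. when the batch size changes
        # between training and evaluation). Otherwise zero only flagged sequences.
        if reset is None:
            self._state = None
        if self._state is None:
            return
        h, c = self._state  # each has shape (num_layers, batch_size, hidden_size)
        h[:, reset, :] = torch.zeros_like(h[:, reset, :])
        c[:, reset, :] = torch.zeros_like(c[:, reset, :])
        self._state = (h, c)

The train()/eval() overrides in the following patches call reset() with no argument for
exactly the batch-size reason noted in their comments.
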
From cc8e010105804c0ff543bfd0702af714813f99cc Mon Sep 17 00:00:00 2001 From: rloganiv Date: Mon, 25 Nov 2019 16:35:07 -0800 Subject: [PATCH 13/35] BUGFIX: Positional arguments in WeightDroppedLstm --- kglm/models/entity_disc.py | 5 ++++- kglm/models/entity_nlm.py | 5 ++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/kglm/models/entity_disc.py b/kglm/models/entity_disc.py index 6878d71..7bb5d6a 100644 --- a/kglm/models/entity_disc.py +++ b/kglm/models/entity_disc.py @@ -77,7 +77,10 @@ def __init__(self, self._max_embeddings = max_embeddings self._sos_token = self.vocab.get_token_index('@@START@@', 'tokens') self._eos_token = self.vocab.get_token_index('@@END@@', 'tokens') - self._rnn = WeightDroppedLstm(num_layers, embedding_dim, hidden_size, variational_dropout_rate) + self._rnn = WeightDroppedLstm(num_layers=num_layers, + input_embedding_dim=embedding_dim, + hidden_size=hidden_size, + dropout=variational_dropout_rate) self._state: Optional[StateDict] = None # Input variational dropout diff --git a/kglm/models/entity_nlm.py b/kglm/models/entity_nlm.py index 0d1d413..517ccd9 100644 --- a/kglm/models/entity_nlm.py +++ b/kglm/models/entity_nlm.py @@ -81,7 +81,10 @@ def __init__(self, self._tie_weights = tie_weights self._variational_dropout_rate = variational_dropout_rate self._dropout_rate = dropout_rate - self._rnn = WeightDroppedLstm(num_layers, embedding_dim, hidden_size, variational_dropout_rate) + self._rnn = WeightDroppedLstm(num_layers=num_layers, + input_embedding_dim=embedding_dim, + hidden_size=hidden_size, + dropout=variational_dropout_rate) self._state: Optional[StateDict] = None # Input variational dropout From c28fa5e2ecd89a58b3cc7e43f882b2ac8391c0a6 Mon Sep 17 00:00:00 2001 From: rloganiv Date: Mon, 25 Nov 2019 16:35:51 -0800 Subject: [PATCH 14/35] BUGFIX: Don't register dynamic embeddings as a buffer --- kglm/modules/dynamic_embeddings.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/kglm/modules/dynamic_embeddings.py b/kglm/modules/dynamic_embeddings.py index ac4ec74..79bdf19 100644 --- a/kglm/modules/dynamic_embeddings.py +++ b/kglm/modules/dynamic_embeddings.py @@ -45,12 +45,9 @@ def __init__(self, out_features=embedding_dim, bias=False) - self.register_buffer('embeddings', None) - self.register_buffer('num_embeddings', None) - self.register_buffer('last_seen', None) - # self.embeddings: torch.Tensor = None # Storage for embeddings - # self.num_embeddings: torch.Tensor = None # Tracks how many embeddings are in use - # self.last_seen: torch.Tensor = None # Tracks last time embedding was seen + self.embeddings: torch.Tensor = None # Storage for embeddings + self.num_embeddings: torch.Tensor = None # Tracks how many embeddings are in use + self.last_seen: torch.Tensor = None # Tracks last time embedding was seen def reset_states(self, reset: torch.ByteTensor) -> None: """ From 05a9db1d87d71675c9de3d9f44c8ca202b19fb0f Mon Sep 17 00:00:00 2001 From: rloganiv Date: Mon, 25 Nov 2019 16:36:33 -0800 Subject: [PATCH 15/35] Light refactoring of WeightDroppedLstm.__init__() --- kglm/modules/weight_drop.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/kglm/modules/weight_drop.py b/kglm/modules/weight_drop.py index 3b42c93..34278a4 100644 --- a/kglm/modules/weight_drop.py +++ b/kglm/modules/weight_drop.py @@ -46,14 +46,19 @@ def reset(self): class WeightDroppedLstm(torch.nn.Module): def __init__(self, num_layers: int, - embedding_dim: int, + input_embedding_dim: int, hidden_size: int, - dropout: 
float) -> None: + output_embedding_dim: Optional[int] = None, + dropout: Optional[float] = 0.0) -> None: super().__init__() self._num_layers = num_layers - self._embedding_dim = embedding_dim + self._input_embedding_dim = input_embedding_dim self._hidden_size = hidden_size + if output_embedding_dim is not None: + self._output_embedding_dim = output_embedding_dim + else: + self._output_embedding_dim = input_embedding_dim self._dropout = dropout self._state: Optional[StateDict] = None @@ -61,11 +66,11 @@ def __init__(self, rnns: List[torch.nn.Module] = [] for i in range(num_layers): if i == 0: - input_size = embedding_dim + input_size = self._input_embedding_dim else: - input_size = hidden_size + input_size = self._hidden_size if i == num_layers - 1: - output_size = embedding_dim + output_size = self._output_embedding_dim else: output_size = hidden_size rnns.append(torch.nn.LSTM(input_size, output_size, batch_first=True)) From 019340d735c7840f7a3d9e3578b2d229000fdb6f Mon Sep 17 00:00:00 2001 From: rloganiv Date: Mon, 25 Nov 2019 16:42:36 -0800 Subject: [PATCH 16/35] Updated KglmDisc to use WeightDroppedLstm --- kglm/models/kglm_disc.py | 91 ++++++++++--------------------------- kglm/modules/weight_drop.py | 4 +- 2 files changed, 27 insertions(+), 68 deletions(-) diff --git a/kglm/models/kglm_disc.py b/kglm/models/kglm_disc.py index d79823a..873164d 100644 --- a/kglm/models/kglm_disc.py +++ b/kglm/models/kglm_disc.py @@ -14,7 +14,7 @@ import torch.nn.functional as F from kglm.data import AliasDatabase -from kglm.modules import (embedded_dropout, LockedDropout, WeightDrop, KnowledgeGraphLookup, +from kglm.modules import (embedded_dropout, LockedDropout, WeightDroppedLstm, KnowledgeGraphLookup, RecentEntities) from kglm.nn.util import nested_enumerate, parallel_sample from kglm.training.metrics import Ppl @@ -83,20 +83,12 @@ def __init__(self, token_embedding_dim = token_embedder.get_output_dim() self.entity_embedding_dim = entity_embedding_dim self.token_embedding_dim = token_embedding_dim - - rnns: List[torch.nn.Module] = [] - for i in range(num_layers): - if i == 0: - input_size = token_embedding_dim - else: - input_size = hidden_size - if (i == num_layers - 1): - output_size = token_embedding_dim + 2 * entity_embedding_dim - else: - output_size = hidden_size - rnns.append(torch.nn.LSTM(input_size, output_size, batch_first=True)) - rnns = [WeightDrop(rnn, ['weight_hh_l0'], dropout=wdrop) for rnn in rnns] - self.rnns = torch.nn.ModuleList(rnns) + rnn_output_dim = token_embedding_dim + 2 * entity_embedding_dim + self._rnn = WeightDroppedLstm(num_layers=num_layers, + input_embedding_dim=self.token_embedding_dim, + hidden_size=self._hidden_size, + output_embedding_dim=rnn_output_dim, + dropout=self._wdrop) # Various linear transformations. 
self._fc_mention_type = torch.nn.Linear( @@ -111,8 +103,6 @@ def __init__(self, if tie_weights: self._fc_new_entity.weight = self._entity_embedder.weight - self._state: Optional[Dict[str, Any]] = None - # Metrics self._unk_index = vocab.get_token_index(DEFAULT_OOV_TOKEN) self._unk_penalty = math.log(vocab.get_vocab_size('tokens_unk')) @@ -146,13 +136,7 @@ def forward(self, # pylint: disable=arguments-differ alias_database.tensorize(vocab=self.vocab) # Reset the model if needed - if reset.any() and (self._state is not None): - for layer in range(self._num_layers): - h, c = self._state['layer_%i' % layer] - h[:, reset, :] = torch.zeros_like(h[:, reset, :]) - c[:, reset, :] = torch.zeros_like(c[:, reset, :]) - self._state['layer_%i' % layer] = (h, c) - self._recent_entities.reset(reset) + self.reset_states(reset) if entity_ids is not None: output_dict = self._forward_loop( @@ -190,13 +174,7 @@ def sample(self, alias_database.tensorize(vocab=self.vocab) # Reset the model if needed - if reset.any() and (self._state is not None): - for layer in range(self._num_layers): - h, c = self._state['layer_%i' % layer] - h[:, reset, :] = torch.zeros_like(h[:, reset, :]) - c[:, reset, :] = torch.zeros_like(c[:, reset, :]) - self._state['layer_%i' % layer] = (h, c) - self._recent_entities.reset(reset) + self.reset_states(reset) logp = 0.0 @@ -375,41 +353,17 @@ def sample(self, def _encode_source(self, source: Dict[str, torch.Tensor]) -> torch.Tensor: - # Extract and embed source tokens. + # Extract, embed and encode source tokens. source_embeddings = embedded_dropout( embed=self._token_embedder, words=source, dropout=self._dropoute if self.training else 0) source_embeddings = self._locked_dropout(source_embeddings, self._dropouti) + encoded_raw = self._rnn(source_embeddings) + encoded = self._locked_dropout(encoded_raw) - # Encode. - current_input = source_embeddings - hidden_states = [] - for layer, rnn in enumerate(self.rnns): - # Retrieve previous hidden state for layer. - if self._state is not None: - prev_hidden = self._state['layer_%i' % layer] - else: - prev_hidden = None - # Forward-pass. - output, hidden = rnn(current_input, prev_hidden) - output = output.contiguous() - # Update hidden state for layer. - hidden = tuple(h.detach() for h in hidden) - hidden_states.append(hidden) - # Apply dropout. - if layer == self._num_layers - 1: - dropped_output = self._locked_dropout(output, self._dropout) - else: - dropped_output = self._locked_dropout(output, self._dropouth) - current_input = dropped_output - encoded = current_input - - alpha_loss = dropped_output.pow(2).mean() - beta_loss = (output[:, 1:] - output[:, :-1]).pow(2).mean() - - # Update state. - self._state = {'layer_%i' % i: h for i, h in enumerate(hidden_states)} + alpha_loss = encoded.pow(2).mean() + beta_loss = (encoded_raw[:, 1:] - encoded_raw[:, :-1]).pow(2).mean() return encoded, alpha_loss, beta_loss @@ -669,18 +623,17 @@ def _forward_loop(self, @overrides def train(self, mode=True): - # TODO: This is a temporary hack to ensure that the internal state resets when the model - # switches from training to evaluation. The complication arises from potentially differing - # batch sizes (e.g. the `reset` tensor will not be the right size). In future - # implementations this should be handled more robustly. + # This is a hack to ensure that the internal state resets when the model switches from + # training to evaluation. The complication arises from potentially differing batch sizes + # (e.g. the `reset` tensor will not be the right size). 
super().train(mode) - self._state = None + self._rnn.reset() @overrides def eval(self): - # TODO: See train. + # See train. super().eval() - self._state = None + self._rnn.reset() def get_metrics(self, reset: bool = False) -> Dict[str, float]: out = { @@ -702,3 +655,7 @@ def get_metrics(self, reset: bool = False) -> Dict[str, float]: out['parent_ppl'] = self._parent_ppl.get_metric(reset) out['relation_ppl'] = self._relation_ppl.get_metric(reset) return out + + def reset_states(self, reset): + self._rnn.reset(reset) + self._recent_entities.reset(reset) diff --git a/kglm/modules/weight_drop.py b/kglm/modules/weight_drop.py index 34278a4..8d4a924 100644 --- a/kglm/modules/weight_drop.py +++ b/kglm/modules/weight_drop.py @@ -97,9 +97,11 @@ def forward(self, embeddings: torch.FloatTensor) -> torch.FloatTensor: # pylint self._state = {'layer_%i' % i: h for i, h in enumerate(hidden_list)} return current_input - def reset(self, reset: torch.ByteTensor) -> None: + def reset(self, reset: torch.ByteTensor = None) -> None: """Resets the internal hidden states""" # pylint: disable=invalid-name + if reset is None: + self._state = None if self._state is None: return for layer in range(self._num_layers): From 046a498bee87e0183cb345fce55bc1768475afa2 Mon Sep 17 00:00:00 2001 From: rloganiv Date: Mon, 25 Nov 2019 17:01:02 -0800 Subject: [PATCH 17/35] Updated Kglm to use WeightDroppedLstm --- kglm/models/kglm.py | 85 ++++++-------------------- kglm/tests/fixtures/kglm.model.tar.gz | Bin 47733 -> 47776 bytes 2 files changed, 20 insertions(+), 65 deletions(-) diff --git a/kglm/models/kglm.py b/kglm/models/kglm.py index dcf4cbb..074bdcc 100644 --- a/kglm/models/kglm.py +++ b/kglm/models/kglm.py @@ -15,8 +15,8 @@ import torch.nn.functional as F from kglm.data import AliasDatabase -from kglm.modules import ( - embedded_dropout, LockedDropout, WeightDrop, KnowledgeGraphLookup, RecentEntities) +from kglm.modules import (embedded_dropout, LockedDropout, WeightDroppedLstm, + KnowledgeGraphLookup, RecentEntities) from kglm.nn.util import nested_enumerate, parallel_sample from kglm.training.metrics import Ppl @@ -86,20 +86,12 @@ def __init__(self, token_embedding_dim = token_embedder.get_output_dim() self.entity_embedding_dim = entity_embedding_dim self.token_embedding_dim = token_embedding_dim - - rnns: List[torch.nn.Module] = [] - for i in range(num_layers): - if i == 0: - input_size = token_embedding_dim - else: - input_size = hidden_size - if (i == num_layers - 1): - output_size = token_embedding_dim + 2 * entity_embedding_dim - else: - output_size = hidden_size - rnns.append(torch.nn.LSTM(input_size, output_size, batch_first=True)) - rnns = [WeightDrop(rnn, ['weight_hh_l0'], dropout=wdrop) for rnn in rnns] - self.rnns = torch.nn.ModuleList(rnns) + rnn_output_dim = token_embedding_dim + 2 * entity_embedding_dim + self._rnn = WeightDroppedLstm(num_layers=num_layers, + input_embedding_dim=self.token_embedding_dim, + hidden_size=self._hidden_size, + output_embedding_dim=rnn_output_dim, + dropout=self._wdrop) # Various linear transformations. 
self._fc_mention_type = torch.nn.Linear( @@ -129,8 +121,6 @@ def __init__(self, if tie_weights: self._fc_generate.weight = self._token_embedder.weight - self._state: Optional[Dict[str, Any]] = None - # Metrics self._unk_index = vocab.get_token_index(DEFAULT_OOV_TOKEN) self._unk_penalty = math.log(vocab.get_vocab_size('tokens_unk')) @@ -293,13 +283,7 @@ def sample(self, alias_database.tensorize(vocab=self.vocab) # Reset - if reset.any() and (self._state is not None): - for layer in range(self._num_layers): - h, c = self._state['layer_%i' % layer] - h[:, reset, :] = torch.zeros_like(h[:, reset, :]) - c[:, reset, :] = torch.zeros_like(c[:, reset, :]) - self._state['layer_%i' % layer] = (h, c) - self._recent_entities.reset(reset) + self.reset_states(reset) # Get source tokens source_tokens = source['tokens'] @@ -405,13 +389,7 @@ def forward(self, # pylint: disable=arguments-differ alias_database.tensorize(vocab=self.vocab) # Reset the model if needed - if reset.any() and (self._state is not None): - for layer in range(self._num_layers): - h, c = self._state['layer_%i' % layer] - h[:, reset, :] = torch.zeros_like(h[:, reset, :]) - c[:, reset, :] = torch.zeros_like(c[:, reset, :]) - self._state['layer_%i' % layer] = (h, c) - self._recent_entities.reset(reset) + self.reset_states(reset) if target is not None: output_dict = self._forward_loop( @@ -680,42 +658,17 @@ def decode(self, output: Dict[str, Any]): return output def _encode_source(self, source: Dict[str, torch.Tensor]) -> torch.Tensor: - - # Extract and embed source tokens. + # Extract, embed and encode source tokens. source_embeddings = embedded_dropout( embed=self._token_embedder, words=source, dropout=self._dropoute if self.training else 0) source_embeddings = self._locked_dropout(source_embeddings, self._dropouti) + encoded_raw = self._rnn(source_embeddings) + encoded = self._locked_dropout(encoded_raw) - # Encode. - current_input = source_embeddings - hidden_states = [] - for layer, rnn in enumerate(self.rnns): - # Retrieve previous hidden state for layer. - if self._state is not None: - prev_hidden = self._state['layer_%i' % layer] - else: - prev_hidden = None - # Forward-pass. - output, hidden = rnn(current_input, prev_hidden) - output = output.contiguous() - # Update hidden state for layer. - hidden = tuple(h.detach() for h in hidden) - hidden_states.append(hidden) - # Apply dropout. - if layer == self._num_layers - 1: - dropped_output = self._locked_dropout(output, self._dropout) - else: - dropped_output = self._locked_dropout(output, self._dropouth) - current_input = dropped_output - encoded = current_input - - alpha_loss = dropped_output.pow(2).mean() - beta_loss = (output[:, 1:] - output[:, :-1]).pow(2).mean() - - # Update state. - self._state = {'layer_%i' % i: h for i, h in enumerate(hidden_states)} + alpha_loss = encoded.pow(2).mean() + beta_loss = (encoded_raw[:, 1:] - encoded_raw[:, :-1]).pow(2).mean() return encoded, alpha_loss, beta_loss @@ -731,7 +684,6 @@ def _mention_type_loss(self, mention_loss = sequence_cross_entropy_with_logits(logits, mention_type, mask, average='token') - # if not self.training: self._new_mention_f1(predictions=logits, gold_labels=mention_type, @@ -1061,13 +1013,13 @@ def train(self, mode=True): # batch sizes (e.g. the `reset` tensor will not be the right size). In future # implementations this should be handled more robustly. super().train(mode) - self._state = None + self._rnn.reset() @overrides def eval(self): # TODO: See train. 
super().eval()
-        self._state = None
+        self._rnn.reset()
 
     def get_metrics(self, reset: bool = False) -> Dict[str, float]:
         out = {
@@ -1095,3 +1047,6 @@ def get_metrics(self, reset: bool = False) -> Dict[str, float]:
         out['relation_ppl'] = self._relation_ppl.get_metric(reset)
         return out
 
+    def reset_states(self, reset):
+        self._rnn.reset(reset)
+        self._recent_entities.reset(reset)
diff --git a/kglm/tests/fixtures/kglm.model.tar.gz b/kglm/tests/fixtures/kglm.model.tar.gz
index 4f1b44fcbd877b0a2a7c5c6acefc562e064b424c..3ff743b80b0410ddee92dcda31c3e86ad8bcc0d2 100644
GIT binary patch
literal 47776
[base85-encoded payload of the updated binary test fixture omitted]
zDl8?4u)YVdT@8dirL=2_E@yTQqY+Kn(DCRsY*aF2{VQsmGwQFT?ZheAur(hx9y&r_ zx3t3pHlU_0tI19;UXp1TC>aspNMf}EM<24n@#{Mw)kl>>cb$_Yo0XExacho;QP}T| z0CDXy9I#3ceIEXV3!hF<=%-e4sdp33o=lfk-;@PQmCKat`xFd*o}$MoNM&A~G(Xb~ z@4WC3-wQRc@OfVzdAbzJ9;@N^ep9Gvs18ni+Z~Ub{C94hDU|zH23H1XvBwfcoc+j~ zZ(Ot^SUiBv{P!QpR9RtmkqK(~o3ZcZ3zW83hl{RG7B&>B@aVvg(m8gzxM1)hdi!WR z6&Tv^u{nrUCpN+TMQt=A)1S_Nsf77!NAuXaaiIE34^8K-rOx^3P#1KHMr^BuniZqT z-DNIZnhFrGTuxd(#ttIGx?ul?K=|*6D~EKALz!tE&@0gpw<&vL$m9*QC)x`76`MdZ zzM?O;5IdeI;?aCdob1so*t;6@rNKYNmR*V%IO`6Kop=mnvzuu~yfYdp1i-1an<>Xa z39H|8b;te0+%_To*2{0BdXXU_yyDoyv<`EQkQG-4@ zcjvo7Ww5E;jTKka(5MOtm=1XZO*>psHsY$dEvN&0OtN82SBYpkAx_$OT9xfiGzc1- zTzS{$G;-G-0e8RUHWkZ2znhzer3=uqPg5%2cW-@((s&DDZBFZJc*VnE|BJA^CW zlf@wa-jx3K2nD#O32Tm7pr5-7HxB>aUphdJqJ~wA4_y^OTgH>!9IjE=%WSysJ%Wa% z{s*0(wvdv45#67C1-$OBBZp_!?7cUeepvT_sevxI*i#AD`Z;m(6J0!i%GBrE>3*d3 zLW5UskwW$EfsimzgI7C=;1UoZR-F4S%-A;t5|_Jj?(YwR$zOTY%!`1(r{zKVy;Hb! zrXQX;v`p$c#h7y@_J`~PcD(oTH7GAr;7=Prf$xcbwRvxXvpcO}WJnZvCKZbR4Rpt! z4$5r5<_aA2{|;f1F60tS!cygf^n0TNcZGeRyXytIZe+vHXJvuzE<@TJy9=V1-3LSE zdytduj9UN6u!r9fupK%ZR@qn6*s*mq?#VjvZBal)IR`%cD4UM!^+wqZ?cnFJL0WC_ z4vasT^4!J7d}DiYzZd^}!GE$1o}Uv%rKh{o-H>gdBRa6=9T|M1`&d}utdCvF`Zy_4 z2_G*wNjky$Xtk?Zl>ckWUUz%aA5S07coIz?OwWU9)d)(NY{$1p6;Y!74w&#l7B2tl zM>4Oa!r-??yxMUvs1{F$%n2iS)JQqBT6LF{r#{3z>O|@x!|3G1)wx>uPWLCx#y^x5 z_Lj=x1_u)5lv@yIYQ>>_W z!Slu+;EsK7j6BjnZ_miXM;`^DPlG=Gc@sq&9s97snrmX+jNjD1XA}G_@xfa4^Ma+^ z4$(P&AI()(r=riE{4#wR7492~Q_9ZJ$zr{-VwbPv;&6Z(cN+57KtnWKasjfx?H7!6 zilyJz+tbAOztWE0&!OeT7}D%+!RHoQOIvgUz{0;ph^w9meg`L$LcKLiYcrx(vy0%+ z`nhz>b}WTg%i=8G7t(SV&+`dxysYIIFdIl~Kf7Fy2q5% z{!u#YSRqUvs*6RF4KTjR7VA`x3)9^fLErhsq%7+Cq`zN<^+8EJqisR z^zrWZ-7xED3b?kX;0;-Q+;{#yZF>}oduqe6((w+A99oFWFHFM4HUw!_V{qc$Lb_EO zA?7y!CvChj6~B2srj|rUo|a+3mP?1AyMB6^cuR*4PdETMZZE`?fF_>@Hvar+S{1C8 z4+Qu3iLmgcGaD@oOuKsnX7=$Wg`J1s=6hqV_}Y_aK62n(nae_h{Yff% zCE(ftBjDlOqhx(S2A8gJ1Nn6ZY_;YDnVeLo9^X#DtK1|A)QW+H*Oo$r7K7c=bcl)7 z68+s4V)1u1^p!-Dr&=hP=T9e-+&=it@jKP_w#V}?^kDSN94P;MfqqB0!-9Bw>@_f- zygZ$8U9%HT-YbL6Pb=y5?F91c=n{TWnqk$11`GK0QUzi5qnN-mkg>I%pK3K z(cW)|q0mJWTU4yssbMc&>Z%dDU3g4m4$A$@rTJ)LqRGyuV|mKJDi{?SE7^4AreL<} z8@OH!f-i4ALf*?tF}>(8Zr+e2o?RCSc+Q=|#Bd0%n9swvtFmkOC+Z#b6l~v3LG7$B zaN@Bx&ntXP_C4N`eC-i>?c|L1j+!+3)hbY)SOz1WYLUrIbKLj+AkD1zqE%Ddq0w!m z(EWfLD@gW1Da!JYu~$gz?*mbJpBdKnZ>RdHQ>az>JD~D^GMN zd7cj%#p+R6jSa6Wu7z<&Y&pC~H(WV$5Z)UafV0oZ@T{Pd@cCO7h3I8U)8b0t$2eC? 
z={yF-|M+J9`Ws-VsKh=Y+v!(GKAGvNK=d(Vtcon61v>`7=(g3+=GF_-(v4X~Z$GUc zQ%}+Rd*Ie9Z-h}RzLBDlB2RM)0P};FK=)TqRx4J=K~u-#(WH^=KWQ=ao8iSLg6G2C z|7sWpI&vr{x4!7G3m8*WOWlw zTb4&3{9A>wDa&Y?`)uCp)C)Tk3u(_3br$;!=iic#FfMSoxNb}XjVLt5iuuPN^OG(6 zns1@@N0xj@UI*iDrP6@|CrSUF6HJ`j278aCQo}GM*lwH;Wj)7ptY15vZ^=hP2X9e6 zUW4oAwNbOdAG-a1EPptl1f$=C!rFil^iXRIdc?FrQp!ppBHjRZo81QgEn8{wy*~J= z%L4tc3;2HZWg#Qo5C=RqVoQUuoH1=APdI-8oDaMMjap~YoOPcL-FO9ODhKnfFB5r; z))Dc|GdVV?`APA|EzvYBlS+a#cvQx}nV0U0Cu9*e1)K4P>T8lCM-EfF%43+YT@Fll z%Cc*W6I)euXZa`F;j+aEnlPjnkN>@j{B#|~^K>X&!<> z^Z;(ylO|+cO#mY~OLWji*H%|RqGMRu`u_g#+NuklZ=EL``@EWp zk{YB7?}UQSp0gxZTuE_lW?bs1!m?ZK_~J@A47nTwiTyT72K1_i$*WzktodKB=yu1= z@6<7<>IcFrnB~#rm~8>KcJ~$voIgnA zllt<48S`k$j5f*-kmC^BN~$wh2i-SoFzu^=8+I=5(qtpYh%zfb6zv?F~of{C=1`d4Q}VxOB{de;nZyocqqnaBf9OKOhIlw9Mjqzd9)X+hNObycxO(s^8xda z{WRw6d$?~X18p;b`i3u~3&)zs%c=#ki~IAR`EnA+>;(AGti<}?j?iNDq2kVRMbZno zO*S7UQ&wj${U#N6#< z0$$vp-j7ee@!-#<<|Mnh;NRK}NqJE;P(>7II$VSJUwt`5Llvzpa-pm34J7)#6QTyM zg2B_%XhW?v%3K*q`o@WL=gJdm{VxRXev=V>w<_cCLVf)H{xM`M`bCYaa>%Vffn$~X z3nBKN7}lc>ninHXZ_~jY-`7L^HtI4|H?J9^v=l zpz#sV2sxlNkZxUev;+J}2S#{RwnM>k%wJup4Yw29u&YgQG*DB(R4fTWenv4jjKnb}JTv zS2fQItHlT33-?pW&FYyBF>WI}WpQ z>gi_CLQvZG8xF`hVsvITyq)oZ<~O~7?7mUtc0~r;IxTQ%iz*K)=#IP3-l0aX(PaCu z>f%kAHmUKeDxb-Jo9WRBCAQW*MJuyRS@)R?|F*v)gw2pc{p2tqM#+@dc7GsMHylra z1s~|xk3K}boP~u&h=0cZCjYThacb;Qxbtg~5FV)lhCzK$PV)(+XP*Q0yNBuWPj&Vf zl>!#I+8lh`6HB_ZQG6y3=DqWzXJ2IlWH!&l$c zacqy+!l-KO@$E*m3!sQ(nYUzUL4y%Ya@YzMrU8cm<9FGBf7e~4Xo z1OhU*Nnd>0EuNYciu(KevX;0L0@O}W`e}C*z3kXX%bn+~)`5J*3_5!%RPrgfJ1)Lg z1DoMBbVMG5S*7=2_~jh(+v6{m%AOR1qCIfrfJ8c+lLH#}t#R}yC2qLn2JNwl!ozig z1?A_TV9QJeI`8sUqM*1@Owey32vKKkOFc;MD~Cd>6$?E?z3`EZGt1idWkq>= z4t+MCbShtymc2Y4aWO9dPV&Ke$^M1A5A} z^eJ;Cg(Oyr{#qyKoJE9qdrvnE(-8izHtw zjrl`I1>Hp>eC2J)4Tr+TqQet$#ja}FJ8T17ITQ?=@~s6!pN%lKb{7m*UIX6spTMK9 zD+m6O!@x(TToC*d3aZpOe9=spUb7cA7#yPW6~|ym?LoRLX%_#=TET~>HDxB&io&hz zZX9xz#pL`PxK%%t{1&+2LdTPk-2#H%ae3UOdYhUyr9#ovRYYp?baWQmK~r~-YPu2f3;*|&p43HN`ue>E6(_lDY&1u zLG!IksL!$=RP}k9ut;i(Q~Yb8VXZcQTH-*KPH*Xro*K{S@WRyH7ofd-I^@3?A(k9y zE;BpS7r#H2;|bmSqJH3Ja@E}r3HKg=Wl<+Vcr=Z3X{E4X?mW111hlojf}mCZftKC? 
zab}tiJKvGVIoIppKLcaYaN1N_++{#1`TNA?&s_)MNo2INTaf~HyX?x-#}<&=J2mWjq=^p|GhtDi6W7Zc zVT6wzEzT40p3EEiZ1YRMPJd*O(?Pv~ThBF-8zs;=3w5rJ(|9I~Ks%Zk2*#fE+1lDzIhaE78qD-TPFBKB^;sZOrGE^g!K^Na67{?_w-6~Y?H*J;+UN#%q`*j=}t(#o}byZh#Xj!6rolzH*Mi=Cw7c+}_gRjG8E zOSEXJxSvK1{Vhh8o8!=RDx#X=3^@O4FgaUci z$U6^j!$AK6aptoOUVTXh&DSqP%Qb$q?88f8*9&!y{gH^Oaltq=KAQ|ydhyDCo_Ts~ zH~doDNeK%TP<5IvPJ5%rL#{2PsWO&0VrD#P#OmRTQ{j+zFAdJ#??&F^YoOE8nGzKn zg{)-{sNa>jbk)HR$7dAK?&;C|eeMaaOfyB>gU_KPv`AQyt_`g>#be?8FBN`pTiz6MsIS@h)RC(@4l3m1pqfa>Yy)HJ<})|eUyOAf50ZKY12 z`Pz#=1|Aj88HEw{(<1A25*9Yea`y35KI_>>uqeF&PkkoC690+3)HhUI|Bvx2a%Exm z|T*&fc!5a^|A0^RQWiE=YXvtR!$G_P;95NN&{3U1j#$dY{&ll4!36&+E_%2pWm zBAt^5ZQ-NCM)Lqe0f%gxf*#U9l;0dKdX|qDTCC$R*eZ#FX36>{UPo$C7|mm9(#f|x zoi?6zh9$jSahB^TY7b6^iz}|6%k=})EJ4^vbD?&$9DaXv4z7*A30aR{k*xhT>aZ>) ztGp(f60FA)-1Z2@B{ON>!7!ZdQ3u<@Gx1r{A8NYLMyJQj=dR=w3T-W6jXxuBsOX5R z?<+_@{_>EF2@T`By>|&&{R6QenBmpniyZvuFC2Pv82nBrfkl%6Zfx=8Tjf!3?NkO7 z4LAX-b^y=Xw-7$~IWp{8ioslX`)%G^_21#$dC$UWx5mak1Y^-C(WX4KO7dG6S?=>ct6r-8NZt1TSI{ECE;`ZS@?J|9u+eF zWkb&oRO)BQrK5Fml$;#w@KeD1Eqlm5!V5J&zLVUoTZ@+aZKO};oQAai%du~xxzzjP zG@OI6yzf@1O#8TRtFnSKMVKu>98U)Y19ENr>_^VG9*yF1t`K{$BV| znCi5JI&Q^_aSFv`G5N7zyr?HOH>QGWO$GEHti@REhr@^eS2oS@rKo>Ph6B#DgRFHX z+`d@==?zEdxraKtWn@rgfeBCOca$!U_zC(GZo`Bz8a(%WC>?&6Lv6msFs$02cG=cJ zUt3YSK0$)+cj75dr$VT0>ykM2ROJ^AiqzLXLo5s=NRXOglBW(fNE09<`v{dEV(FZs zMj`%bBY6I@5bg}q=cnOnP_U*q6#mr~FSy@=yU!hX;PC^(>>F2M^bZZ*ydj9Y-~yBe znb6&RUukAtKA6sR<6K!|klXeGj%Dp6h21)_wdt&&_A;HTf-Gr>MGzon@-85V+LFlQ%RHbGs%B%FP2$bN>e7JQ_vD8 zc6?xh8A;p0W4SY}c|2L@E|!ab3T}W({Uu@e%1T&bds`S&E6c;qzJu%co5jbI|G|WT zIwY!)xMrCrCsj_T|IYi-)D~U5pfw1eh1jr#yF6FV>i5qHbb&v`(!+DlC21#8h4s%| z|M8mE0#eSIo@xpk0&{AR+G4n>|Z+IKXQGmaIGCW8*P^ zXvk18wp%Y;F)WWC547hoJC8u4s-kGtF7gJ2SQFjny7ceNqxi{&C$bc#!y!dN%wgzSQav zSIoO6Ou4rcW|-a-#h;0!tM7uW{6sVkQh|?#UexQ;DeAfY8&uyo3bEJp@XF{>yzKbd z{^zEt2y+(bQ#TI-@v72aTK`7C$j^$nsLWrozRnI?>*B>Hd(wHjmlX$I{a1@qTKqRL znBT=MB%^sPFk*cUu5@3Ck_&y&L!yea2FYNNuYCEQJST2ix&yX(Jrqv%n1_n5t#Qd) zSJdmX6(R?_lwC3Fj-@tw9zH2;|7s#GG;$Yjr~qOL41qJt=fNkdCp5g#}fMp*dj*8NP}8cbB)&Iv;i1`_C7g)?1)vZM~2z zV}XLSKi7Xbi3-~8EId6;UT)sJVtxfBYCn}!-g*eNlumbDWKn7OQc_%(L^quy=>2Rp zJUy(D^lpa>o?l)GzMnPm{l2mIE$OGAw%Z0T>kfbghJE3|>gS@IQ7KKb+sD#Ai3cC}6E!p`!Q3RFd6T2fe;;g5Dz(SaCrBnCUElgC7enXX7OO%GvQ@KdUn)xym~#^d@Qy|E_77Dqd7 zr>irp;dyKs%`0eyHamptr!>TC8P%kA&m1kC=Hcb$9rSeFT3mhHQPga|Ax>3{AqR^F z@#VmN_{+f#Id2);s9Qn5ua>Z~{*iQ!<{qdD*dboJtS_3FY|DOvls zk;S8Cae0yvtJOxp=<`ou+!J>i-h7?(?zxb(qYf@qo)Xp5%&?n{m*nyTE4G!jWM8B8 zWEikPTxb272CJ$G+RfdtFfEfh$NVSmQZT^)6_ngOUR|d1_68;Oj_2?@-|^{!9k6a- zI~|h~X^5+pZ)u-bqKU?9QXLk^(v2?Y_fZ*iRWAc|S%9-f1IhndOdod4!y8?5slg?g z?p~P2hVNsA(ZydWwfrwXeG`SM9U8uxgS>@b18n$0>}64R3CaVMUK;JdtmzJXlT*C0qRX|oHG9p9hx%%W|zi@JxG}O%-w0f3;9zGM<;mbdl0!hNxA>#6>^} zBTa-=WzTj2Mp-fQ}~&*2lMoucy4Swj5H4R9o}>pPWP87PZ<6~gb}adb@+2Uyp*Jd zmxsZx-WkG)gI2zSywy=gdYKRJG-ijjhv-b~7rNkN4n3DT@RyH4XxX)gL~B``Zc`%G zgv;}y?)!MRPXkPT(kZ@ho5aTwHM#erN;0e%&3)3=aEG%m^t-E#HY&j|f8=Jqu;33# z_a#!+FL@f6@Rxkzb*TFM40L04I@($W&KVZaJUbc=2EBtN)}w{rw!dNA$WdVAWr}+= z#__g2aeVLcJUSFSn)~xK;FHju&jO zD<^~g3ICjW6a?3Yi|fzU!$j=KPn!-)kMz>#Nqx;Rt3{n2Enfi`{J8AvkGBx>BrSEDexqwjJj``NALQoaqqHm zkbZwL$yXS1_c582x?Ki8jg6w7XC1+GR1*c>dk+g==2QR2sr145D8124CxiLPG_xyJ zI_8syum0OqbbJ`epDzqT@%wHnz3G5|RMc^KeF0SI?xZr#!{YD9MSH70ytJPOx&&!r zUjtWi-PkVl>D?p_XqDxrSS{Y9?MjRGDe-}vLp*JRBd=D~6z9~w5ZgY=lIyYY)Uu?G zaQjgqydbIUw+a!IZV8Lm^yaDaU4_;~x1jHi7jWW(J8NZV<6ECVu}5>WG-a497Ck8?qlm{O zXVD*@t_Av`Z_HF23WH9ag4wA`V&i;y-gL^BMsD(E|4>u#S#%fXdACz9>J;x7d64!z zQ#4MDq3P|a+&1=${zk}_WhorUJf<7Je;zx5% z!O!Lw6jYK9>wPWwd|YqV9jMQtgG~ANw!4(lbYIfAst+nmx1dW=O(aptg1y%aIqSg< 
z;cnhddZwNXeM^%lz|$F1wNF#=#acRSB8P`&n&7naDUz=d`l$ak3iiwigN0qXT)gH4 zJbye|lAvLZnLnTVEctDNib8izc~K&`EU?AHy1KZ|`AtjAa(bbK_J9M}o|!Ypxw?k`e3o*-H)M~OH#1j^(z zuy}BiW(qB;1J`tWy z+zCntw6Mns2Yx%|GrTjrLMI0P5H{BJ06|_VDSlTCof_S!$4U4n&mrn- z5#1SkSlm`)hlbWg)U)mjB`5gP8V?Q8Vc7vlU1`Aua-Cqc&IJeOT%n!?zVIq`C7g)* z4ywO)2!Hdx2qA$k92n&XEp4ahd73W=U@jcV)nlDjRrK3h3Wvjcv-~P+u6pKzZp#e$ z+QSZz%MK822OXqz#RO^jto{8Ro?fA==G)yI2#SU!F2g8EdTfpV&5!zql$}1ezSa0t)8a2Kj zjeYw;9BX}$#suGoB-s>-)t^oCe!i#Z&IhnBWsTV9&u18yJcNvNda&ZxUv#`u4~;4Y z@bT|&s5#c2Qa;pCozgQ3-E@pLq{~3UVG~MT`HsxbKNC&cWbtrT9Zl4Wg4ACZ!8fZv zPTz72tZ#PE=w(y*{*K8|kf?{}7I?6NukXqBUfsk^ z#rA0Jrold8#dK?}4K3Jh#ZPWqh}CPa!-vA_P@4Z8S{F8f@1RvQOwX7X{g9`?L%*Qv z#5;=Ub%SYBoXs4p}srW;>Q_8{}N{&3`W4n;K? z!SC34H2%RoVRMitdMniULC)|GT z!ubV#Da62&x^7zH>v$9Vv0IZH-#Eid*F^Zd)dUO9w}MSlg5VN*43c+OLe)nl+=p)j z$(jJJKXwu9$sh8 zUN#@$lTvS(`Ry>A{UxP7dUCv=!4ivn6ND9WeRyKkpnthg2`=l;P(r;6)=rZOxn;G` zTu}-;O6}3*UylzIbfG!W9%I^VG3lo-)=uupoevctJIx*ApL#=fxf_*~Sc@&2wE2ES z4j3!dQ{r72)Q>jjBU38GUq7P6JI4(%&_V@^qz({#Jb>LyzC&}64Muz!hDJ9Wxpc`7 zP#=36`Y%f%m(CP&e?AVP-QS7V@8rUQf~oMgz8tDP%kmrref+p+8#Jd$X@-G2s3=)c zT}^{f=IkSG`?MT+8B}gdAfZY(4En$CHjk>FB`(8!zyT#^E4WBd=Wx=jhOswF~;P`VV-w31{G)Wqq`Hi z;aVmxJhL2w+s2WNY@=9`e3Yh|hjP;AcY?~E?W{3C1;67wG>%lk>BY}T!#sp!eEM?p z)DYJ2&J}kC-xC#6GdQm!6l<1P@{aMbuqbo_mfgEdQI|7$Uwz`g`ly3#?_l1#b{9(4 zrb1-Udj5IX5Oz%)#RE(f_`3Tvb~?U_+H9XdjCmpkTuR30emiOA4O?N`#|%_h=L4_D zreWHt%kb!}JufYLCoZVS#4dfLljruZV{?Pg{^uvaDy2V_UL74v ziDx(eeJt0Rfa7M*;GlOBH1n;8)Qzpu3D@VNy#72eJvoyvKdKa-zueBfQ-`33WD4hB zcjQ~m133Mf6E#gy$t%-ww$F<1*e4)MX%7)5?_&H{ZpvQTA864f@%z>^%!U)E(K z%}Icw{r^es^?oWTSs08HbM$awqb&5Nk=)W5OF6L@>BYbL1kSCHHq6||F^i+v&&dSe zt@gr(iA#Ce$4U5S)(AYkW;%}Tugl@Sc4BRL9A3@K#al<3Ay--k4cD{q$d)@aVrU^G zT-<^EHb!CbJatgKU_{l*MmX&K2;P+%!$a$JIKB?(gXN%P_t)H=d$mehL8w zlhF0)3K;XRu3o>Qg#~t5;2ly(4r3+knQzRI@nZy7R7|=(^tf#6K>nzhg}J{Qx43rDypQ6J}RU5(LBy~K2d**LKK2n>Ip zjHBn}(WH1&%-`2VS}V&SSS=Rgsxmly-*9lYi{-ad7qabs3(TFi1w97ulBmCz{>zBD z_;bh#D0ETgv!T0j%+|g%q*$HWFE1vC>ecvgq%Ul3wPJDaZtTCLH&%|*1|9!V{OQIr zY&{Y~Ij3cPj#L=K%KPy=W@9(Dzi5MV)#mZD-0k$npbO$nhf~qqJ>370E$593!}gN_ zeD%5uztKN}It`~llH<(=XV&vTb!Wu*o5JiNg?zAS2)S3C#Kx7$yyIXACoDVUb2ak- z&VHc5`#H6Lhu+{LN(p-&vE20c(x9COQyX zBSSF7Ck#VI74yN8LfoR5!jnI#Qt8NC^0)RwFU@D-;LRF*tHl?qeD2Wf-mhT)xB;l} z?isvXz7A(T+=TM&JNe#})o65c5O1HtDfwebo%tUUlF;XD`Ep zjxF4CwwJW`NNzT)@e=a@p`7$W#Pyv~n+j+>5 z6`Y-*fxVW0rca84_)YH<5Hr;n+e5mcX3_|{*y9A8=^2c9x0Rvm`wp>RMKWGh9gAUm zmvYjvQP^{MEBOo#z-KdMajsS#3STY?hu&O5#TWQC^HQs38pvYm9 zJn38TaJ~>7&ElBJ5Vj$NDz%Mxfd6``D)3|Fp6mH$`7fW(XU5Pe%Te6?`zV$Euw_HY z!q*+?{}?}+jzbtfH17ue_wC~ASA%>$JzXo&GcBR*4@?;jtp$Dkefa8O8vh;Ti4|7> z4>?%Zrtl}M)vFnd~GysR+@CWpnNO40?2^4^d8epN~4>5PVt7>2El zN|64pAH9;VqJ+7&P=3P-v9j3`DHS%wBHVy_tfxY<&oQ@^qhoT{@3vwN1) zvDh|o#X3j)wrnSEP%}lZoc;L1cPIu#R>H#X(O5U)`ahT1&Rz9ysnKQvzZ9>KiQX32 zS2Lfl%Fo0qO>>r99K~=^2Y!!Lh9g^(Fehj*W_uLe1Jd z@ceH#TdOlEKc0j~d|dE`-zr=?G!WlD|3Txn2lK$W^ z#;NhR`f!K%;#M-ZYA4~@mTU~Qs}~0Dlfp#beyCz9!APT-9ImjC7k!TxZa*^w-S^>W zw`(P4RgL1LR|Az6ei`6&Y6z3SF6CKTMp(ao8gBm zbFuIIDC+xLl%8sf;2Ua5_+VHVmi6z+x2luTdT=3BpN@w23vR>5`kCBd=EsV5{cxUP z0@te{7Bk}R|R2-cePa*I7@`#j${Cp(9>dm(B z2e;7Rwb?>q{CpgmIT(!faJ%muXP|jYr8=mWI$1|omY`e({ljB@rQshlB@Rm0D$jjk7(~|3qR#)qQ0ue@Ua%cU!HQ~pd4U<%r(bSgr8DoqU; z#cj7eIP>)=yw&|6MX23^86jT0$?TP=CpV6rgNrb<=U?%z+HuLH<(Xy2_u1l${d#Pa zJ{pHi*5MtS3q`A<47@Qe6K@1(VNJK4JaMNip6x#dZ`c|_)VLi&&s_-|bpM(-+CCdI zRXy;2{uOGC+AdD<8-){{y|L(`2dh6_fv3aruvZ^Pu3V(e&bkfs_gX%$9chVsl}*tj zDi@=wPe^(U*~yCo;_+^45?^Fj>~I~0`zLS2lSkh}j%fxy8IR)8d)FWf#^IBRtstN4 z#M3UXghpLa=wp~9K5pEBSKq4fxTz+RTWWi$vSAA+h#fw^9@}7aS`sJKs1S?(wfE!_t5=WWubPWF1m-1<<)OWzdF;RSx~HkY 
ziXVCGdnp}LH}1pzmGZn|hAPg`vf+}rSTQm_9gn_P$8UDrq0li}oK`VYqNKbN4HtTG z$*6(QvaT0M|N7(cJC&3>VHp10R1b64OvNiY#-4xF9l_UA}P#4_t=@;q9w1|33 zbui^rEXt%@q1Xda7`WOFKflm|^;TQi{%a^(pQ{k2xGO`0`7BCoT*3z5qj`F56oOhj z_v`LPY1NkS#XpdbcTB-PDm6m*{0zaf>AiSG`jl4Ph{GAd>3D3m7cc+EG+CKua8O}5 zzu$To7nsPgYpawDS9|lIQv8WnF%I)I{02aR(U|yNTTxP>s8t>O>rA zHIS9uSxI(TGg7*_NAN4}W#;2cv|;TA-C)tLzt1n+n8U{utRQYKoX7k}Hi(jOCa?W| zciwJ&T{*N(SD9th3ijhSU$PMgPKh_dNAg_qu9(DMm+fA(#OqK>6y|QIaSz=NJKDUFPH{BO4TkIJt?|B;WwO4xaVS}g3TW>Cw*F(qh z{+XxnH)RN4z54~bm@Y~b@D`MTLZSS@gt~n1myM*|q@?V`H}LOkW)rp38*KII418~9 zk8GSgmRH+UM_w9Vj*qPPj?5@`h+V_pl)qMfNA_G)&i`)P5#q<{`FQyrOW6ALBYBmW zgJS;gwfOd~AIqFWXY$6yKa^FIw+42&8F{&zwfPsj-{JKejFWe2f5 z^2pb2jpFgFvh4E72>$h|`m+D_Ln5*4hb*vUmMGY#FOND|PL9htmml|?63;$3!{)7B z#nd)x~uD&eQi~ z`>=JQ$==?wr#W=!GTMwVndLqg@i0vq}B+pGD`k)O&Hf!u-Y`LgkXL*A&++kD>r-mqp<+2zuiBEGsb zN6ubdR91}|Cs*<)c`?hblwy@e$a$d+`P0n?3jt#~oW*P%AV!So$!kopW;WWy?GAluldxZCfuCF?~w#n)?gO14+LIMl`G- zK58~nrrSD(fBsv3zNXSRnIluym;e9GlewR>OZiI69P!=xo)WXzz#0A7sslNB!v!7r zg=&n4R{Ab5=NpqJuI?$z?k>QGK6vVXaiV{q@%VZC;uneX=N}uhh@1)X(es`>rb8(? zH>RD;Op;(u&LD-h?RZzvmWVovRxsi`NTe- ziIk8MVtFK!Sw5`H-!Azc?|M3uEPK)98#AeKKFU;4elTOFNEm6ecQapNEq~iDc6Be! zr%$UdFCEIlax6{HF0Gv!STTDDJCnaKFOa?k|EkrMl=|P6lH=N#oDa$>o@MCBs<-m; z7ZIPb0lBaH-F`H$PMsO0xpze#0e?v%p3`;Ieg zRQ`;zbJjzmCFtP~@(p6^Dt;~6M>UqA~`Ww^^X2} z)r#<8pH}8=20rtziuLhttwo~97QZxK6yv_O1?9py8Ti-{E>B;sg6Q61od443aCy_Q z#gWl<> zCaWiIKFS=Zb23_%E`3IH3!lP%ob-N5u=lg#Lf5FgMMF$MFX9{ zlZZ2-!?s<4a_uLJ_<=`7_1jT?Z>>dvv#)>8?(WSf3J>^QWZfSkF1&as9wawsm5yc< zZ(Y17eyV;$)Lxf~T?t<*%yn^;_jw(-t?ygf~{O zoNcm-jH|n{Uc+aLAF~%`73;5Iqc7B8A5{L;U;q8$tj*2a?E5!OKIvq6{%rNAK-8|1 z@=CcJa^Q4R-q|)@T=`lTU-x>8{d)H(`@#+7$7g0>Uw&}F-z2Gq{PNfy)_P-A*);DI zHl)I@%-6b*jJmT(WO~N*4 zZ0)oH{Ks;*6cNWD_d3hxEP+4DrRgR zDK3@$oOS;uo17Fqjt$Y7{9b+75Z-Z}Q%4(W9&* zZzcRLemS!`VD)MtR_!k?Hszno?mRBf_hl}|-&}pzAN9+0HtOk8(L2}A5x;%8oK-)b zhu0gqfKAqa6py1S1m2jilHGf;hD8($m+4ay*`!Rd!kZ-n+q5Y~gdd;EMn9>*jnrBG z949>TP~o3hzxYOC+QrhG<*g}B`p&ZR532;G?H$jW4N5Od&DA1`I=_fxC1;ALBEy*& zKUysOC4=a5KIl$zNyGnHXjdJ{JnqSOpV{lRX>o~^PCkwTAHWyJLqrJ{)Fi8 zc{~1bt_y6yDUI8uGK*V39Q02)U4*^2cpp13c)e(n5GiIP=9dZfPBv+8eb(4tod4Wt zF-t96RQ_Nc7Fmzym6ejpiKA^Ev82Am`P7OF*;ggKvgzxg{HK%C{dXUxvL(Gr@YW&U zv(F2?CrTISkTUJYY?h;68QyQ9CQsi!$m-oGCN_SWftNj9Nv@c2JP`d(f3e_Fig>MO zQC{PtGQ4BS#zMd1Vapnpk`KHi0?!6NPuail`#|d|ABh7A8Xuaimx!M@gFPxW$3JrR z2zmUi?V|Amm$zSKvj^j6u{WEA$$ibwu@hNr|Hf~21#D9ZROAY#wAhfRtHfrHMcxDFFh~t@j3Rq zsVSU#3)oMEDv0bGD{*gJ0kQS!Zh!U&4<8d&j)zBv^M@77$;QVAiD4DLXC+c(#CmNj zOSqJQj|}Z5e7(L9VKr<%&JJazzgx?8J%5XBOj+gcJ25A(+h`>_YvtwFN0*W3erhJ% zn@`!=+Pe5YV-E4vX)C#EOm5!3P8;#aSD0nbrt!Or#*0ixYVv)hye#)X#wxB` z#j4)R$d7-sGUDiq3#?hA67sh$%WGCEb&GxFj1c#CHRU(1*AXE(+R0lJ7K#GrJ$!qH zdUDb1JN{FJE6K<+1!dWr2iYfukNOXtF2W!7YbM6d&n+unEGcJYGv)5|Ys9*N#dvi4 zjUwqpMSf)PByo0Fm`uH0Sbje;zl>_|fbAchOMcPrn7{pu@-oA+QEcX|(}5x#LU`-L z??)_Mn4N9ubAdg-U7er)DI_J+(}(`4tIn|>T9*|cZO_JrWEdj8T5~L7`_~i2>#rMf z(U6X;-s7F($m0q!{AmesGsD%I*XONe+a8P!wD@9=DE9g~*0)hu;Eip2Qf90@oU;D6 zl)!;_uxamCWosMf_m5v3%DN`z<)!?${4rl-kv+f8D+gsLDnpv@V5KvZme&%eiC@p3 z5bLXgZZl{5_qHs;Dz?+4r*2+3G*Cy(JF`U$ys}Z;_~LWcf5k5GXm2al{aQYL_Q3E! 
z;-O`Mx7$UD6MZv_qu#ra}%yQ2KT&9EVl}H6Q9~B%&{!m=e8FE7FT%t|%Araf8Z=j(T#&@5$ME<)4c-8mv z@Og`yL`*$0hGooCo?RGHjMeM-1FLNkO}FRd15 zO3o3ZyB!jFCzTDnF(<`uHz+7G4IdwfdhwQQTRdCj#T%K$!3Ou3Ht}Qs-oaUA=<936 zfoUVyyB|AD?tQ?F$Aj3xd~vK)TtS|x?{ipGIO|qc-Br z8}Eqqr>e==E+M?ZWs6;5Pguu~GPBj+%m{QXSW!&LwS)B;e=Lx*Pi}c=U4T_D(qH^u zbT<3+W=EFcVKPr$^*Xz7u_rUd3DKs{1Xip3Cb2#xN`5rz0$V)lBY(!Hv-k?{e(6uw zFp#J6P*JPO>@Bm8P89)V1n zw*7~!_SH82Q3t<~n>M7#*NeZ$wIR9ucC}}s;_{BH&hHn+^N2%k*7TvQz~aX&<*jeo zd&i3NZEd@-8<~8(ZRAijfd$LiPrV&pxK?rgqn?Y8tWuc|X#1Gu z2tDRMJnttKUB3W7F|UUlo8YhsBNnld3v&2pe0)+gviGuzbK&fIV3W8}_>efn+wmnG z#-tQIl2^Xiu_Ul#OGbV(vYt5eStb7Zut{u2gbYkrTUcg_t1d%2e#+7Xeu#apLEH*=QA zK5aGAe=X1By+!4*#$dZnu41)oR1!OvHe)5v{Kzg>Iw?Z#u4ZrfUx0mG#y0F<5~$bo z=N(lS8gQ8QJdl7Ap91P@%oXztc43+mQ^@v!!u^|u7Qjq_W zE1PJ$B2>Qnyf|w#JRr_p%Fo}K8_DLJs>Tl-vUt53pNda96_wYHd@AZxZ58NzsDt?I zz-89CaVh!1wj#38?Kp9{PeH!*`=xx}&eP(2t9^3ysV=g{j5~onO*6|k-u{&L3~b@& zpEZ(KGG*r*qL#B^U(NFmxf>;qWZKK*jvv_i%?0_FV|I$rL(fELy>xuekMZol%p8Hx zLeKoin~h_o0=*;JRjMZL4)g}PmRu{V9T_Vpp4i5`Ep{;Pv&CY;s4TMN`EWTT+`|(m z#)@}2lP72O^H(yf`q`Zk?6U%O*yp#Fidzdy$+A^kKJVatw&m+@{cFXxlzuy_@a^r- zvz3{D4U8QzP~6LAaktD$QRdiA|F03tM9bFYxOYz`mXdHJP$~N~@$Iw*d~>IJBHojm z?H+P3qS~*U#oVdm1J6DfDGH3)A|CIk!oR!Iji;>LEGBLlE^hY^=bseLEElg#Vh{G^ zrrXCs80er+mLwDKT)LPilib^3m)u;%te|yo7!=0J*A^~Heg{OqD3Py`^gm1q3t4e zA?^mNb8RIv@<+2D3S{O{<=^6$mi;a+TNC)lnQHS{Pdo8+Lly@{jV~y^7$YL?KaJq6 z9t&3M`7n9p+5wS$ukGKnv_xR>wNj$-zRWV;`45HDtD!&FnMEnn)(>Luw3yH4CEQ`L z>94ar(+-M{kKT4SF3u|_=!N8k*V4(5Y`?M%KUHU)cCBUK?z$Nm9caNXwF!S9GNaI?OHHWZX4f3L*yPB(a~Efd+E!~5Cmm(GfDYrXQdMYbHddp}F)mXUXC z`e~p~)vGBrSLWtZR(vW(XV~jMaJVMlQ6h{FxMfT0*Ajfg`71lh_Lg*oRl@zqhz4!yCDWnETFRM=TanpYdK-=5@)*rs<8>o$~;AH3E= zc)iP*X#cBNeQuGze(|Mj>!#NJ@9*6ga>J~M;#u4K>lN)SZV&eG!0nv!la85KTRWy^ zQq(>FrRIy7+Z}Z3XY)k4J~>4nV-;(-;9yFTPiBiPKW63+b3b9}I-C;Ahh~)H?mlPb z=PVJYn}+Zy{a1<~`+O&^wdVmj`P;zbFYQ2=DIsjhNrN|euur5GJ;X9Tu~@#98F}K@ zmj6OnPJZa%7ybejZn4U}z7pBmh47^{_KP)aq)Zh z=4{)oyu4O{C2a8B0fDcMW#O6R9&zy0P$8b5XH{=pW8>c_CT@H^k9 zr@XP#;L{6w`0aWgxvXyv_T>hbZ|prvly8$yuIijy?n}-oi+np@RJ~rF_x6_Nyh1+S z(3f9c+_Y9aDD*YsXGe=ly@Yr@rlD*+Kda0+wgdZq>`#e;E6<)NBG<)s>p<)B_O+>b8q7TXKX6oqT_mYyf|W$pQG zM7QOug!bhJBKPFI>`6!_el#Rm7JGU{>|Qp3bzQtQVD3tiZ@-n9oo~(MFOT<&rSYCX zt_)AajR7}A`N`pO$J;gJic2#^=G)`^@6Mmfe{c09VpqFNGJl0ua?Q~7;$HELfqg5l z$Qj+r%j6eL{LOzGD%%~cC$}}eD(+00#(d?9$bx%v%Iu{^rj+WvIFPs2LAGv36Vdd- zXtpa|CV8G+PWdq^yS$yfNJ`(9O#;I=b(A-<=aBCu=<;~miz5E$Rx!F}=A8%EO_U4P zJm$CV-(!559T{_^N@}6ZpYSVNo5~kEkFl&vN2g9rEW?K{=*edOIy10;a#s1#tS|k8 zeo9Ww^m8n$yJdeMM@=nqv3s1=zh?7tE0ZIK^sLHnuiq^17wjp=EPXxoi`gFvGv8vK zr~4;7$7}OhhGH@`wCydSTqdJn8G2Muo_?Yv(3@Z>@1IXXqw&EHX$j9C~k&xVTM#y@4h zmYU7tj%zY(d~=!nV*&XzWgqL|um6(I8AHN(e7`E}@S_Lf#l{&ahaL}=@0ZLWHgA0< zZY{_v(zE@ddF#9~SJQZDtbS8|6gyn}a`bG3FUuPCV9{LpLHTt4n?*HgdD6>6#aqZ- zr&CzwoL+gUR4Q}V4j12VFDoy7Q&dKEE+xOpKHDF|mx!PD7iGVESThhkv!s9L&{<+} z&up@{F+`p_QzYd^V}WwgwE{WV!PdA`^(<`Nrmr!u?ezb^8>xj;4x zxys6B&mendKOm0v2@xw>hq6@-F8HsH+%1lGpDvbkzbULiQ{}n#pRyJc&WgH~G(K_q zB7dodk5flqPLSst6pWmFy(FJFq*3JO?|&;tPnpX$9B&}MYqr}z>bt1a4rR;o4j)%# z4QI3x{gUUgk*C91t;-RqZ|`m>bLF-~zN%?W1lOOG5 zJu9|N&AjbKiZ4$uUa0tazG3}rZW*Uit+yu0iYIr;Uik~iN;T4_9_djkb>gZKvdsZ5 zE58wtlUBcw>nd*$HQi-w&c!D3tr?eD>-KNUCtWMb3LkyQ=Ioxrq6<%#l{fbjGg)nx z+ToFy<7{WoHkaWgD&~;Shdh(_{6)lfO&sYKE*H3PI$Ea1&zI5toi)79OT{3TMZs1;}&(`kCxv z&D+1r29IjUbWx-D>f1m0omrLHSA8>Z&+~8OvH~BmJcFd1l|4=d!{_Pv_(+x_{{L?7wGEehpI(<=c4;D8R{EJ0NU9yVaOXgO>xCy|qmAqQt)8w)$@1=e zF@5Y9eydS?o(~S|YrWYbzdskwy5*fNdmb4gSLFSgOM9~ll{+19~3tyJqqM|rt$XA zzhfuk@&%@KxyH5^DlF^I{6tL2kvWxD|4{5)Jj*Xyr04n9=SuA}d^rC-GfTbjVr1lt zL0Nfxg_eBQlZE`us&w3X@1TFd%~jn0;Zxz9*yC@K6v9K2C$juiQuvTbQ^n3NX7Jg0 
zJBYG(a?6~#!ey7HMI(phE#R;6)oRguU@7p~&!yDvRarK8f0fAe-evK8qQ%}$SB{1E zY%cent`g{6R+ntcCV%H%oDW=3iCg#1$n=M%$ix%2nEyq0*1|JNez$!(@A%^z(rlPM za%4ft&({dyBlqWzY(2vlS!YQ8$lsTK%JV$=*57PI<$Xx86%hcrb|A! zvRM9NvA=rL$-?{YWA;|ajX;CN_r;N@o{`ThtdXS_f6Q;LD=JoA&lFjA)OKFx;5>fy zdI)bbeT{52Qp!_5hqEu|W#&KJe8B5_$FdtIm&=V8ipX!v<>0%w?v@ej69YY(@8Ks( z7D^2a&JcMhG%Jt#WGe4BzJQqV_I=*%__-aoFa5w?TUSOlxYkAf_!?o8R|)GOzKdT3|%d zT3%#LPaa>X7jIVUeNpxPSFB3E{w&MV4&r*dZV}$Fp3)QCCv}&Oi2UwCcK+_^(vgQ7 z^_GbjQX=#DI`dW83q%g;ct?&u|DK%tSw&tTSU&uN`l)NLe^_gC*D9&rAM(h4`#h;< zwslOMabdb_*05lzKkG($vtXDMEAyo;iY%NOZdXp--Tt9zy$8kRhqlS z`m^I@i}EtXXY$kUHRk*Ge;_}6GFp~iQ<~-aXaq06p*-u|tO#5EZSIIALx)KB{s101 zV+8x3w2K?(f+4yXklXBe&ey zGn~CWc>zB@qn?=jOMYIl;B66h_#&Gg>g69U>(6`j4C4!jFAX$aGlMtm{3gHR4CW6f zMe&l)DoO9jMSjy9BctqdqJM_B`2EDi;{6K6c*#VMsGQ>)HvFDcyOW2d+7ri(eVK*-6>H%TRT#&m6>_v zi(|Mf^tN1EaD*(=zDXeGqCBE$pVJYYD-7mkn)H|JeyYST4IR&?ezp9ie7rWgcxu~s z7RY1iKbJgD1KFcn?#K{dQNFcafyk(t>E+lJxgtXb43+8rKmY5W&?TmGzuqy4gTnvU z`tZV^ZJS7aw&@L0gVKY4>3q6r*7x-#Z}VgdRkCC4O|DU&X-cS84= z_&+cA-{q%GB#r)G^7mi(AD0YjrK!?r9PfoXUxLjle7QcI?3SX9k|EH47v8*Iqd zT}p7gUdN>r$7gDqN`u0NL&&kHH`k}pyQ{gD0=lEopk2fEQD3fOyBam*x+aYpCT9D* z1auk(4AzT7_4zc@@e$A+!?6jOG(exva%qA--L|PLbVNM?FlojBT#cyfGgMJmv(kV~ ztr=d^b_h6t2)GutZ=%E#fYuxZEQ30*EcLWq>c(S>s+PRP_@ScEKa+AM`MZ?K#NrdhRsy}RMrkBJ(Gt~fSjWkV|7^X%Qfu$BsM{|9S zR{;=790NQ_esr&)Q**l4a)^UEH0V`j0O^jUkfUY;$h9=@OJt-WQ=wpoAo(T*wZ9Xh5@t2 zE(|I~(=|*SyJ6{$kH%nFh)soDS_lK?<*IINn7E_8L{=K2fh*($k>yP*XY2Y)TS90wf=HvM=`}6&4%&&7x`hi&696FyQcz{IjxcAn8v&R;(lelKXeK~yoop{5 zO{Kxcpn7_6;>svkrb#0S6jW}nyUfPY83#yoSVY$R& zAfQrI5nwv859UiF0A6S$A7C>r+Woc`G|e{5g{DV*w?WC#GD2Gp?P1&Sxr(|Z*g$C< zHt@!gfr2NW;mfp0+dTvv&<18YsKL%v3waKKhLIwgVBE2d(WPCLi}g`*VUDG0Mj;zQ7&u+wP{@!3?kKsG(s94 z&anXipph*Eu(&9N;40WSC`70pOakD0=w5|DOjH_J5>?EhcMt%cIh1kiXx?v*P@Z%0Z*I82bvMfYB+CrQW7k#ZV~{ELg`=waTPT5 zGYl#NETJ~(Uto(vt%4_^p*U-3{1wniY=FAtkarq7y+mMFxddF&y|4py^jKiw z!OL-|475di7m-edU0}Bs1QsnKm~YhoAl4vz=1YVc5a$Ck!R`%%%ropxSbB?6;EbhH z;|l#F&3uU*LN;(jdx>m?$fE?+1FNnYhRuZ53;-B7ivS8Um2&WeKux%~4g@SXw_*F( z8#sAhl2Jgg`ou4=XJnUQW7xECU=zU6r79NKFB}IrvknyjJ!TPgY1Kh-gU4j1Ap_g9 zeB`qMbxBJ?Ke!_Zfc_*tDumOq4CN+)mWCy?2-r$=EYPx=qU7Ic7HB3a;iyHiP(xrz z8zzMefz3hVhQ$J7s|S(pqdbj{N1%Z~(Gk;ZL-lN%swkX9ELwH+D_trJV(GY!LarBG zMDR^xub13$pF9qIt;SWXbwRK@`lYY;G0GY-lf0ozcu zfp=)s322l89M;Sruv7|25H$l!63nv6z0*;Ya9aRybQ&QZWL1t!&IRrgFIB?rVo{b4 zjer6qfCO}^?NcWm*Z^vy&`Aav1seg%QkF?-P)B2I69xN09e~oN1TaS$)dkk!ZUM85 zUI_t{)G`3apm74<$;1J$Nnzo!$i_Xd<6#nw3~WR2v&^A5B1qVbfZH`XrMD01R6YIA}e<6^FCFHx1YXKwD`+V59-uqzK5v1)$^*fgUWZ z(fN-6S}p>?mH`B_NJ9{yVIZ(nimnH18M;pu1_@GSKuA?zCLL};g}SC1suq-1!%zbS zK?(wu4}JxnAP|^dnl5si`hktAC_)0*tODdz5zy@*Q0;*uO+`SgOseD>!C8a1q1F>z z3NeK zG^DYB>a?5&?ihIBCMDpY;zf!Jg_Mf>Na+|DpfP=fU@K@;72Ho;78p8gV_N|n0PI5Y zron(xGFT#V3DE_nWUys8HQ*r_5FY|tC+1_&j$AMhH9RFC0FIgjER!q@0CbNL*r37C zjDfO;T~0}`lYy(Em~MbErpdt2G!zi%4)!ypI9{FF zuw0xw0C1a4s)TU@>c%D=4MOgcLqq2#)Ga9aq)Gw6cA+{rYow_1DFJ4zsDo2PWNNfO zVS=D}aPp|}@KB0gl1{6D0QH4|Y(;%$fT1JJ4!||js1c-&qAsPQiXrMl`9Z46W5g&( zU7|2$jetYe83?I3d_^5Ylv={GGBsg3hDB|Fqw7!#oReu`0XP~B2g;&C0btTO3;>>@ z5x@{%0+JmBmM;xZVGKG^%WyDJ<912WA$3Tou?5&AYWCnzI3!ZgvX2(i1##7hYMK_L zj-u2KP($T|Ym9xnCLYrX=!T&v@f#cl&;ZmWa7wY$MXy#RSr$pRYk_Vx6>x~Jt_=re zO2N|&r8t^~mJ1CTDp8;og=YZR6yHF5OYs;`vL+qT0HAY#z##7pC@EU^)hTG-^@#+Bj|sl0b%+31PsH3V}q>${Wh-2I|n>!Vn+DU=Yx0OaVs$ zQ}tzd$*%)HKvPQzfc7u|(AHGTqKGF@S8*MbvW*@Z#(c=yAfSk*iEfD3P=K5b1k?yv zRxg!sD6DPbp#jg}AjZlGBY<`Y0J!tE0u~7_`VFY8O!6s^f{8)J0>GsanCMqhWl*^^ zeQ1Jgpb3&~d8s*91s|ZSMur}MPJSN%v~UWLE;BJkM;y0wN9}m9ZYB{h8rTZV$4la3 zW02h@08SUBz#gI%$15!b48>E>!xRLD(~gfAiCzkJ>}YgY0;+<2I1g&nK@SW^1Ui^P z@Sq=c;&)`M&*P?wY% 
zIaw|NgD$jyVw)CtG!CWvP*q|EXdm)PEeuNI>BQ1Bbu_{iq*g(okXIN3rZ5cxbbKug za2N_Wc)GN3n^9GeMGB{oQATwHz$86VN)CK5hmEtq zQi|!N3p?-~Z4@)4wC*556rn*tjsyT;hNS@ctq91bAy8`$%~E&`0C=j@uIr2#u0Gv~VGb8x|TPDgfG7m9#M48w9u|$-LqoB47qn=p0Re zmyfs4&WZ~Ta ziZ%ipj}{hKL7f-`3r)^A07p#%97Kl}2}8*S6*dftq*xdUK}X55F+@jMjtcH0plz;z z8w8e0(NqFHz$>P^asVm_i$1K6hK;~fStzswEV}Xo;8GyK0^LusF9Nj195{2BJ_RU- z3c#W}ONt>_c;!r~rmgr2j-W=I1)rAgrC~`;S|%5Sfy%mG)N4UNOb1};l;Tri25>($ zn>g!gN+}W`dBa1tDy-=gDgl6I27#{A6*w^s7YD4P%789tQzCnbuNd_)s3_P}+mPH;b+CtLL z2rP{XYv8jhVAFLc0Pr-u2n=;I3&0?c6)h-n54{pPKfyr<7(sH_`wV2A>RdADoa;Src&R`kerB$<{EaWDu~)BUF>aOGDgoXvYj%KLj@A z!^vDV29`+`1|>mLfmxHHx6vm>!G|qqkWZ@sm2v6(3KW$g09YZqZn5z)9nF*iSP8~P zqt!7Si9KN$ik6yg-qV083&5q?fT#yB2DYk&7uI?jFcnZMjFBYOpt6~ul)$-612$E% zOez4j%}W`egDvU^_Am*Fj`wCbPQ#10BLrO9F9gu)fabvqbpjSe67Vw1MMYzyee)7E z$#2Jai$#?&+CU}1#2DTnAW22w1_8Mq1V|tdxK!O#XFmXB-0>z#4Hz6PHCu42sE&zW zVHs(_Az)J|0|P|FH`s=1Zy=!dO$@D4Sxd!=008F=1t2o&c)O*pDg z0B8ag(9(dO2COvT1Oe?90@N~ojX-5ETyH8sz99fOoKRmhUhvjz^`=>BDy|CK5m2K; zpbBdgvO?en+tBE!Z4WWJfA5* zk#7X#gc5KSu;^$4z@`WZ_zhkP)}r4KOabSZRszhZjwccnS&Grx7@8+up{ei@Rn89r zawX6QvS~JGIE^51N$kL0s&;J^A_3q~a1?KF4QkI;4;3WFgDAy1(I2u%BJea$Z9!c+ z4cd51OZuFg8p?9%0UJ-ffd_}Ipjf;DFe_}#Q4fdC);8Uj8EL>IV3SEBKyw1i7idtSpgXMH5n6BC}>88Wc)twYlpDITm$vMFT4pLhWQdQmx_D3Ew zQB|G-phz`ekgD=_usz!hrkkoi`m_&eusy{EFcUs4NL4+GYYd*43@mTczjYF}2MGaF?)xV`gShwg~ z9-^u~-H9Pp{aQZN4>>{6H+B_7M+BU+QIZ- z|5~sg`Wy+VT4##_JxG13ydIp7ZmRZmD_G9ff>gCnJ~z=|Iak#)g5x%V>qKq{Qr#D% z!E&Z*$50!{GJ@-61lu!$?U{<77JajeG{^@KM3rC%8>FjzqN+nW_YzeTwu1{~2N%c= zc4!9|$aaG7PB7mMz6S-s@#;aU2B=huZaF~)1qIY{f)jPjAhlF^*Q*7oYR?TWtg9Gq zdsREOSJk(J3ImQ5nxak`rTTb8KflOKK@*%{90od2G%w3X;X?e@nKm#i8!-}A)u03$kHA13FCEN)()rp6o_oM@8Dt&6 zV3`zXckt69>Ku!?KKkqwESE)5KLl!vf`vs6!l{s|Vf3EfVL&)C{D2NyKx>Dbgi7=s zD!4ghPF)mn=*10IGaU6smmz44Q0`n)g@3Sjhp0nGGBot4KVAHU0y_ay?BeiH23)+i z!@`*1y0~##)~xCC)vQ_F2V4sE8lk$krPo`N{%QYDnP#Zh_eYsv5JP=hsQxFJzEJw- z4>KJ%)aQeL{wNb1`cSVH>NEZ@Q`4|dul0wSCX5zm^T(Mk4M_jfOea+P;{jQEsLva! zX@8sv9a^E$TR*$p7}5G?0=Ex{EIyIPxBNDeSf&tii3ZeXZ{~~ zt&xESb{B@fjDa^wLLW!-y^>^V!Pg$i$$e{vhYuJqAZ$Qfueh!;$uVIGiQU81&y&LO z_Yq)csfy|Si*>-mL1RsoFe+@l+BF>a{%S*b8D~)~`!80(un=CJs7ku|7ppmV;bjCX zVF2}&A%jb2+png8d4Dy<`F}G&uqnP*c?B-JqraMB7>@N%Dd5Zh`%4HX|9`)d3+F%g zzxEDC*?)T1y|2ET`aiu}ufALV{Qk;9f;(rJ|NcP1Bx1zszqSN+(|P6H@VY+BeKpDW zw*|!T;Zf$5Ie;e-=KMA8<7FBR7(dkbw`Xa*G9i!)%W}0>l3tmf`3h%D&HL|edT>PG z0Qt&hu)@$~|91v$h1u?3>lI_NSON6xD?)B(|hx4(Z|{eRi-zyDW$`~6q;Kb(#KdH>s{{rCR=&+q?wwOiIF(5zMMsFts{ zY)8KeS3l?d$FCdHegYW$)m_?e&R+fmEbYfnX+MSd$JhC3-z4st3K;&qZU0yP`wy|bW0K<%{^wnQ|7QPd;1K=u{|APfRhfCk#?X z`!EZ8!{Br@hWz2-`sj&6v(tuarw!J@8V=QUz4S;c;u`dTR~*0Q#o^(9FM*yGK21s? 
z=->ky!Jo#!6Hb}%#P7VK@l(iX`ZyvQzmsvY%Yi=}f~ODv zg(MpRCRhX@3BM-sMB`Vd(RlrekD9pN(4_%7HrlX|MFt`}xS}S0cci9+zgooyG;P5k zsC}9?^kLyd;1SHjo49C8OXCyXaz=oRB}Vq1wlcMtF{~{hMx|r7gYc`O4L;ysW(@(>%JU- z=2g!i38q9F-bKM{LZ<#5@sT9kkpK}49Zb0AP>_LEN_s!Y#*6Qgc{Ll zcp%t89BdmQeuD-OnT`ZGjyyqZ>Ho;4k>P(=fT$WMeplA~<#OWo0OBZuS9eHsDv8Ye z+dTgN$^NVJf2V%&z5cWQKYn8OfB65F_P722PyDq=j)CJm>f_!C@!dU1^x7k?dyfGz zgFLOmT7-G(CiII>9^{EhhEksRgv8_?p5}4=V-r1f663mdkM-1rL!+mUr$uuQhJVe_ zK5@NcJ%%0zX0cZvPf~1R|2SwkHYO=KHZk6l7~36OOi%APc$yw_q9yT!%!?&@jP!jqgB(;xqHbunEM6OxiVE#hN(#d_-Z?gPVV z2*d0hJIHfw#0*bkY!xOG8{6B{Ibooudt72yC<3AmV}m&kg1$Se;dhBiOdNzGOdgQn z=^Yavo8&>Woq#iJ7t<@QPmHHazr;j19D9=b#>9ID#3XqV`o_jX3&{yMo^IXZx_BD( z>pjS0c<>g_(+B3-FA>@?yx!1m2?@AxzA)F*d5}l5RDvDmcm~9EjrFuh_SB7yiysgh z@2Q;RsoSGpVwWC}l9(9Ry-P5?TS6jEuWw9ZTv88D7geMSG9__P61LF=#)j)uJHC7G z*d$niHZedwePIYV>dyV*dUwSMb_LcZdAcPg^zmTS-4oM0xkp03?maw7amaI+SLayh zuU`^O4LbDz6p_Z|m^)-`{_K|NZ^k}LwFqe8+%ttQ7sM~VHv_1K6WCnty2-?lv*ZU6qe^>GwQ#cb^zoNVpv z9C|o7irLuOJKFRR)3Xu#U+~1o3H@XB^u%JL0|I9J5A^MZ4ikT8&@SQrR z4Q?RPi1imn|BGWGBGQb%`w`$T3=H#&3!m94w*Q+TE+!&e_)}o>yF?fs73w!7ct)H^ zI_{^eO&%Q|8yNgIBW)h|$A$(Ae}5Yp93>2oj`I8N-*2*5|7m`$V&M$G@SwQgrT;F8 z@sAY=|49-X9O2(;?hip+Sahr~B0NsCD(cT=ZMh1Jj+xOW{wbaCOXMS?gMuUeIqd(O z{ots;=-;={FUun0gpt2H6BZs66dd(eVr*@G6Ng8|#0&o;{VAODH|v4%!sw8Y-_!ad zpuv#=!9oA*RrC~*;lJc@e<=K(mVc7}dP=^dL>uY1Z~PDJF9pA#@W_Ao+W!z^{llaD z10q_n!r1uWe^Hwn9q1q67v&!r92ess_~WGh^!JZ7bL9T{ufNCg-_v0*B`SJaL~u}O zuwQ7be@xha<&6J^_K=K`JV&x-y)g+areKJOJR7h z-?ZTH&@f@#AEtkc*MBjd-*cY}YYwOJ5R zLi{7*er#X4Ib#3&`TyzjqcvCw!^Hl-x&PVPIXcbpRCkfg6jVTH*KWNMhBte+A;L z6*4f)$}e6R9uX%ey2l2@herhYiHcoZbgbW0J2`=bsj06_t2ZJ||IlE8WZM$Cei70B z!r^T%F+KF#>9)fJTX)#ko6uUHmKtuxfDVBL2`rIr3mB} zg|&JoFBB;F2o!xgwYvD*rQ-qiYHU zT0R18U&B`VFE=x0$q`vW$i?E^jhthw%XBe zvt!U^$51Hf<|8orKVc{AyB*`-gJsuxsstu~*)jcT$E?kcd7GW?LV<;kz|vR$-|Ymn zzHIv%tKXyMY;R-dAg~st#8hO&=DU$(f$gAn$pX7J1NK6JgO9-Re`=uZGo1c3;9~D2 z=<&mV^A7_qtp;4%47dpeoR0v$Z>89%s5mQ|Um_!#wWbN!uULDV9`;VoHiDjiMaTVT zbb7T#$D@tiTPWz`Bk1ef^*`)?N^-wH@uK4(@cawk|0jMx8-8FLevnWw*hetr-|=lh z8T!xO5Bm%6^%L*ihWBa14;KnX_y|V&cK!qZx0H_hBjV03cFuy)KO*k?BjRIPBR;l` zJ5DGV?<1J-Z`?MgC;s8Iqn*8-!0!il(hshGD>tBx8z>Y6`3QpllS$h@Ga2#+*2&Jn zMiBZ}EW&=qBD^gYliS!+gn|ekLF9izZTpXzjQZ2-*8La#7e3}EUeJb*ZNtY21wtP| z{J-Pdf-?1=y`T0Me)>=Rj5hqtHvBB1V78B7&VM3q`wy9%`&$VR6#)k)2f@4_5l{FL z@x<1M&u`-{5DJof1j+x#ZF4&1cdmU8M;iyh!XMn!AKXQ)+{JC&G@)RLk0AX&nY8;e zlNrBb?Huey6?myA7Ai_2?y^?y^g%i)g5`sBT4Ryf#$F*5tn?9N{U_9R|Cq^De|YWO z8tT;|{4PlmKD!Ms3Uy9fsMoaN*9rx>K7zb|$F~I~|DU~I_m}qtKfNz(^L~9BeuGf3 z(MPc9KM}Y4hfHq%eVd5(ps4e05k-8uh`seYJ6W);HR45W-0ea^v5#QKzj51~F8PD& z(8I}Du=59Z*AMRQR_>lQ?p~pw)JL$dtx<;r`u+WXwSV=}{eQ;7$gT04;ANrUijUyxPd`J0qgwxE|I^QFf6gbZ z^VRhqe%Ab)t8TQ#^ky6OmQYaZBe?z3&!5=ue%|>bo-X#HVWdt3?I?oQe}^Uu?zZ}Q zuMK!#D0tu_c<8JA^TT6i{N-cAAF*__6}7!bB3zIN_qZ*VA|D&ue0Nyg{_{uh z;k&%q)VFnP7kr!}`1F08{cA4&Jg8k<+gJTMAq&3D5qu5%E>-!5;Z*Q#j-aJYton}w zW~^8{F|j$Z?Lfj__Y&D{@N>kE0@y0 z77v+UyRGc6ZLIyTWkK%OTa5g#vYFo+n4&UT}R{BBGl>Eg53Gn642$>64N! 
zIz4*{^&gE8QN(fMLh{+@jLYoE@@m@iY%MuWG9Xpc?)2#R3G&zwN6${&=Q5}7rP52) zVql1go-c#`=_kRjhee5qoIo!AcgrfJeto zS?`Y8q%ty@PV*PwrtL%8w{Attw z%$tv3X!!^D^lB${>VAwUdKYzbE2HM<6P(G6QgZO$M;|rH$)`gqO^8XS9ws*d)2%_J z@etegzKjitDnyG{gIJ)U6w7m8%Y09Erdt;eFpq78Y}e2RZct4NpILB=-p-!FPIYR> zWX6?K!ow5nB9}%s>vyn#D4rQjyM@!iBiGUI6Q@H{4vn9lmeFQRvq?U`i@Pj0zIl)YVm zp+P)dypcy&y{g&a?Imn~u?e*wFq${9r1ucQq3NOm9r_YA;cS-oT_$(i}wL9LQ zxt0bwXD}1bo@{z%A>V`df@=oL>G3)P$|(vWCE0;A&nJ=6O1x>gu#gS&UWtS4uY*Ql zoLi5V#_U5$Z}u!_08Lvs6R5WyX4ek|w}l4uT6`s}FK{9Wlz_XV{s{^ z?@7Sxy;iV!w*9bvqCLy#u$0|BgPB9gB770sL#ya6 zyn$s=y5w9dQ=!V#*|=46@O5YkE6~Vd&FPQbrW+N5*V^R>)93K@*;nv(i4YE%CR4Iy zDIPePNI6TjDRxUUmTm6FRwY&8(9%TaYBrTpx}~xJ<+J#y+d2%q^cMECTf(erHQ2Io z)ez$u#~Rah*rTL8c1J&*>7yUCbTcNkUIpx#O&&XXbP+A*&fuz*3s}KjMM~B}EIyUO zws%Y?>$oyp)3qK4w~b(*cP^)uDL3FllOApB)`Lw9I00e) z)~wxU4nki%}t>K!ei*}vlK>2KJ+d(gD$IeqVq3eK)+uzR0pS#xo;kGsI?2VL50y zr4Q&=MX)*@FD9EikeqYNxMBHTbku4!Y1_w8fSCo|dX~nd43x1;;cPf7n*(ny@VIT- zcrwVdhIX@Rp?_U6MYsEkYL{2APA}$@i(VBT?mwOl)s-aa>s`omsW{D)9!l@eYOsfM z0?6-r5kzXPa4SvDC9~>63XCdbE9YK?VONsrP2n=CI5(RvC}%R-EkWbPg)@Uo;*76b z$OY6MfHxN6UCEJCYzr z>Nfpp-82>2l@&%?q$6qN&Jj$0d=?Hb(_oEv3%K#gUaW6q6RaF-FY50P;HlFwjD2be zI+x9`Zj(J9Fe8ghq(fLPpUGx?JPCU;BT4woj#uouik>#P(cBN|WR&&^$M5dR9JLbJ zlA$NT=g1u1)zXFSC|Ss~vu&x^X$5WlVotA?O=W$b&t&^TQi-nZhwSLr!wqJH{W?8jL0PrqOmYO9;vp+?3CHm<^?1$&K7!eWd5SO7N~e zjWespGJfY7G{1TRW!{XYJT1{#%9zIFxBFAji$dt@7sWPsxO0=_=hLPAi^$}H4K;|T z)4X;aFv-0f(z@BvsN}nROOie->V1yy`6Zgn-SWV4iyUoL+6b0)OUNe8o85|@h;s|P zafyo!nVP7OrM!^sx?%@68}(>qP*)1~=ji$RM2P4=8858YgJ6ZJ2qt&&(7HT$SgMY* zl_TiU2Qw%hKApnT!zgBw6m>|tg&|xpnHk^3M5AQ()pr1$ZLnk7j|!-{-%46x*n+Df za#`-05fzof6K<#6SK$5w33MhakmNQeVmYlK|9+3~Y-24*_FKiGHYHQ=sg>+ljWVkr zypptv`_XmbP#*p$LGQ9 zKnG@ZBNe;%89<@QsuaIxLdDwbNlZa-g}=H`hIp&vn17=l&LvmkYpG6TXFr_2nNA`1 zgn0U}egf55W|I8+DHQUm5*&(@DZfW2ieH^edIKji6(4;p8zVus!D^6Mr9%dqUCDpb zTkvi1VhX)iQ-N#?^zyP~lRoxhO;`0PULgbnN{% z20OgVlG$4ILub)!cFuVrY}xxBI-6;daM^0s&2BIEs8)}ysfl0@kIm;7J9x8;s_$WE ztqu;O$q? 
z(j3LA46WG4Z~35eievkm!tr9`7Z`FSiduxRq>S3bigelt$W zUd{5)%Ch{OYHma7a@o|>A`IOjs*XFCqs-QRRJ-dFXx>_b5pQ+*(PJ{{fr}rR6m?|x zUU%jn6`Rt&qb*?6YZ?hR%td~{N;<@AvY=%Xsb9S}D^I@+nTIsk+fz%}8vo8Lcf>sA z{W6HPU$zXtHswH)yBk^9&nE}-UZh>3j-z5{(2d$gP$mw|8IMAXJXRDpkTvuk4&p{C z(8c8fJV|h)fadGyk*zB7EdqOqrLch}IqYUkI*Z%Z8Ab+w!f3;Mrt#hr6$T8XDG{s5 zTr!%e-&&7cpCynoDwDMvE$a1xY?{|eismeKqPrWM*{cf!+1u#pZYy`&(YM75sC4=o zYV^=YQDmP`_-Isn&P12lQrP`CKH~=&!aWp)CA!oY?a#}EIjE- zvWJIJ;rmgPDd<3wb0yfB%L7^U3>9KzzYMl!djsr;6LTVLt0pd-2P<$NO3@ya9V7wTYU&C!jI7Vzm*0}kKnf}M4iLyTPt^NyVe zb`-(J%pvtR{QbY)B?^d$QWJg;YODh4p`)MibBWWXH`?*|jz5%riTU?fYU#I>QvH)4UX> zz(=sNmCKlN$$l7Z=)_#}C(!VFg<$&Q{{Szinb07&CQviir>bHddN59d8IJ5sf{?3_ zG^~`@-nf>k-di%qfg_porG==o?gi9!T!)IIu5lT!Eb#KyZOUANka zwhqQjSI-zLFR!F8oAStW?pqK~X+lZc1lGR8YtkE304-HpC^TvVttiOA3$G62mW`gE ze?Xb_-D=A2FRtNK>;Ol(2QU?-Ha}nWwZJRJdr>M~oz-hAQOw!hu)kk6zgm&;4{Ftz z`;FdIxal=)Juw)Ql4MxM)`MtObso#J#^L1g3$XUgT%cq9@Iik?w)oQ3icaSzVyIya zU-P&d<;j2J`^s0MLC@zfHTDwBn`c0e4+iir zK8LfdSZd&7=u=w3FXu<1P2Uly&pWfob1r;V_kPUUNQE8$_?CY$AqQ-}TEn&C7f?98 z7;7hmL*$-~yxjpKUg2YVwxT{1Kint*$pQ(c>Q@XOzN+y@drQ!?Q49G##!E@-2~n1m{_8zZ`+#_=O=b+bzG+QeXf z)sk&!k$eJ+2dKc&mF`SwL=1HOvJ1<)zkzd?PQj>lotTuT1dVFm4-@+;u@jPOp{rIW z4o4cIyY&M+KUJENd_}X5xe;X76`)Iw9e$l!in|OtvKj+r3aH%zTSWU>RhW(uyKZ35 z>({wmDGV;>Ec?HDJx&!&rQeU@(mg z;76}X#eLU>EDMicX`eW1eW#AdB zO9kWo_}J^s@TkBIKKNXLk!AH*sofEGcRBVJ6o8V-m&Mtk0}HWjXHMXy>j%Zke7j8~)}xHawAIS3HgAg{=V;OxI&hVYW0)<{%E6 z(jQOVFGtl}1J>jCT0Az=ni)+pB7^vLtkC@-JXvMWoOgZ2=V!#+RF_Qym1hc+>@C5L zjVj^J8CKzm@SUKyqZ4KDn=p0oc{rIO&$928VN=v4*0j;3vTw3DJ9*5dr%Lu&Q1A+4 zAwheYLsc)tboEMwSzGC3#!$R(t;%#X*2B})i*Z}S3Z@*mj+J~^T=DwOHqe>Sm7D5# zoPTDz3{+F6@|}A0r-fe_*eSVWepgl3%5~pvp~nVoimuH?E1zJ{sgPsy-1W#iJO@%g z&teVhTVUS`bJ|RWaDB%BbbX@>8E9Ax$ai%RNaVo-_GYsLt;hq zRX!imn>f>Fs({bhqrwwO3Y9dWFd1295Q66kE1Q!t5#$^`(?wVxNK0G zSj(;OT!Q|lU*N2yJew7!P0vd$$WK`aWnPGz4i3k|r*x6;X-3Yq+N7f(#Z>zo=SoiN zvLL-gIMvvnW-T;_Z^JvW$WTrGrK31>9Ptbl6r9*OQf3;FnvjTx@s8FRn453LmPA;) z9eS%wY_1C(`f`>tKh%Nt)ULvzaca~d;3;nEBF!?h)Mm_yoXOFouGqRPwM5hA7#b5v9tsea>>-7dtW;+4+mRjC-)fDJzUxOYnpI(W(ii! 
z^X3z>FXH4LpLpNm2{`M4Ip?*e1LI5gU@wV{d|b+1Xg=5nc6?J}pLc)7v-`D}<*d7~ z*u2P%9UqA4Bef{z^D&&{@xiV9jSR^fO3<#{7B2SQF6_|TguJ3QgSw?QJoI+Q4i|dR zgGo)Ur+wC8{L-T^Gj3fOUW?Sxfy1?ktdUr@)U2UTp{)J zAZ$)Z1u?@My!7e?7_HvVJN6ud7sEU_pTg&^V+}PB7k4MI2yNz3{t#S>r0M#Uv2MpR z&Y)?w7THX)XBN66A@0#TjF~0JUM7Eof_eKfxU>=NGmmiVc2B4{_Vp0Ar-1O7SRFX3 zmEekX#u#PPj_!B2W&_TGXiTG`NXpA8t&(SWp0mvY#?Go9FS z!fn}EeRigIIVv8Qhk1tTv{zn{jcu3>M+d&dJ#%BRu7em`zucKdmd=H4wsP#=m=x@A zYbL0kIfxEDFTl9fV^KO&oW(U}z_7|Id{}P_P8cT5UO^g!9W|qa%{FYT#YbRSPvPb< zJ(fB0F`jy32C6p$Ca8NQ8?l;zIDXmSlb~$76k5ty zMM}g&w}Hp)X?yT1w<$JebavD}DDC3Gnonz!QECII?l6O8@(WOW?qj!=J-bliqf8iC zF$en2zso0GG$y^#b5OP^2i0tsV8)lrP?~Lx{(@@0%Up9dMaO`5m?r^}aT{^y%ScG~ z9*5cE8BJg6$m&&@10KJApJYlX&qd)A&5R+$I%K&pWYvCO@ zeBO|n3{;ung0sARm(EE2KjZoAN~j-g2d2GMsb|6^7(7{YZ?OEx<;285blouOXjQ|x zzgJ~G6wR62T7vs~F}QuqoQ-$u%j+fRQs535*z#r|jgj3A$t9B1WzBow0}@2Dc^9TK z&mAOAEP@Mna`~CnuFUOQ7xFMV364)+forrQmV7?W#m(7-%+rFZx}3z#DjaH0HYeSr zbSRuN7~5}o#Z`4$17VlM*p^TuHg$^(eNjw;nTkfRetAB+e?E_=iXL$rHB^|qs~QbT zT#6=ZRoRB4-(aX=J%47=Q~1`TMYjW`sAi&Bh224Q+Mi;_D)#n9ZBIv(sZ4-;-iB7_ zOE877lU$0fK8x|wWlK%2fm5~-Egi4M3|5Y>IGUA>mU|E2yPbA4zd#x{npA-KGFd#) zwJVuaY{ZkG1Bav%p!9|ZHt<#4x>RNQV(ks3h9}_S21lBAuN^y(a0GjK*|U(M8koFw zJ*<49Oc`U>!14YXFky`?4Qr>*CyZ2rB71#WTxbhMF1BX%da zs3G6*WmLM-q(H;vWN40(Dm+fj=9VtB zgm}HJDF10a9Qjbm-&B6b8Evh_TiS}4y0tS4dD*?9YP~Yma$c}$-!iQ3w;G$euZ24u zyVLa0@owoNf7Z)1f?disRDBWy)(NBGqwh30JTwinr!D66K6Qg_MIUgubtCRRn*>VB z$AN#IJ=^%{3)lD77m(H(3X_Ms!i6jE(cW(_+SgaZ&c;I+Sg(uzL#}}ob4T%jFnH6$ z2A{}phJ9OOxyAQX@xInhtT?N|Q_Frh|5l6j*4Jc`UV5xAT@{!Ah2`HIijZP~19Mk`B56dCix(Rr?>f zzFLt6O7UBG<78bfNYrDms>Okq-DEhTxdm!gU&gNs7tpSgQow7<@WbbHBbodm=w_|Y zY>(u^hKr*y+e(Ms_n!c|^F}k*nS=R{r@OF@UL*N<)z4U)^qSN7=*vD$vO)i&lhI(c z4Vn19$Ju#F7+a}B4l+|v;o(%?XiQHgnK_DgySou59jk(}S6KcfZXn98x5p8yUUGfv z#CRix0r2eA3P_5W#NBrDfT{Jbph+T!pS83z^FHtnqPiZ%($N~YYH}P7-PZ(HB2OSx z`>~drDv+M?0Ak!%prlhGBm@s;Pv!D(?FwV&6Rg1WmmY(W;V1Fw%K13{xEL7r>A@YH zo53|+^IU2n44G+tY+dDI5S-n9{1S}>Yf8Q zyLMWvmtPAc59vk{&VzB3ehMTHSPXTawsLP)+^djM6rk5RR-H+3gU_YGsme6{hgu@UL_x(wpl33RjDBRHq| z4Q4)Qg4wep@JrJU(0|wi*FrqXVKv;WHl!lW1d}SH$M{X$-2&;p6){)Pfl~@5({9- z>3tY!(~H|-at1xxtz@eed~npU#c-?TDIB;Y%O=dqh1VJBIPu5@Se7=1A7rA<)NYy5 z^RAsp{Od&8)+K{xCq;AW1(!i5{{XIBy@@N}rr@pFTTngDSai?aj#k$~>2yCmmON64 z&Ru?q!(ST{7bVFggWvEOyBFbfPj_ZEcQK55ZA0l@G|^SDJ8f>3W)DN>VB}NLJ@LXg zZe~gAER=*n_I=>>=h>iBY(YJ`*Fi2#V+or>Dkf#xvw?$Jc(0V6RO@HXrp|4ECYK@n z)&6_1yu}K|R;#l;7Xu(pwG*o!<^pMJzk!|LC39%8WW~mXOnlD)^c`-@Y~SVcoiyH{ zh3-!J?2?S^#zwYj<6I`WM}m|KblJp?vNXhQ0Sz%##k1~RY2M>zoO!e(d;LBDHL4Wp zOZ8q+?e4*97xV)Z>q3DO75Rv?e9ZMe42RP1aYs|WVc-}u8t0};Ib{N{&fkQd8QQdX zV-RM^d-7k+8(`&*?clX-tXsnMtkjTWTbhii&h-ZW z^pOS}3f81U6)Snm{*IKqLW!gUkMgI~WJudP2nHFhh6kTJu=>vCOkGEkf8wUdUaeVF z{$koU-r~ar-dLd?0)`(fzj?|JP7M@i)&0b{qed%mVaDK!ndYIe{nlDETZQ<3xH)y4 z;zXSv8B)b1ZI)2F5Kesbb(5kkaIDgZP0!I`Lo44yYG)ZXC+IbfIHf=hpA1<^)e{uI ztjvx(WO1jwc5pi%oq(QIr4Vp&I-YBEhp+*u+@8Bq{Ny24B-|B`Q{LajLjhu>6xV>u zOQyr#1@cV()(B#OTfkS*fZU^$S?@lZLFh6QUcC^nSQ6NYnh!Q{hXZ|Jq^=T68`_O} z1EP|e|b?A205;#5CmUM=Wf~IoH!Nyc6SJ{`%e0J(8^r)SPccRV7H=(Q|YN!ou zx?#o2uH}Kl&guLCmnSgv$|ugbxdSawmmL4g=2(~oV=Irt^aCC+ zak4I3?==VftX_iEB6XU6x(OCNwxyP@H_%f4752J%95tR^0h5JUVDMTTT26IkPH*bD z$5$8Ot!N|Byk^GGR+83st8$B(dKDdSFNdawZ&7it1C1!_&NS~ff|%EK7?-HREP}7_ zlKrgdO{yzN>OKNBFJrf|rVi{}O(#~-QHBe+Z;Ll>rs7fNfJdSaV$p=oq~Pzuo9v&ykg>8F$Buf0 z_2QjauG?Om8qn zP__g{@SE`~{}l%+IJ1IhSFqnXbvA$5Xq=~6uVXlIUOlFDHDR-l*|3kr7hwKY9t*vvgTh65 z=3=PNf|?8AVZ&LJ9&`sno+V*oq&2&^Nr!ctb{6M|bS~~WhvR}5W73PFa>1yM+?6Zs zAXN1>3P%s7i_!LU+DDlNHgCeTOY`}>W11}Eo-th6YfJ?R%PTVGO3}?vw{hAqNo;N$ 
z#TA%av#68n@KB};%j;7O?km%|6OWv^O_m1ib^SyBMhimTm1>;b*_8G`aI$RVWL@{-k$azUh*=u+hcl3yW5ZP1DX>9rG@0Tn9X4xQDWnuR zuU*_;LFy26qcITq=p&!;}j z;`b()vyn5bX=G(1c)GRlE@00#@H%u?(wH{YbC5*`(B|EFUfHMYXEAJi*paf|&cidwC1`Y3 z2_o7nuqbC$obttnT)P-?*Tz^-`GW;q+>nF3xBf0nGIs3lyOv^|-Z+`{}$ z`s|vu4Bay^=jRM|WBMUeF@MT_xZY<7Tx|+)>ojg5%>IxCQ~J#TUzbLBcfyw53%|m~ zp$63B<~zs?Z$R_ii=jC2J(zFYj+PyFf_(OV%wBkt@6l2Yr9-5-v?hPasJwt;*vqCa3Z4$7JZ+9Ui92n6fcZH}S>_#+siE;w5skxJeaK zG-Gpn`k=NNZRhT*u-`ZY$}Ow-;VM;_{neV13wGiCcFOFurXG7_EV}okX;XM-W45}Z zDQ>^##$sbWW9t4VxO$-}^XX?!I>j0!P%~xgsvFSmygZ9heP6-H8PjVmMfP#K7JP7c zifLz5Xx)HK(7QGXY~dUiz+LB?N+qddUL!2D`iR!YUHPr|O(|zXUilJjRqpm|D<&QJ z8gvJDqsxn)ard2-SwT|?Xs+)@8RZR-XkdWrwQY&-vJ!WXP^r{CU+8w@R0U?4U#FL2 zJ2U4837qeWMC|+4nq7F-u2SuOCsIkR#-`vA7$bcR7m4*@Q9GTObbbu)D7T%9KllyJ zS|+eL#lu)*>|1VCs3y6EX<@=QM>?X>lY*9ihMM+rlpcE;hiT=av`~vzN^J)w0|b10 z{z%SHZ7*B*)D9P9GED4X1M|Iwm_Jp9`K^h<`+Is~awj8tIB6ctO)vNe5d>~HS z6^iE$CBV5CnS8x+3w@Ft25(jj;AH~$;o7vV^k~^7Oq^K{`B(OEmd8&q?dO>w@LkF> zyq~bOeP5%xY!My&Sb<5L2_2tVfPUAcn9a1uWv+XdL;H;zVb6#e*iF4No78fOl;zxM z*o2{|;(3MFyk<&WyQkupY1UM{umW5q%<)XU3DdfDgFm#yfxe#9tP~9QC9e-o*iT@` zob;?nvG)b8_KP(0n(fZwss@8hQ(rpL>mGg@QwD`{@3`~l&f(?(hj5?MM|fe^A4)$= zpqa;1SY?F_dy}~ax{s8_;+?(O-NPm{Chi4%GMvjY-6o*dsB8SD*SesTJR2lEjNrA> zU0k>OAs%~J!Ogy|N|_tW@p;x7dYvc7RIkagIZ9g0YI7~t_1g^#ZZCzTXC5?m!WUGv zuEK;PZ{UmFG~6_EJJj(DV2##8RJUKw2l;Q}HJ&}fv#vfE8#^Aqq&~w7Mo)Ro$aWao zVH*Z+9LGtxe1+9lGhu>{1RFWlfo4rLp;3FH*a5-h3SJmO*>>g7dulF7c|L-Au`w94 zqmY@!SF#sVL)qmaUFg7F4Lm()3rrhoM5e1cx>c@FVEc<*X=afg@@ZnRv@jo-JElUryFv#0w(bCv=ePE=<>3)NXqB?V?t z<3Tp(RH#zZ4G+E4W&@jd!S2K-u*%q#yI6M+ZPZoRSyLM{Tl|=XtlwVg+P4q6WDV>Y zr0?kV!l8nVJN1=~8+#0zM~m*b&KAtb`4RVa+5l#^$ArmVvo9MX->&CQ>0)p>sX^B| zmh*{5+SG9D2BvgzrS95K@WF?9sFj$3<7%R@_M0%(%{CDHsg8guQGsr+gt^BA=cu2*W4&()MhOEh;@$eGZ`X$o- zu2twBwE~Bh*TdejM6l@N2Gcu?<5C_KfV`9un@9$1oPr#A)#;Ljeh^%1cN?TS&SBe< zJlUo^V>ZxlLuF3xVGOraL)|{UY^~{37PPH^ov?Vo4f<3BNABn`*TCMeb=+WD{=NkJ zF3>yQfFN~UAZo!hSFn4OHvWY!~4&SVxG??k1hK_Me7C*pOD3ht54y8 zmM>7?6@=$5Uxt=@t5CHxg^r9{jIZBSf`-FjnAUi}t>xwdklw7$X%+u}!Yn zRkH!r``WM>#%J*B$q_JNbbHFTm1LzNo3%I$?+g{ z`YLX{F@|aFv8D4lN8rXxb@BvZ0 zdU@&;>;hZ0gSi^FWv+#F7hsqBLG&>Ufw(*aQ0#07&ud~>?Y9PcQreS!*{Q(H)#RDj z%ib>SW*M@+7cRpJ?OjmgtqU4xL)A^0$TvAsR{na_iyz3MC-i2fGd+pbIAQt?OFXj6 z9Zb`w;J_ivK>por{J2qxDNm0=;i1L9o!B=0 zPk4H`CYfx}p?$Bnq4N6cklD133tM1KuPm)0^Qb;MH)|i{S#*WU(!s>aQ+T0w6?DF+ zN0;{0qrbui9YhdawmXu$y8_LH> zk3$|!cEj2*2uN-@NArmi|p67O(W#iX-yMTYJ$sR^Lh5IY5;j!#-3{5p+ zuhdJhIs7WOd)a#Emar2)os^&j#Tss}1|7v1Wl2_RDV%whfaM7!l8@Ne_^6tyCYY8tw_4IUja%3*QVCDxL8Kw#Qg6d#=x*oG| z(j}I}Li9_9NrBtlArGBZ@GhWE2hyRFVx z3|prtkmScTaMRD5@4@$>@Tv}M@ac~@u-z&&%iG{;FFO_d;!;?-`!M`i+?{O;mnHji z$3c34E|WN-PJ`M#!)OK3Ok||b(z`E3=_wi<9lr!6QtzPmSSPBvr_Tp`y#n^lR)|+G zqIqKx1oB(C)kkVTnwQ|eHprtx$4cbR$kW{-b$Tsbj_!lExeE8o(`Nf}RFNIc$zEK` zi(mVMR$DKkbq^C-%S>s=mL2$R>N8l?V=cVXE8*WM<-xK(@3}K8-@~=OeNaBckjb=D zV(+c@quBbsJ#>x~;*4%N+@{FK5c7E+zw(75e?X@YHb#{}Rqbt7F-w|OpO`_DmRqq4 z_j=&$iJrLhl?0Uz>B{O77UC4KJ8pjI(g4XJSiO4#sHcvC=xPoxUaA4}=#ui?0}HYD z)ZREjWixK~X~8Y4SAyePUp}?87(Dme!kNim;b3Y%wn;^tW$#yED{oH#)6s;|nj2u- zfrV}=x?^Bn$X$54$%S>hb_E`aC8PW{OEyGW1E21A%->3H;Ac)K=BmnvqNR)lvpxR+ zqPzys)QSoW)98SFk*Hn`8A9uFa#5gl2RmLngi_HvKy61IKHszhEm!NZLGMgK+QEzh zqu;>fU~3v-b{r%PJ=i{z3bb6&1S>u#z>=QQY{fWNx1Gx_z(NHzf=lVBx8M=hbbO6^ z;@(*P{CZI)L5E z-iXsDC)2TRo1nDLn>{$y1s}(pfIXuRaO#phKz99U=$|IQ-eHHpS^E;0mdrz|`-bd- zlq{Pvpf47cmUEHLFVL}wv0b{Cz#@WUwR;|;+M;pbuC#@#>vtZ+6BH;R?g6^p*vW5Q zc>+c#X5(-{ch)&ykt7s$fQo8o8e@8ryIMjs16_X7_=^Z(%KJp6imqyOI? zv`e)2P}0_T-{+KsvPbsH$_m+=q$HYzBFajLhEnQ2CnA+qWE6!|gb1IE%%9)m_xpbT zf%`t!eVy|<&)4-h_v2dCLP2YcaV@W<069R$zfYZ)AxS=1(x#%xli##~IB5&WoiXB! 
z=-EQ)7kBZ{^V8yiABSnugl5UhnR?u&p$2LXH-pB423V%I8Q!`pa$|WOSY&y#@jPu_ z^;VM>s6C|5=Vnupdn}#Oe*w5;J?}OePcv0bFnn4jgtqE%MCd+T9#sRuV^@nxeR501 zZwYXB%@NY}ehF$i8)(VItstKNB))p`0BVO-koCv}XlPt2T>iETs+Y}2>mlnY^Q@H+ zY}%7GTC8B~w;`a~>lR3fQ8XaF9$uW8g2QIYv1*?)*tXo^VdI=N)EMv={j z9inCRKgda54LAPS>0N8k8}%eg=yAOd{?gtHN{4(n^Urk}JyD*5CM<$ssatWK(*PLL zvXK)mmZ8Z2PmUZr5@*HR;}5GfEH{51Ozl0G7iUW2x#hCBZ0dLM=Hhe+bJ~y7*PWDX zFh~@?ojQpQ+NM}J+?-OE%;E8hF=Se1O$E{_I3{qi@Nk(T%%8D>Z_ltsiFOki?^utc zuPK#D1s;W`wup_rs=3lH4qLTC=%<$kUkjSSOIt?b_EBp9W1hq6R4Z=%<;*HZTKG`1 z33u+F&+8K0(BJI^_3eL;+^xL-)m#QtPQJu3Svj2g_aja{n+W^YRMPGAEo3rRx$JWI zIKDc;2V%ZumnMwd3z!xKpAQVCnm^v~a8EJmzI9?#@1N9Y`-=XwOR@H^X!>-(0?#%! z((o~LFrldef|s_6C(MiK(pzs-Kl4%YD}4p)WR7NYRXyCVe8P1;8dtm;{Ab2os zGq;BJ#yPXx==iKDSU=`8jokiNoFF+vM^vLJe)>e-tPlp%oy+KG;S*|nJDdki+zC=g z01ZAV2wr|WVN8At+jO}=<|IcU@AWt6$m>aO7OB$q{;#0l>Gy*Aib>w<7nGJRE>L5O zel^0+UhiR?vpiQ=CU99pefKVRA-hW475=-%E?1BWtY4ka&7rS(uaOjUj zlG?flJVwWn^thv9yjE|vm)Z^IvPbdGa#J=s@dzrSnqZLz@Y`o^=;YoQx+&dI8mwDJ zYMyC=mgQ;ilHV$>{W$ou^xJW#}gCBmgkwO}+|2A}=3;biMd`m3Zz zHh!8cxB3yxKkgzrNcTjuFkL*eH4O|W^uY0h&9Ua{Q>?JrUS@K?4k7wq*}#jon3) zH@j1#Nq<34k+>mW!VdBgcyruh5SCu0k!xkKV`w$Eoi)NYrvfl%lNTlhJL1cedDJ~- z0%mS9#?8aFgIt3Yh*m$u?Xz#dpXLBQ)MbG&+v3Rj=OHRK)F;zEZg_6RDT<$K#trff zbjRccj7&KtD!$t$_#e?F?3)5ALs7WDLy0dIrqgV1AG$1~F1#w(CtkmQ3>TJE!sc_@ z{Hr_3=iCJo&TdfjG3zw<`5>cKcDzuJyDTq3QV(OXK)GG)n%W{(EKU$@k10`@+eK^~ zG=bbNn=;+Vqaleupz(n|t{l;uZHyCXsS9!B{zK^KRtQsPEC3BtC1JSnTcKIaif47d zf!IMkdDiqSaLKj7IukkUb>*md>xUYjdtVK0GY#zTCPp{Z1V|2a`%tSLJ88cMZ? zJ%sb4wMpiOe`)Zk8h}Rypga0Ee33MhUiNmOqf-|{)EmLR+!M!1xp0Sn6}-tS6;G9X z6PjjQv$vnG7;IJvtABJtSd0rtnwVq2j4NVqFldIu@fRQ?=$75@7xhxQH+g@o%j!MbcWDL;A+`9_ZHQRWIKKNVg! zao+`JY%Q@eMh@pEOaS3Y1%zom625<%0!scLVZhxAO0{*Rc3EfEyi^RsAH1TU7z;Z` z>hSC!bMD^Mht0H&`JCT98Wj9f5;@iqW8;-*_@*KXDmeh_3)T3zVxe$Lx|V(pjD>u? zlT>!p9!I>oNDrcJk^fbB%pYNZ?S4HaUpNu<#*Tx2_Hj3czWP9>UUj;KAzkt*6Pm-|m9BT6! 
z#+z41ifJDVxL2aNkd+(>`|n4AmR`FkQ(Q;o4ok!_2kw<7yb1?{8OpfsNRUu=dKS${ zUkF>QdcedCZFW}I!l9qk+4`I*_v`2`-E_?#e!Q^5vD0L*JMD*PcK+Xb{(b(SrA;K~ zrGp{u+r7OP8>7AdYoW&rd9iMqmME1JPj0?9>EU;GJk}(KvwOyZ^0-r=6t0VRCus1E z>Tj_0<8&G~Eey1Wrh(OJW#L>x9juC1B*W__6jbHRU14v8cl}gx>yO(sc%(H6L+$Xg zN&%@DN%K<-BSp{^^X62+wH0P;`{F&cz4E|MODgEzAZHlyW(mZ9lfm)7tR>R@e+d;$ zj(nnICA_UFp!ir@_SH#;>O;S2M4&bf_B{;Uqx-SvXA4&T=7nyk&Teh8IIB5L+&?>s zeig}xAIlF)ymYTqb-_+3-Iz;f4hxvuc(7#c1sS~39PTX{s`-yUWT9)73o9C1U6%$qIbn8sEL%1VOZQ%;cY}1%ZwYeIY#VGCB@IiYEHDtp!mtB^kmT*f*)m@Ibo2%Bn2Hil zD{G?6I$JimQ4UyPkE*A>!0$@|pfM;>QXkm|#|JB5dYU?<_+6$Ar;bxe!#miy;2^E{ z>xGZDD3xy9zn}?Ey3TU93f81J0B76hqn+ zu#TFg>cyhyH}vafHr#XCBfOTn2(iPSi^KCZ`Cxh<&KabKn^#D&vBoV*`s+?hj{Tvr z$KR21K^wW;n*&K>`_Nk@8UAwTub8qj56%p!qj4tP(CpX-AHRBH2q!^cXFZ)d*Pj*l zb(i6+ULF@-mBi zaJdDZ_{PBQ zq`PoIO{9?QB=6SSRb=*J2ql=l6<2w^qutxJXz$=AA#tJx+QX~7&?*%U(K zDsPjkatqb@E~Z}7Eb-w;3+~!-K$MUCM4Afb=#bnh{8lxVEXiL7D@9wrFjbCC-pWYU zT1UW_d`;Rg$A_<3R@0@So5k*`44O0D5B-lgao~q)2!E=?f%RrMSxpIz&Yyvk=8^Dz z&c%{EFklT`#9&nid^}bQubp(_`;+DH&eIkl?z$`{_8&k7AM7wL)rJiNieZqW1{6o7 zK;rgc6j1V%HWZhF;B`SbsQQ^yH#t+DQYE~0EuvAj12D4UFla}=1LMlGuyUI{ekywh z$%#F{eX}g4Qa@C({t8{E^tsVS5z39tuoAZjUEhm@*S-7Ndztn$$A*d%Dlw3<4 z3oc_kdE1~m7&4(5f~U*!`umBb>ZitiEbTEm)q@6wspBkV1N`-1BRnrOh7}L*L;04Y zBriV!%084pEqnp+NQTcjD*=njX>76!_GnYaM5;Iv zdMG5f{H3+BHr_$c{)oT6#*1fMR|s8VF#XT}jz^zp^Sw=P_{`@`!oXoG zPJ+C$22{EvW4XUd$*?<${9p23*1y#sOa@1A{~uTJ%f;Dze()fkl~~N)HA?InmT=G$qvTg4#VH>n`B+6C@r0@@zrI=mJ^p{32ebM>iEkObnlYSmEJuQ))&&02SEZ~Y z&PaHWs?O_cbtO--z4(9D^PZwgRFQ)|a zVBzkzByzQnrSCc6I3Q>SHpv;Vd~6zDGS2c=czzEW!gupv$MuMI_Tfa{!R^FYrcvVF7< zdS7;jut8CrEBF866eI6Y?Q(q{Djx{ZFKa&izop^%z46>KX*{*rfO}snCe2+Y;A5NwYC64nfXqmC*&a{j zKXmz|@?LRQw+!9?@k#IqI6(h-W{8PzH1SmRMUF?T%{@f*K>+v76qGH!RKx%bn3rFoD{kNL$tQw(vJP2*N<4xnB~ir zrZ-^j@e}YOq=I5fG`Ml0KTY0rO4Ly5%Z{I3(eOoa@Zv|Bu%PHEg~TJKba|nv#(GSm zG=K_yj0xO><5!h%pRD0vdESgx23FAm_b=Z0au3Ch-*wQr>$Et2wGMZ!Riir=@2EL2 z6odtGG-BdCs_ObmkIa7wP43y$c5f_*qX*N%i>9b*YR)H;P4Ue;FS3$Vg_Lz4$m7dt zFw9Y)z;B@-V;E1nPV|H2(nq0Rr9aGFp$T!9WMR_7HhA~Z6+;q@@$8ayqTE3}PWvv) z_Pe~fz-b4m(=ghY;|pD*tKez4GfoW*FDW9s3pTrNrwvCg(%~d6el$W9dY-sOj}E3& z-o`mp>1Ra=9ol^3fDZkbHi6zR)q*$UU(loGVQgn0&5MgJiPh;7DI_hB&W+Q-cS(b> zqo51Ms5^tB)Df6J@C;e)*&s&sJ_V|I3V68RUI-qzhng2(5rT80VQO3re08zpBiftc z*^iZ^=J%iA{%_wF_1_6L7Ki~yON8LjiX8Vy3H6_^q13N>xZ|@TjT`tH;?G!=UfLc3 zW4Fk2f5X2e4TwA&7EU;AcKR`v*3NN^0B^(dp*-<4O&AbDH@&s^ zh^GjW2uC=%&<$I+T!9)NOTKxxlA3O7ao;=YbhqUhJMIIg-OhY*!T?NOc}E!P zSq%?YTHxrrFJSieG-~EXuL`uK9ySTcV6Tky)!XVZdA5p0ab=a;v`N*fP0`4WKjCNs!W^9C&TT_^-& zFjYDD2nM}>z_nXy=sM#hs8#l7J>P3^{(Za99HBuy8|J|F)t9K?qlXw>D~o^ESo2Af zhtTxx0(dA2cy{f3QPKK140>Bl7v|rBL1(`~#iJH_y?88*oWF{0YnWnYq%&rH)8=2B zZort;gT-}krtrpnc64fXI@PW@D@+;HSFAnwl}^}BrU&_Ud_LhUl;Uk@FOw(iaFGfG!j2N^%54C6q06T zGKx_hrP|d#qGh|0*uyVHi0$i)7xN0~o&I=KNa+-YwF1OjN5SF>D-1fGTI%rYA*2kF zWBES=hFgyW}*RB+o8tyLe~33D3@2ApHEVN;3RlhHxQC2bHJWK>hZ5T4`y{-$(0m;=QX7 zI?I4x4^5{Y-yTt}bWc=&6eFB?695l}*|F>dZGJw`iKhQJLtWF(QtI-2Fxhm2nk2sP zadI!*)}ATShfMLEbP%<=OOwvgC*rgHU7&XQpWjoq#k31{n9*cPE*EuRpt&^~8V+R1 zlzx(BkIZrX*L~u5_1Ca3zZ~AkE)ZLuo3l>QD7w0|no6HP5Z|W0BiZ4mxJ_~yydT`4 z3rhx5WY!mGP3U|1@bCR#b1~raT_p&pV_XvXpkFV0&Dmn1Jrh>El^=WYnL(qy!x|6Gb4c#hKSei_c z57xtp2MKiLg)t6!e@2|J6#aPs7(d zeORLVK^V6AGc5EnfxdT?aKMb$WN@HAJ6QDN7w254DQXObIqTx#^h|MD;9+69eKx7& zbka-n2w`8GJx1MsP0#*n@u4BoJoCOcRQ2d4+spmfaO+K)m#qM|=b56#VOidjI+zYT zi=YweMznJHL5g1f8(OD+Bhzp{@kYoqu|Hj;;x&gUWMU14u5By*eDRQQVN7qncT|~e zpT41qS7&3Ts(Wf9%@!w^Q0bffbTb9w(14g5iv}d^1KSfC^_-Y z|2kPp#1u-pzW5=NpT`ATw$Q(XOrwHC{ z{|UM6J0P&cffnsI!t77eFvkCF$%yhP)HGF(13sDap#28uQP-cI52&Dq1L^dA^AAbH 
zX%l`qCW?YDZNsWh7lfG~c94?aP3jSFOwxPiKH8h;3j5zTk=@ZAcxPxgRi7#lbL?gc z&(H<+?#A+iGIOxVw!*%D+A+{WHNYc0OC|J)Gdg1Z9_PGBM$HUd#?WgMVc4?PWa%spVrp2&_-zo_8f8* z%Fe3^({Du4>4`=(^~P+Js+z(5{&|~{fzvSM-T<6+X%gj5AI>o~H-uYLuhFW8i=Y+~ zCnRj{jhkXEu<*uqUXfQs`yMLunenAUif1_7b@?ih^|1p!R6^y?tEujY6k5cL#u=~l zX=1Y>N1W)(vyAt{8l_G;Bj-VjR^$pBPiz-Dcb13>kq%USL<%BXEIIPS2beNuBkY$~ z#ZS)V^t95NhnM^Xt5qAkr+<$Z1HWoefc_RyA~B`-Rtvf~@eW;@QAI^J4iRlGrbGQJ z;Co6u4K*}l>jR2xlRE%)wG*kd+mN3f%!XRajLRtH2QOj_VOe$@g!p=M+FV_n+$e+T z|Eco_7ZqGrtPDrmr$V}V6d0<#7S#(oLEG~PRcQSd_d2AK#yA5UeL_j>?|K_9zS=G9 z-m8I0F^zOQ_$rj#83r-(y=hcQ9DSUkL|-HJ(%R-pLSTIp6e;Hmes1Y>qv9boXH(2y6|`@%CSU7# z(d_4Rn$YD%d-WGn`Kl<2_EG0QMn$~NAXQvglLUqnN0amMWgMDi$xHit;^Qkd;@!ez zU?*)(CBW&`N8yx{H{A;**kGza?)i(weOHKw5|ir=;eZIX*Zm9J0-Z|BU(0fHih3e;{{tSDN}qe z$_H%&ub3_H8~V_o&{^zbF%liMVz4Paj>Z)p63a7naZQ6c&RnQ3>Yt1g^Z%0-r+OFA z+{-6vyNNp*of|I7e2_zp{rhOyCRv;*EyBkPA2&Gjj#Sb6O`HSP zo>1WrWxZHC*8}HV(c^8;9*QoC{e{)K8a#TE4lH}>gyEUH;D%fYoSgKKR9-uD;n z_y?FIKbZpBKZ*GkCm_e`r?|fD08Lo_1$ygzfD3UmX?-)1#OgU@_^%_sernkNQ#x(C z^`7>OpC*p`cuIVD?||rhbqC0-_r{-zSLped*N|DL0h5#dfYG1~;Yx}&PX6&jh@8;^ zubm!K<5F4d|MUtyve*xHr}gmu(Q7p6P%EXpY^I9FQIyj)5+;?I;R)|+aDDtsalV%_ zCMK_=o&R$D;O7KT)v1L(9``8fSRJgXZkOzRw1<8ttP|FJwnhKnQrJ^dfiHKR5>gL) z=k zcixFR4&R|xDNpPXD#Z!v^C51SE)3J!##X-RthKtD%#OTIco(m0Z*pFpb67jf08lH`al$=<;kmRg#NxNbJhM6D0q)z33eO8aN zYp$ZW`x~9eJBEqr0yoA*@vgdBF54c-r!NoX6Dix+XVybnCp8X(Cs_0S@V;gK@#a1; zCslnW9*D!wHLtQAKF6@BFb5kg=9XQ~IVAj$g!_0nALpELM{)V^$zC01k5TSp1k988D)7FBe^Ih35oO8MwYm!V$T6TJ10p?6Bl z;sXywjCp*9;6y0hNr>j#Uk_pRr)uyTcMFRq?I2liX*6%SNgp~jQD%%OUDO?gPy6N3 zyNe64*Ur6=Tc67}tPZf#ab=%Rv(up0nq2Uko`@N~dVE&-Jbu<1ij8Lr=xxhI=9VOU z{`>}>zEcS^vgOKL{ubbhVQ-=B@+VK4(O{M%I$l#o-_a4+ZN8jaJ-Tu3SWnj7UqT1O5H?)y#djTd zLc2^c=Qg_G1Fv)5akXjZ^wd5+{XS1b zJ+Jw^ZG(K-$anGZ)iehD)nf7R?RQWhZ3U`tGf;V*AC6r88YR*j=;rGESO+s%lF$#Y z1k{ni>L*wk{}2Pp=ixBtBFY_g0wz2x=6+Il`P!?+cq@iP+2>1GYSwu^EC2uMWY3U( zJbBna{tz^PuRs1Q)Ni~l)}NFnjSzbw_~=K;FSCoFH1SI5$=xPA`rRA)C{qPN^bwp! 
zJO;ZJZme?evG?Tc4lp!5MGMCgti>9L*(^eJ)g3__&ePVJ9W>u9m3s3g@K*3+NiP$+ zyZ9IAA2q=*v3F_2u|p94_&T&^R>GYqKQ0=bP1+$JN%2h^C^ab}zI#TOr0qE~R1Rt# zPYH(8lEm*5cZd`67SLW3Pd;{Af-Ti9XjvZ#t_x;S%%3|{AUqL@bK^l*{V5#_y$LoE z4^WmdQ=Oc6r{`Oy{QCD8wSHl#eW$JHfEX>yi@ z&~EakbPDdH70-@Q_Mu*!&|O817f%Y8cQw=PXIF)SC{yaN$)e=7dRPc=j`zR;uD}lzV_;8HUZ$y@TV0S;;KLsWazNVKW@#lR>w=9HMq#kgiWoM((vWyg_vE+ zq&lfT7yno+k>n19*40BLYclxPQJEC+<-c zYD*7Np=Mw3{Bud1UZIKex}@00I;nJEjVqGFUAXe!PLO)K3Fh=DfzducsrlU_iaC%$ zXA2&dZV1{93J0>`PuXG8e(BBhY$#WZ?t!Kkqa>!^#(~1Q?UeiLGY$J<&aFy?G}k+n zc1p(3#yO8^p^LFt{v=v>VHFF$oy8(2>Y~)jNRo8z6pp0!#8jatUs>n}{U*PlmsO9T z%go37_SRewC0EXxxw^-_rW6ClntfhL2kPf_KW(L;~Sj# z@S#z#IyemK=bsVHEv0#MWeSWbI0MDk63JFO70i9?Id{f*us*8Ir_R5G+Eud5Z5lY} ziz@5bm5G{7dtheYA^3c~HGh{n1;Zv;a>JO=SpP?-+ousXQ6lzA)i+u;1g9mw_a>Wsv{Nn;V-V#npXGO3X z+@DSTisumy2T}EDU@5sF|iQJ`hSU1#$!c7O@SJQjK zNO>PLbtx73s(@o{M$^!qJu$p)G;Fzu{Ksq`#YVN29@8#>U#l1%%J(IuwWnd`-J=kE z*$x*N9;3(^k0G|NFJ{YVa*)dkx;Ic4R2ENyKbyR9-y3N@@Nk=O>YzU#(8-0y%z350 z2W_yh{2VBWFCq4@2U*rz!;G(%JTv28a!;wE)Wbbkd9WR?_fUf5vF|`f(@6MhCyRNS zKD=x3N?2Vzlg7Sv_Li=h{iw<(@bseO()PZCF zyTYP1hO7~#h}ngeV(kVgo;Xs4J-hwTQST6aKB0)qv4Ya~wup!C^x?2=|H1YNe*8W6 zJ!Ck1hKL0o{CdP$l94sRi%y-i^{p-TzuN+qmqtU@aXBh#u)-4?7n0ZMJ&^BllIB|7 zAWu(E?v?&c95w72v}ASDs}=p2lkU<*Q+e7t{XZyPV~ts>ufW75?mTIUDb$Qr;kKc5 zf}um3uyf}FN(l^vj;5FJAl9C}#vP#%8X9yi+L2#8n@#1a}8(J31V4uJl zLTrW#Iz9i#oio+Yf4m}RZTd)gTLJ~uf4cm_?Jo6P?Zp;J3Yg^jOwe046v3bk>OO5F zoz5_rJwk@JjcJ2p-ty4C*dD`oyd7km~_@(K~|?lVCv>PHWvOeKjnb7_K# zgzvNr!M+aOA87D!tQ$5K#-4{HTJ^Ek;ma=P%mb?FDN4YshZzU=GQ4W+~R- zj4}z{oaM;Hws+w5w8JE4|5F?>$yWF%Y!$BFcm?@#R-CA*%gJ>Ug-7i>=%n3tD2(^u z+g^5L*;E4CH+u2_rNO8X6Tqfdc8V`fcZgfRAgLRi21h{FGlMRs-h~g(Vky#ZApTC>1~v;VaoW#8)N@m(aA0&ZZGK^co4q^0 z={i%^bw%!@XNJXx+eGWtMx3_V7cTwM!i-`2OH@U0 z-VWF2e}gp%Utr;}Rbaj0Uo1`oIPB?Aa@~I!q#ll2?8Z z8qAs9KWKpbU)c2QJKeT#0GpUIqW^{x8kJKfUR*--{hSgSM}$#e>u)jti8MEVH|3!NfeP>S;Ye{P#T)D`4ajp4vlVvJq4En9Np^gu;DZM;%?(O;dF^OedM=lK4l*FER9$Wxe?e@GdIX01JP_WhaPKb#FzwI@sM`EX=#$(N zf?xaKDP6#ghFbVacel7^+kbR?sv*xd?~j>=*MuvB%voohyySYJJ2seTiR(kJ(Ub2w zZ1wO_X+Pbp(hmdLDDA5}S=st<&A{{I^hk%gtXkpsBx}4r;xdfsUPx|6LxkfOTS#er z9PQntj^EdqVr`=puK#mi628xrs{PU_myNJ~XCWl6Mhb1H0_lHRcie3-F3gUG$s>I! 
z%G<&AY%`c{s{ZKJK1qY^sa-%d;nRXt)uXruQB}{;Re7?fB{kJN)y^XI1%IZYsd2k{ zFh5w2>_16^<8g!Yq;RdIZm_&d{aIc$9*f?;aVYk$s9`Fq`nMDd>lVGsLsZqLJ26RB zzm`w+gT{XFyXwyh@`FV$HcG0gbWJgyML|lUY8>>|lcb8@Eb@dTwS(!w{X<=lsq(H@3sTjd8(dgdZMf}K?bu#b z-wp~4bSX4Noh(ZArHde`LpN%UyxNoH4SC&1RF2M2FTFW3C?;zgc&dm$rMV@mqbW@V zO*De#Fr)!R^Rj#tE|l*&(*ee1qm9H>H7LQBE-+HZOO6>*@~@qs--DXVAnkw(%cMZN zBVR3|&Sf#zN3Wft=CUa2CxJSmsA1&}qE|@OWb~fC~8Go3mX|hkR^@o`zj#kd*k275wkp8EcPHpXv2W08Becsxd z_Q#pnp;cS^!{u{bORnu7E&}?K-v1)g{?km~Kg-nqS*HF+nXVixo{rkMqh9YH=IMW$ zhrIizd9GxhKg*L-{IfjmpXBNPB+vLKdFDUKv;Rq+^H1{JKh0BH==;O9Ry+8odFKC> z*GX=mA-fBezl_0elq78&&G$@_sRh3dNJ<(KR;SL05hFrJ#14v$j!cRSO-Sq?{E2cM z`TGdevsA_O{)=_c!ja0FD#@s@^=#MZ?)|F`$;&v4YT5r{6&V(imnW){ZvKna9C_ho z1S`n^>N7(|lg_rEO+oekY>M;$GCmO3kmjCyc5Iz5Yzmkjo zpZoW{qbvIl-*xY^-%b4=zFW_JxBl__GYg64oMHa^14Wf6BUXRk66#IoneT?z^;zz- zNyfj;Acjx6GSAEbEs0R)U)z0onMMPaFKYbTvoxNW5b}j(x!N;H&rHvJW@k*z`|lol zG(^yWeC9A%q0(gkcLr^R+U~#BtBlFY3Mglv8M%B5<6k{qPZwCeUaOnWjNL}3QVW&8 zPx`MLk`9+9E4l8!C-(l8-+#u$MJB~2#Q%5y`G^1N_um>W$Upx52RxpC{r>Ym|Mwr- zHEz~cilUcGi>6}JLmmeRlkDhX9cGht1tid|6hC_8-FFmq~xCTA&s~o@PmBW+2 zm!zj9pC+XcbmRjYk*~($6SWLJy5l@#&NQVUjI@Ot1)GlDtjwM98;JB4qegKEmXBW0zo> zs*cn*SGDUt9f0Ol&mak=S&ICvl%*?Td;c~`H4*uqQ#-}XMBE?=i?9sJQ7yf{8b7M964-2Y<&Nugm@4Bi;ee0 z#z%V^py`R<{yJO#E6;!L@Jk zo@T)w+mFKZ!JfWYDt?$JF*y zU%%KWPmAQZQ69r1qxhb|IM?JvY{&3=Yxhe?kPGJvbv=DYc{EEU*rASRL~L}7r+t#A zVN7iNh?sa!)nT571CkS?24G5JVr>7YV0yoVL^-`7k%_Ux26&=Wktn$-iKB+eHllEB za-HhO_m7JihAYrH5~*hh4ndB(Z*pv0w47ixZtXBnzr=*W9vOA>M8+izNJ#EKz%wjX z?m5n@Zw&UAJPfCXoq9$jB%*X5g4yv=F`hWNgB}}qU-p-rIO;C}|JUcQ&tIRvK7W1w R`uuPG{2v~AL&X3X0RWiu8l3 Date: Fri, 29 Nov 2019 16:01:04 -0800 Subject: [PATCH 18/35] Better approach for saving beam states --- kglm/models/entity_disc.py | 9 ++++----- kglm/modules/dynamic_embeddings.py | 16 +++++++++++++++- kglm/tests/models/entity_nlm_test.py | 2 +- 3 files changed, 20 insertions(+), 7 deletions(-) diff --git a/kglm/models/entity_disc.py b/kglm/models/entity_disc.py index 7bb5d6a..cc5c418 100644 --- a/kglm/models/entity_disc.py +++ b/kglm/models/entity_disc.py @@ -1,7 +1,6 @@ """ Discriminative version of EntityNLM for importance sampling. 
""" -from copy import deepcopy import logging from typing import Any, Dict, List, Optional, Tuple, Union @@ -355,7 +354,7 @@ def _annotation_logp(self, logp = hidden.new_zeros((batch_size, len(beam_states), self.num_possible_annotations)) for i, beam_state in enumerate(beam_states): - self._dynamic_embeddings.load_state_dict(beam_state) + self._dynamic_embeddings.load_beam_state(beam_state) # Entity type log probabilities: (batch_size, 2) entity_type_logits = self._entity_type_projection(hidden) @@ -471,7 +470,7 @@ def _update_beam_states(self, all_prev_num_embeddings = backpointers.new_zeros(batch_size, len(beam_states)) all_prev_last_seen = backpointers.new_zeros(batch_size, len(beam_states), self._max_embeddings) for i, beam_state in enumerate(beam_states): - self._dynamic_embeddings.load_state_dict(beam_state) + self._dynamic_embeddings.load_beam_state(beam_state) all_prev_entity_embeddings[:, i] = self._dynamic_embeddings.embeddings all_prev_num_embeddings[:, i] = self._dynamic_embeddings.num_embeddings all_prev_last_seen[:, i] = self._dynamic_embeddings.last_seen @@ -500,7 +499,7 @@ def _update_beam_states(self, timestep=timestep, mask=entity_types) - new_beam_states.append(deepcopy(self._dynamic_embeddings.state_dict())) + new_beam_states.append(self._dynamic_embeddings.beam_state()) return new_beam_states @@ -593,7 +592,7 @@ def beam_search(self, # Beam search logic predictions: List[Dict[str, torch.Tensor]] = [] - beam_states = [self._dynamic_embeddings.state_dict()] + beam_states = [self._dynamic_embeddings.beam_state()] output = None for timestep in range(sequence_length): # Get log probabilities of annotations diff --git a/kglm/modules/dynamic_embeddings.py b/kglm/modules/dynamic_embeddings.py index 79bdf19..76e95d6 100644 --- a/kglm/modules/dynamic_embeddings.py +++ b/kglm/modules/dynamic_embeddings.py @@ -1,5 +1,6 @@ -from typing import Dict, Optional +from copy import deepcopy import logging +from typing import Dict, Optional from overrides import overrides import torch @@ -241,3 +242,16 @@ def forward(self, # pylint: disable=arguments-differ out['loss'] = loss return out + + def beam_state(self): + return { + 'embeddings': self.embeddings, + 'num_embeddings': self.num_embeddings, + 'last_seen': self.last_seen + } + + def load_beam_state(self, beam_state): + self.embeddings = beam_state.get('embeddings', None) + self.num_embeddings = beam_state.get('num_embeddings', None) + self.last_seen = beam_state.get('last_seen', None) + diff --git a/kglm/tests/models/entity_nlm_test.py b/kglm/tests/models/entity_nlm_test.py index bc6dadf..8b62e94 100644 --- a/kglm/tests/models/entity_nlm_test.py +++ b/kglm/tests/models/entity_nlm_test.py @@ -42,7 +42,7 @@ def test_annotation_logp(self): # Apply to random hidden state hidden = torch.randn(batch_size, self.model._embedding_dim) - beam_states = [self.model._dynamic_embeddings.state_dict()] + beam_states = [self.model._dynamic_embeddings.beam_state()] logp = self.model._annotation_logp(hidden, timestep=0, beam_states=beam_states) # Check that output has correct shape From cc2daebac2916d04e2f5a4ad4baa051e52b70e6a Mon Sep 17 00:00:00 2001 From: "Robert L. 
Logan IV" Date: Sat, 30 Nov 2019 11:20:12 -0800 Subject: [PATCH 19/35] Added beam_state methods to RecentEntities --- kglm/modules/recent_entities.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/kglm/modules/recent_entities.py b/kglm/modules/recent_entities.py index 00eb0eb..9fd92c8 100644 --- a/kglm/modules/recent_entities.py +++ b/kglm/modules/recent_entities.py @@ -1,3 +1,4 @@ +from copy import deepcopy from typing import Dict, List, Tuple from allennlp.modules.token_embedders import TokenEmbedder @@ -156,3 +157,9 @@ def insert(self, values: torch.LongTensor, mask: torch.ByteTensor = None) -> Non else: self._remaining[i][values[i].item()] = self._cutoff + 1 + def beam_state(self): + beam_state = {'remaining': self._remaining} + return deepcopy(beam_state) + + def load_beam_state(self, beam_state): + self._remaining = beam_state.get('remaining', []) From a044a26f00fcabb625452684a10c0f31b2ca2100 Mon Sep 17 00:00:00 2001 From: "Robert L. Logan IV" Date: Sat, 30 Nov 2019 11:20:53 -0800 Subject: [PATCH 20/35] Ensure that dynamic embeddings outputs are detached --- kglm/modules/dynamic_embeddings.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/kglm/modules/dynamic_embeddings.py b/kglm/modules/dynamic_embeddings.py index 76e95d6..6be3c33 100644 --- a/kglm/modules/dynamic_embeddings.py +++ b/kglm/modules/dynamic_embeddings.py @@ -244,14 +244,14 @@ def forward(self, # pylint: disable=arguments-differ return out def beam_state(self): - return { - 'embeddings': self.embeddings, - 'num_embeddings': self.num_embeddings, - 'last_seen': self.last_seen + beam_state = { + 'embeddings': self.embeddings.detach(), + 'num_embeddings': self.num_embeddings.detach(), + 'last_seen': self.last_seen.detach() } + return beam_state def load_beam_state(self, beam_state): self.embeddings = beam_state.get('embeddings', None) self.num_embeddings = beam_state.get('num_embeddings', None) self.last_seen = beam_state.get('last_seen', None) - From ca98899b8292615375c03fe0f6e1d436f856f3b4 Mon Sep 17 00:00:00 2001 From: "Robert L. 
Logan IV" Date: Sun, 1 Dec 2019 20:28:51 -0800 Subject: [PATCH 21/35] Added beam search to KglmDisc --- kglm/data/alias_database.py | 8 + kglm/models/kglm_disc.py | 476 +++++++++++++++++++++++++++++++- kglm/modules/recent_entities.py | 1 - kglm/tests/models/kglm_test.py | 110 +++++++- 4 files changed, 591 insertions(+), 4 deletions(-) diff --git a/kglm/data/alias_database.py b/kglm/data/alias_database.py index 1200469..25427de 100644 --- a/kglm/data/alias_database.py +++ b/kglm/data/alias_database.py @@ -90,6 +90,14 @@ def load(cls, path: str): id_array_lookup=id_array_lookup, token_to_entity_lookup=token_to_entity_lookup) + def nested_token_to_uid(self, e, t): + if isinstance(e, list) and isinstance(t, list): + return [self.nested_token_to_uid(_e, _t) for _e, _t in zip(e, t)] + elif isinstance(e, str) and isinstance(t, str): + return self.token_to_uid(e, t) + else: + raise ValueError(f'Encountered error looking up copy indices:\ne:{e}\nt:{t}') + def token_to_uid(self, entity: str, token: str) -> int: if entity in self._id_map_lookup: id_map = self._id_map_lookup[entity] diff --git a/kglm/models/kglm_disc.py b/kglm/models/kglm_disc.py index 873164d..ef76e96 100644 --- a/kglm/models/kglm_disc.py +++ b/kglm/models/kglm_disc.py @@ -1,6 +1,8 @@ import logging +from copy import deepcopy +from collections import namedtuple import math -from typing import Any, Dict, List, Optional +from typing import Any, Dict, List, Optional, Tuple from allennlp.data.vocabulary import Vocabulary, DEFAULT_OOV_TOKEN from allennlp.modules import TextFieldEmbedder, Seq2SeqEncoder @@ -9,6 +11,7 @@ from allennlp.nn.util import (get_text_field_mask, masked_log_softmax, masked_softmax, sequence_cross_entropy_with_logits) from allennlp.training.metrics import Average, CategoricalAccuracy, F1Measure, SequenceAccuracy +import numpy as np from overrides import overrides import torch import torch.nn.functional as F @@ -22,6 +25,14 @@ logger = logging.getLogger(__name__) +# Decoding from the KGLM discriminator requires ensuring that: +# * New mentions cannot be of recently mentioned entities. +# * Related mentions must be related to a recently mentioned entity. +# * Ongoing mention cannot continue non-mentions. +# The following structure tracks this information when performing beam search. +KglmBeamState = namedtuple('KglmBeamState', ['recent_entities', 'ongoing']) + + @Model.register('kglm-disc') class KglmDisc(Model): """ @@ -351,6 +362,22 @@ def sample(self, logp = mention_logp + new_entity_logp + derived_entity_logp return {'sample': sample, 'logp': logp} + def get_raw_entity_ids(self, entity_ids: torch.LongTensor) -> torch.LongTensor: + raw_entity_ids = torch.zeros_like(entity_ids) + for *index, entity_id in nested_enumerate(entity_ids.tolist()): + token = self.vocab.get_token_from_index(entity_id, 'entity_ids') + raw_entity_id = self.vocab.get_token_index(token, 'raw_entity_ids') + raw_entity_ids[tuple(index)] = raw_entity_id + return raw_entity_ids + + def get_entity_ids(self, raw_entity_ids: torch.LongTensor) -> torch.LongTensor: + entity_ids = torch.zeros_like(raw_entity_ids) + for *index, raw_entity_id in nested_enumerate(raw_entity_ids.tolist()): + token = self.vocab.get_token_from_index(raw_entity_id, 'raw_entity_ids') + entity_id = self.vocab.get_token_index(token, 'entity_ids') + entity_ids[tuple(index)] = entity_id + return entity_ids + def _encode_source(self, source: Dict[str, torch.Tensor]) -> torch.Tensor: # Extract, embed and encode source tokens. 
@@ -390,7 +417,7 @@ def _mention_type_loss(self,
 
     def _new_entity_logits(self,
                            encoded: torch.Tensor,
-                           shortlist: torch.Tensor) -> torch.Tensor:
+                           shortlist: torch.Tensor = None) -> torch.Tensor:
         if self._use_shortlist:
             # Embed the shortlist entries
             shortlist_embeddings = embedded_dropout(
@@ -621,6 +648,451 @@ def _forward_loop(self,
 
         return {'loss': loss}
 
+    def _next_mention_type_logp(self, next_mention_type_logits, beam_states):
+        """
+        Computes log probabilities of the mention type for the next token, adjusting the logits
+        so that non-mentions cannot be continued as ongoing mentions.
+        Intended for use when performing beam search.
+
+        Parameters
+        ==========
+        next_mention_type_logits: torch.FloatTensor
+            Tensor of shape (batch_size, num_mention_types) containing next mention type logits.
+        beam_states: List[KglmBeamState]
+            List of previous beam states.
+
+        Returns
+        =======
+        next_mention_type_logp:
+            Tensor of shape (batch_size, beam_width, num_mention_types) containing next mention type log probabilities.
+        """
+        beam_width = len(beam_states)
+
+        # Tile the mention type logits, and penalize the ongoing-mention class on beams whose
+        # previous token was not part of a mention.
+        out = next_mention_type_logits.unsqueeze(1).repeat(1, beam_width, 1)
+        for i, beam_state in enumerate(beam_states):
+            out[~beam_state.ongoing, i, -1] = -1e32
+
+        return F.log_softmax(out, dim=-1)
+
+    def _next_new_entity_logp(self, next_new_entity_logits, beam_states):
+        """
+        Computes log probabilities of new entity mentions.
+        Intended for use when performing beam search.
+
+        Parameters
+        ==========
+        next_new_entity_logits: torch.FloatTensor
+            Tensor of shape (batch_size, num_entities) containing next new entity logits.
+        beam_states: List[KglmBeamState]
+            List of previous beam states.
+
+        Returns
+        =======
+        next_new_entity_logp:
+            Tensor of shape (batch_size, beam_width, num_entities) containing next new entity log probabilities.
+        """
+        beam_width = len(beam_states)
+        # Tile the new entity logits, and penalize entities that were recently mentioned.
+        out = next_new_entity_logits.unsqueeze(1).repeat(1, beam_width, 1)
+        for j, beam_state in enumerate(beam_states):
+            self._recent_entities.load_beam_state(beam_state.recent_entities)
+            for i, recent_ids in enumerate(self._recent_entities._remaining):
+                for recent_id in recent_ids:
+                    out[i, j, recent_id] = -1e32
+        return F.log_softmax(out, dim=-1)
+
+    def _next_related_entity_logp(self, next_encoded_head, next_encoded_relation, beam_states):
+        """
+        Computes log probabilities of related entity mentions.
+        Intended for use when performing beam search.
+
+        Parameters
+        ==========
+        next_encoded_head: torch.FloatTensor
+            Tensor of shape (batch_size, embedding_dim) of the head encodings.
+        next_encoded_relation: torch.FloatTensor
+            Tensor of shape (batch_size, embedding_dim) of the relation encodings.
+        beam_states: List[KglmBeamState]
+            List of previous beam states.
+
+        Returns
+        =======
+        logp:
+            Tensor of shape (batch_size, beam_width, num_candidates) containing the log
+            probability of the parent/relation combination.
+        Also returns a dictionary containing the annotation data:
+        parent_ids:
+            Tensor of shape (batch_size, beam_width, num_candidates)
+        relations:
+            Tensor of shape (batch_size, beam_width, num_candidates)
+        raw_entity_ids:
+            Tensor of shape (batch_size, beam_width, num_candidates)
+        """
+        batch_size = next_encoded_head.size(0)
+        beam_width = len(beam_states)
+        logp_arr = np.empty((batch_size, beam_width), dtype=object)
+        parent_ids_arr = np.empty((batch_size, beam_width), dtype=object)
+        relations_arr = np.empty((batch_size, beam_width), dtype=object)
+        raw_entity_ids_arr = np.empty((batch_size, beam_width), dtype=object)
+        for j, beam_state in enumerate(beam_states):
+            # Get the set of candidate parents from the RecentEntities module.
+            # Since we are only considering candidates for a single timestep we can get the parents
+            # directly from the RecentEntities._remaining dictionaries' keys.
+            self._recent_entities.load_beam_state(beam_state.recent_entities)
+            for i, candidate_ids in enumerate(self._recent_entities._remaining):
+                # Cast candidate ids to a tensor, lookup embeddings, and compute score.
+                candidate_ids = torch.tensor(list(candidate_ids.keys()),
+                                             dtype=torch.int64,
+                                             device=next_encoded_head.device)
+                candidate_embeddings = self._entity_embedder(candidate_ids)
+                candidate_logits = torch.mv(candidate_embeddings, next_encoded_head[i])
+                candidate_logp = F.log_softmax(candidate_logits, dim=-1)
+
+                # Lookup relations
+                _, s, r, o = self._knowledge_graph_lookup(candidate_ids)
+                relation_embeddings_list = [self._relation_embedder(_r) for _r in r]
+
+                # Stop early if node is isolated
+                if not s:
+                    logp_arr[i, j] = next_encoded_head.new_empty(0)
+                    parent_ids_arr[i, j] = next_encoded_head.new_empty(0, dtype=torch.int64)
+                    relations_arr[i, j] = next_encoded_head.new_empty(0, dtype=torch.int64)
+                    raw_entity_ids_arr[i, j] = next_encoded_head.new_empty(0, dtype=torch.int64)
+                    continue
+
+                # Otherwise compute relation probabilities for each parent and combine
+                temp_logp = []
+                temp_parent_ids = []
+                temp_relations = []
+                temp_raw_entity_ids = []
+                for idx, relation_embeddings in enumerate(relation_embeddings_list):
+                    num_relations = relation_embeddings.size(0)
+                    relation_logits = torch.mv(relation_embeddings, next_encoded_relation[i])
+                    relation_logp = F.log_softmax(relation_logits, dim=-1)
+                    temp_logp.append(candidate_logp[idx] + relation_logp)
+                    temp_parent_ids.append(s[idx].repeat(num_relations))
+                    temp_relations.append(r[idx])
+                    temp_raw_entity_ids.append(o[idx])
+                logp_arr[i, j] = torch.cat(temp_logp)
+                parent_ids_arr[i, j] = torch.cat(temp_parent_ids)
+                relations_arr[i, j] = torch.cat(temp_relations)
+                raw_entity_ids_arr[i, j] = torch.cat(temp_raw_entity_ids)
+
+        num_candidates = max(t.size(0) for t in logp_arr.flatten())
+        logp = next_encoded_head.new_full((batch_size, beam_width, num_candidates), -1e32)
+        parent_ids = next_encoded_head.new_zeros((batch_size, beam_width, num_candidates), dtype=torch.int64)
+        relations = next_encoded_head.new_zeros((batch_size, beam_width, num_candidates), dtype=torch.int64)
+        raw_entity_ids = next_encoded_head.new_zeros((batch_size, beam_width, num_candidates), dtype=torch.int64)
+        for i in range(batch_size):
+            for j in range(beam_width):
+                size = logp_arr[i, j].size(0)
+                logp[i, j, :size] = logp_arr[i, j]
+                parent_ids[i, j, :size] = parent_ids_arr[i, j]
+                relations[i, j, :size] = relations_arr[i, j]
+                raw_entity_ids[i, j, :size] = raw_entity_ids_arr[i, j]
+
+        annotations = {
+            'parent_ids': parent_ids,
+            'relations': relations,
+            'raw_entity_ids': raw_entity_ids
+        }
+
+        return logp, annotations
+
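For intuition, the candidate scores above factorize: a (parent, relation) pair's log probability is the parent's log-softmax score under the head encoding plus the relation's log-softmax score under the relation encoding, so the concatenated candidates on a beam form a proper distribution. A minimal sketch of the combination, assuming a single shared relation set (the sizes below are made up; in the model the embeddings come from `_entity_embedder` and `_relation_embedder`):

    import torch
    import torch.nn.functional as F

    embedding_dim, num_parents, num_relations = 4, 3, 5
    head = torch.randn(embedding_dim)          # plays the role of next_encoded_head[i]
    relation = torch.randn(embedding_dim)      # plays the role of next_encoded_relation[i]
    parent_embeddings = torch.randn(num_parents, embedding_dim)
    relation_embeddings = torch.randn(num_relations, embedding_dim)

    parent_logp = F.log_softmax(torch.mv(parent_embeddings, head), dim=-1)
    relation_logp = F.log_softmax(torch.mv(relation_embeddings, relation), dim=-1)

    # Joint log probability of picking parent p, then relation r.
    joint_logp = parent_logp.unsqueeze(1) + relation_logp.unsqueeze(0)
    assert torch.allclose(joint_logp.exp().sum(), torch.tensor(1.0))

In the model each parent contributes its own relation set, which is why the real code concatenates per-parent blocks and pads them out to `num_candidates`.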
def _top_k_annotations(self, + next_mention_type_logp, + next_new_entity_logp, + next_related_entity_logp, + related_entity_annotations, + output, + k): + """ + Aggregate log probabilities and return top-k results. + + Don't be intimidated by the amount of code - almost all of it relates to various + bookkeeping tasks to get the annotations. + """ + # === Bookkeeping ==== + # Need to get all of the relevant sizes + batch_size, beam_width, n_new = next_new_entity_logp.size() + n_related = next_related_entity_logp.size(-1) + + # Derive the length of the full tensor: # new + # related + ongoing + unrelated + length = n_new + n_related + 2 + total_logp = next_mention_type_logp.new_empty(batch_size, beam_width, length) + + # For clarity, name the slices + new_slice = slice(0, n_new) + related_slice = slice(n_new, n_new + n_related) + ongoing_slice = -2 + null_slice = -1 + + # === Annotation lookups === + mention_type_lookup = torch.zeros_like(total_logp, dtype=torch.int64) + parent_id_lookup = torch.zeros_like(total_logp, dtype=torch.int64) + relation_lookup = torch.zeros_like(total_logp, dtype=torch.int64) + raw_entity_id_lookup = torch.zeros_like(total_logp, dtype=torch.int64) + entity_id_lookup = torch.zeros_like(total_logp, dtype=torch.int64) + + # Mention type + mention_type_lookup[:, :, new_slice] = 1 + mention_type_lookup[:, :, related_slice] = 2 + mention_type_lookup[:, :, ongoing_slice] = 3 + mention_type_lookup[:, :, null_slice] = 0 + + # New + id_range = torch.arange(n_new, device=entity_id_lookup.device).view(1, 1, n_new) + entity_id_lookup[:, :, new_slice] = id_range + raw_entity_id_lookup[:, :, new_slice] = self.get_raw_entity_ids(id_range) + + # Related + parent_id_lookup[:, :, related_slice] = related_entity_annotations['parent_ids'] + relation_lookup[:, :, related_slice] = related_entity_annotations['relations'] + raw_entity_id_lookup[:, :, related_slice] = related_entity_annotations['raw_entity_ids'] + entity_id_lookup[:, :, related_slice] = self.get_entity_ids(related_entity_annotations['raw_entity_ids']) + + # Ongoing + if output is not None: + parent_id_lookup[:, :, ongoing_slice] = output['parent_ids'] + relation_lookup[:, :, ongoing_slice] = output['relations'] + entity_id_lookup[:, :, ongoing_slice] = output['entity_ids'] + raw_entity_id_lookup[:, :, ongoing_slice] = output['raw_entity_ids'] + + # === Logp === + + # Set the mention probabilities + total_logp[:, :, new_slice] = next_mention_type_logp[:, :, 1].unsqueeze(-1) + total_logp[:, :, related_slice] = next_mention_type_logp[:, :, 2].unsqueeze(-1) + total_logp[:, :, ongoing_slice] = next_mention_type_logp[:, :, 3] + total_logp[:, :, null_slice] = next_mention_type_logp[:, :, 0] + + # Add the entity probabilities + total_logp[:, :, new_slice] += next_new_entity_logp + total_logp[:, :, related_slice] += next_related_entity_logp + + # If available add the previous beam probabilities + if output is not None: + total_logp += output['logp'].unsqueeze(-1) + + # Get the top-k outputs + top_logp, top_indices = total_logp.view(batch_size, -1).topk(k, dim=-1) + output = { + 'logp': top_logp, + 'backpointers': top_indices // length, + 'mention_types': mention_type_lookup.view(batch_size, -1).gather(-1, top_indices), + 'parent_ids': parent_id_lookup.view(batch_size, -1).gather(-1, top_indices), + 'relations': relation_lookup.view(batch_size, -1).gather(-1, top_indices), + 'entity_ids': entity_id_lookup.view(batch_size, -1).gather(-1, top_indices), + 'raw_entity_ids': raw_entity_id_lookup.view(batch_size, -1).gather(-1, 
 top_indices)
+        }
+        return output
+
+    def _update_beam_states(self, output, beam_states):
+        """
+        Ensure that the correct recent entities modules and ongoing flags are properly taken from
+        the last step and updated using the current predicted outputs.
+        """
+        new_beam_states = []
+        backpointers = output['backpointers']
+        batch_size, beam_width = backpointers.size()
+        # To facilitate indexing with the backpointers, we'll store the RecentEntities' _remaining
+        # dicts in a numpy array.
+        remaining_dicts = np.empty((batch_size, len(beam_states)), dtype=object)
+        for j, beam_state in enumerate(beam_states):
+            self._recent_entities.load_beam_state(beam_state.recent_entities)
+            for i in range(batch_size):
+                remaining_dicts[i, j] = self._recent_entities._remaining[i]
+
+        for i in range(beam_width):
+            # Everything but null mention types can be ongoing in the next step.
+            ongoing = output['mention_types'][:, i] != 0
+
+            # Trace backpointers to retrieve the correct recent entities dicts, and update using
+            # the current output.
+            bp = backpointers[:, i].cpu().numpy()
+            remaining = remaining_dicts[np.arange(batch_size), bp].tolist()
+            self._recent_entities.load_beam_state({'remaining': remaining})
+            self._recent_entities(output['entity_ids'][:, i].unsqueeze(-1))
+
+            # Add beam states
+            new_beam_states.append(
+                KglmBeamState(recent_entities=self._recent_entities.beam_state(),
+                              ongoing=ongoing)
+            )
+
+        return new_beam_states
+
+    def _to_raw_entity_tokens(self, x):
+        """
+        Returns the raw entity id strings for a nested list of raw entity ids.
+        """
+        if isinstance(x, list):
+            return [self._to_raw_entity_tokens(i) for i in x]
+        elif isinstance(x, int):
+            return self.vocab.get_token_from_index(x, 'raw_entity_ids')
+        else:
+            raise ValueError('Expecting a nested list of raw entity ids')
+
+    def _trace_backpointers(self,
+                            source,
+                            target,
+                            reset,
+                            metadata,
+                            k,
+                            predictions):
+        """
+        Traces backpointers to collect the top-k annotations.
+        """
+        batch_size, seq_length = source['tokens'].shape
+        alias_database = metadata[0]['alias_database']
+
+        new_source = {key: value.unsqueeze(1).repeat(1, k, 1).view(batch_size * k, -1) for key, value in source.items()}
+        new_target = {key: value.unsqueeze(1).repeat(1, k, 1).view(batch_size * k, -1) for key, value in target.items()}
+        new_reset = reset.unsqueeze(1).repeat(1, k).view(batch_size * k)
+        new_metadata = [metadata[i] for i in range(batch_size) for _ in range(k)]
+
+        mention_types = []
+        parent_ids = []
+        relations = []
+        raw_entity_ids = []
+        entity_ids = []
+
+        backpointer = None
+
+        for prediction in reversed(predictions):
+            if backpointer is None:
+                mention_types.append(prediction['mention_types'])
+                parent_ids.append(prediction['parent_ids'])
+                relations.append(prediction['relations'])
+                raw_entity_ids.append(prediction['raw_entity_ids'])
+                entity_ids.append(prediction['entity_ids'])
+            else:
+                mention_types.append(prediction['mention_types'].gather(1, backpointer))
+                parent_ids.append(prediction['parent_ids'].gather(1, backpointer))
+                relations.append(prediction['relations'].gather(1, backpointer))
+                raw_entity_ids.append(prediction['raw_entity_ids'].gather(1, backpointer))
+                entity_ids.append(prediction['entity_ids'].gather(1, backpointer))
+            if backpointer is None:
+                backpointer = prediction['backpointers']
+            else:
+                backpointer = prediction['backpointers'].gather(1, backpointer)
+
+        mention_types = torch.stack(mention_types[::-1], dim=-1).view(batch_size * k, -1)
+        parent_ids = torch.stack(parent_ids[::-1], dim=-1).view(batch_size * k, -1)
+        relations = torch.stack(relations[::-1], dim=-1).view(batch_size * k, -1)
+        raw_entity_ids = torch.stack(raw_entity_ids[::-1], dim=-1).view(batch_size * k, -1)
+        entity_ids = torch.stack(entity_ids[::-1], dim=-1).view(batch_size * k, -1)
+
+        # One final bit of complexity - we need to get copy indices.
+        raw_entity_tokens = self._to_raw_entity_tokens(raw_entity_ids.tolist())
+        target_tokens = [x['target_tokens'] for x in new_metadata]
+        alias_copy_inds_list = alias_database.nested_token_to_uid(raw_entity_tokens, target_tokens)
+        alias_copy_inds = torch.tensor(alias_copy_inds_list, device=mention_types.device)
+
+        return {
+            'source': new_source,
+            'target': new_target,
+            'reset': new_reset,
+            'metadata': new_metadata,
+            'mention_types': mention_types,
+            'parent_ids': parent_ids,
+            'relations': relations,
+            'raw_entity_ids': raw_entity_ids,
+            'entity_ids': entity_ids,
+            'alias_copy_inds': alias_copy_inds
+        }
+
+    def beam_search(self,
+                    source: Dict[str, torch.Tensor],
+                    target: Dict[str, torch.Tensor],
+                    reset: torch.ByteTensor,
+                    metadata: Dict[str, Any],
+                    k: int) -> Dict[str, Any]:
+        """
+        Obtain the top-k (approximately) most likely predictions from the model using beam
+        search. Unlike typical beam search, all of the beam states are returned instead of just
+        the most likely one.
+
+        The returned candidates are intended to be marginalized over to obtain an upper bound for
+        the token-level perplexity of the KGLM.
+
+        Parameters
+        ==========
+        source : ``Dict[str, torch.Tensor]``
+            A tensor of shape ``(batch_size, sequence_length)`` containing the sequence of
+            tokens.
+        target : ``Dict[str, torch.Tensor]``
+            A tensor of shape ``(batch_size, sequence_length)`` containing the target tokens
+            that the returned annotations are aligned to.
+        reset : ``torch.ByteTensor``
+            Whether or not to reset the model's state. This should be done at the start of each
+            new sequence.
+        metadata : ``Dict[str, Any]``
+            Assorted metadata. Should contain the alias database, as well as the token strings
+            (needed to retrieve copy indices).
+        k : ``int``
+            Number of predictions to return.
+
+        Returns
+        =======
+        annotation : ``Dict[str, torch.Tensor]``
+            The top-k annotations: tensors of shape ``(batch_size * k, sequence_length)``
+            containing the predicted mention types, parent ids, relations, entity ids, and copy
+            indices, along with the tiled source, target, reset, and metadata fields needed to
+            score them.
+        """
+        # We want the output fields to be properly aligned for the generative model, which makes
+        # predictions for the **target** tokens! Hence, we feed them as the input (instead of the
+        # source tokens).
+        batch_size, sequence_length = target['tokens'].shape
+
+        # Reset the model's internal state.
+        if not reset.all():
+            raise RuntimeError('Detected that not all states are being `reset` (e.g., that input '
                               'sequences have been split). Cannot predict top-K annotations in '
                               'this setting!')
+        self.reset_states(reset)
+
+        # The following tensors can be computed using only the encoder:
+        # * The 3-headed encodings.
+        # * The (unconstrained) mention type logits.
+        # * The (unconstrained) new entity logits.
+        # Although we can compute the mention type and new entity logits, we will need to compute
+        # the log-probabilities during decoding due to the following constraints:
+        # * `mention_type` = CONTINUE only if the previous token type was a new or ongoing mention.
+        # * `new_entity` cannot be in recent entities.
+        encoded, *_ = self._encode_source(target['tokens'])
+        splits = [self.token_embedding_dim] + [self.entity_embedding_dim] * 2
+        encoded_token, encoded_head, encoded_relation = encoded.split(splits, dim=-1)
+        mention_type_logits = self._fc_mention_type(encoded_token)
+        new_entity_logits = self._new_entity_logits(encoded_head + encoded_relation)
+
+        # Beam search logic
+        predictions: List[Dict[str, torch.Tensor]] = []
+        beam_states = [KglmBeamState(recent_entities=self._recent_entities.beam_state(),
+                                     ongoing=torch.zeros_like(reset))]
+        output = None
+
+        for timestep in range(sequence_length):
+            # Get log probabilities of all next states
+            next_mention_type_logp = self._next_mention_type_logp(mention_type_logits[:, timestep],
+                                                                  beam_states)
+            next_new_entity_logp = self._next_new_entity_logp(new_entity_logits[:, timestep],
+                                                              beam_states)
+            next_related_entity_logp, related_entity_annotations = self._next_related_entity_logp(
+                encoded_head[:, timestep],
+                encoded_relation[:, timestep],
+                beam_states)
+
+            output = self._top_k_annotations(next_mention_type_logp,
+                                             next_new_entity_logp,
+                                             next_related_entity_logp,
+                                             related_entity_annotations,
+                                             output,
+                                             k)
+            beam_states = self._update_beam_states(output, beam_states)
+            predictions.append(output)
+
+        annotation = self._trace_backpointers(source, target, reset, metadata, k, predictions)
+
+        return annotation
+
     @overrides
     def train(self, mode=True):
         # This is a hack to ensure that the internal state resets when the model switches from

diff --git a/kglm/modules/recent_entities.py b/kglm/modules/recent_entities.py
index 9fd92c8..d4cfe05 100644
--- a/kglm/modules/recent_entities.py
+++ b/kglm/modules/recent_entities.py
@@ -73,7 +73,6 @@ def __call__(self,
                     k = candidate_lookup[i][parent_id]
                     candidate_mask[i, j + 1 : j + self._cutoff + 1, k] = 1
                     # Track how many sequence elements remain
-                    remainder = sequence_length - (j + self._cutoff + 1)
                     self._remaining[i][parent_id] = (j + self._cutoff + 1) - sequence_length
 
         # Remove any ids for non-recent parents (e.g.
those without remaining mask) diff --git a/kglm/tests/models/kglm_test.py b/kglm/tests/models/kglm_test.py index 54307c6..82991da 100644 --- a/kglm/tests/models/kglm_test.py +++ b/kglm/tests/models/kglm_test.py @@ -8,7 +8,8 @@ from kglm.common.testing import KglmModelTestCase from kglm.data.dataset_readers.enhanced_wikitext import EnhancedWikitextKglmReader from kglm.models.kglm import Kglm -from kglm.models.kglm_disc import KglmDisc +from kglm.models.kglm_disc import KglmDisc, KglmBeamState +from kglm.modules import RecentEntities class KglmTest(KglmModelTestCase): @@ -87,3 +88,110 @@ def test_sample(self): torch.manual_seed(123) logp2 = self.model.sample(**batch).get('logp', None) + def test_beam_search(self): + generator_params = Params.from_file("kglm/tests/fixtures/training_config/kglm.no-shortlist.json") + params = Params.from_file(self.param_file) + dataset_file = "kglm/tests/fixtures/enhanced-wikitext-test/train.jsonl" + + # Need instances from 'generative' reader! + reader_params = generator_params['dataset_reader'] + reader_params['mode'] = 'generative' + reader = DatasetReader.from_params(reader_params) + instances = list(reader.read(dataset_file)) + + iterator = DataIterator.from_params(generator_params['iterator']) + iterator.index_with(self.model.vocab) + batch, _ = next(iterator(instances, shuffle=False)) + + # Just want to check that function does not raise an error for now. + self.model.beam_search(batch['source'], + batch['target'], + batch['reset'], + batch['metadata'], + k=5) + + def test_next_mention_type_logp(self): + # Checks whether penalty correctly applied to ongoing mentions + batch_size = 1 + num_classes = 2 + k = 2 + + # All mention types have equal prob + next_mention_type_logits = torch.ones(batch_size, num_classes) + + # First beam has an ongoing mention, second does not + recent_entities_state = self.model._recent_entities.beam_state() + ongoing_0 = torch.ones(batch_size, dtype=torch.uint8) + ongoing_1 = torch.zeros(batch_size, dtype=torch.uint8) + beam_states = [ + KglmBeamState(recent_entities=recent_entities_state, ongoing=ongoing_0), + KglmBeamState(recent_entities=recent_entities_state, ongoing=ongoing_1) + ] + + next_mention_type_logp = self.model._next_mention_type_logp(next_mention_type_logits, beam_states) + # Log probabilities should be same on first beam, and different on second. + assert torch.allclose(next_mention_type_logp[0, 0, 0], next_mention_type_logp[0, 0, 1]) + assert not torch.allclose(next_mention_type_logp[0, 1, 0], next_mention_type_logp[0, 1, 1]) + # Log probability of first state (e.g., non-ongoing) should be close to 0.0 on second beam. + assert torch.allclose(next_mention_type_logp[0, 1, 0], torch.tensor(0.0)) + + def test_next_new_entity_logp(self): + # Checks whether penalty correctly applied to previously mentioned entities + batch_size = 1 + num_entities = 2 + k = 2 + + # All next entities have equal prob + next_new_entity_logits = torch.ones(batch_size, num_entities) + + # First entity is previously mentioned on first beam. + # No previous mentions on second beam. 
+        ongoing = None  # Value doesn't matter
+        recent_entities_state_0 = {'remaining': [{0: None}]}
+        recent_entities_state_1 = {'remaining': [{}]}
+        beam_states = [
+            KglmBeamState(recent_entities=recent_entities_state_0, ongoing=ongoing),
+            KglmBeamState(recent_entities=recent_entities_state_1, ongoing=ongoing)
+        ]
+
+        next_new_entity_logp = self.model._next_new_entity_logp(next_new_entity_logits, beam_states)
+        # Log probabilities should be different on first beam, and same on second.
+        assert not torch.allclose(next_new_entity_logp[0, 0, 0], next_new_entity_logp[0, 0, 1])
+        assert torch.allclose(next_new_entity_logp[0, 1, 0], next_new_entity_logp[0, 1, 1])
+        # Log probability of non-recent entity should be close to 0.0 on first beam.
+        assert torch.allclose(next_new_entity_logp[0, 0, 1], torch.tensor(0.0))
+
+    def test_next_related_entity_logp(self):
+        # Checks that:
+        # * There is no probability mass if there are no candidates
+        # * Probability distribution is valid if there are candidates
+        # * Annotations look correct (e.g., parent ids are consistent)
+        batch_size = 1
+        k = 2
+
+        next_encoded_head = torch.randn((batch_size, self.model.entity_embedding_dim))
+        next_encoded_relation = torch.randn((batch_size, self.model.entity_embedding_dim))
+        ongoing = None  # Value doesn't matter
+
+        # NOTE: `parent_id` = 5 chosen since this node in the knowledge graph has a relatively
+        # small number of outgoing edges.
+        recent_entities_state_0 = {'remaining': [{5: None}]}
+        recent_entities_state_1 = {'remaining': [{}]}
+        recent_entities_state_2 = {'remaining': [{5: None, 6: None}]}
+        beam_states = [
+            KglmBeamState(recent_entities=recent_entities_state_0, ongoing=ongoing),
+            KglmBeamState(recent_entities=recent_entities_state_1, ongoing=ongoing),
+            KglmBeamState(recent_entities=recent_entities_state_2, ongoing=ongoing)
+        ]
+
+        logp, annotations = self.model._next_related_entity_logp(next_encoded_head,
+                                                                 next_encoded_relation,
+                                                                 beam_states)
+        # Only first and last states will have probability mass
+        assert torch.allclose(logp[0, 0].exp().sum(), torch.tensor(1.0))
+        assert torch.allclose(logp[0, 1].exp().sum(), torch.tensor(0.0))
+        assert torch.allclose(logp[0, 2].exp().sum(), torch.tensor(1.0))
+
+        assert annotations['parent_ids'][0, 0].unique().size(0) == 2  # ids: 0 (padding), 5
+        assert annotations['parent_ids'][0, 1].unique().size(0) == 1  # ids: 0 (padding)
+        assert annotations['parent_ids'][0, 2].unique().size(0) == 2  # ids: 5, 6

From eb97da7fc01594c9219c35b8be8c50f3da652cfb Mon Sep 17 00:00:00 2001
From: rloganiv
Date: Tue, 3 Dec 2019 14:15:36 -0800
Subject: [PATCH 22/35] Fixed token error in Conll2012JsonlReader

---
 kglm/data/dataset_readers/conll2012.py       |  8 +++-----
 kglm/tests/dataset_readers/conll2012_test.py | 18 +++++++-----------
 2 files changed, 10 insertions(+), 16 deletions(-)

diff --git a/kglm/data/dataset_readers/conll2012.py b/kglm/data/dataset_readers/conll2012.py
index 2463666..a663b1a 100644
--- a/kglm/data/dataset_readers/conll2012.py
+++ b/kglm/data/dataset_readers/conll2012.py
@@ -233,8 +233,6 @@ def text_to_instance(self,
                          tokens: List[str],
                          clusters: Dict[str, List[Tuple[int, int]]]) -> Instance:
         # pylint: disable=arguments-differ
-        tokens = [_normalize_word(x, self._replace_numbers) for x in tokens]
-        tokens = ['@@START@@', *tokens, '@@END@@']
         fields = {'source': TextField([Token(x) for x in tokens], self._token_indexers)}
 
         entity_types = np.zeros(shape=(len(tokens),))
@@ -244,9 +242,9 @@
 
         for i, cluster in enumerate(clusters.values()):
             for span in cluster:
                 start,
end = span - entity_types[(start + 1 - self._offset):(end + 1 - self._offset)] = 1 - entity_ids[(start + 1 - self._offset):(end + 1 - self._offset)] = i + 1 - mention_lengths[(start + 1 - self._offset):(end + 1 - self._offset)] = np.arange(end - start, 0, step=-1) - 1 + entity_types[(start - self._offset):(end - self._offset)] = 1 + entity_ids[(start - self._offset):(end - self._offset)] = i + 1 + mention_lengths[(start - self._offset):(end - self._offset)] = np.arange(end - start, 0, step=-1) - 1 fields['entity_types'] = SequentialArrayField(entity_types, dtype=np.uint8) fields['entity_ids'] = SequentialArrayField(entity_ids, dtype=np.int64) diff --git a/kglm/tests/dataset_readers/conll2012_test.py b/kglm/tests/dataset_readers/conll2012_test.py index 2d84435..b06573a 100644 --- a/kglm/tests/dataset_readers/conll2012_test.py +++ b/kglm/tests/dataset_readers/conll2012_test.py @@ -70,20 +70,16 @@ def test_read_from_file(self, lazy, offset): assert len(instances) == 2 first_instance_tokens = [x.text for x in instances[0]['source'].tokens] - assert first_instance_tokens[:5] == ["@@START@@", "Jesus", "left", "and", "went"] - assert first_instance_tokens[-5:] == [ "long", "ago", ".", "''", "@@END@@"] + assert first_instance_tokens[:5] == ["@@START@@", "in", "the", "summer", "of"] + assert first_instance_tokens[-5:] == [ "mainland", "china", "tourist", "market", "@@END@@"] second_instance_entity_ids = instances[1]['entity_ids'].array second_instance_mention_lengths = instances[1]['mention_lengths'].array second_instance_entity_types = instances[1]['entity_types'].array - np.testing.assert_allclose(second_instance_entity_types[(1 - offset):(3 - offset)], - np.array([1,0], dtype=np.uint8)) - np.testing.assert_allclose(second_instance_entity_ids[(1 - offset):(2 - offset)], - np.array([1], dtype=np.int64)) - np.testing.assert_allclose(second_instance_entity_ids[(8 - offset):(9 - offset)], - np.array([1], dtype=np.int64)) - np.testing.assert_allclose(second_instance_entity_ids[(30 - offset):(32 - offset)], + np.testing.assert_allclose(second_instance_entity_types[(9 - offset):(13 - offset)], + np.array([0,1,1,0], dtype=np.uint8)) + np.testing.assert_allclose(second_instance_entity_ids[(10 - offset):(12 - offset)], np.array([1, 1], dtype=np.int64)) - np.testing.assert_allclose(second_instance_mention_lengths[(30 - offset):(32 - offset)], - np.array([1, 0], dtype=np.int64)) \ No newline at end of file + np.testing.assert_allclose(second_instance_mention_lengths[(10 - offset):(12 - offset)], + np.array([1, 0], dtype=np.int64)) From e64a0f24a7d670224203a33be962ecd18c0afc02 Mon Sep 17 00:00:00 2001 From: rloganiv Date: Tue, 3 Dec 2019 15:00:06 -0800 Subject: [PATCH 23/35] Addressed stupid context embedding mismatch when resetting --- kglm/models/entity_nlm.py | 39 +++++++++++++++++++-------------------- 1 file changed, 19 insertions(+), 20 deletions(-) diff --git a/kglm/models/entity_nlm.py b/kglm/models/entity_nlm.py index 517ccd9..de27d5b 100644 --- a/kglm/models/entity_nlm.py +++ b/kglm/models/entity_nlm.py @@ -96,27 +96,22 @@ def __init__(self, out_features=2, bias=False) self._dynamic_embeddings = DynamicEmbedding(embedding_dim=embedding_dim, - max_embeddings=max_embeddings) + max_embeddings=max_embeddings, + tied_weight=self._entity_type_projection.weight) # For mention length prediction self._mention_length_projection = torch.nn.Linear(in_features=2*embedding_dim, out_features=max_mention_length) # For next word prediction - self._dummy_context_embedding = Parameter(F.normalize(torch.randn(1, 
embedding_dim))) # TODO: Maybe squeeze self._entity_output_projection = torch.nn.Linear(in_features=embedding_dim, out_features=embedding_dim, bias=False) - self._context_output_projection = torch.nn.Linear(in_features=embedding_dim, - out_features=embedding_dim, - bias=False) self._vocab_projection = torch.nn.Linear(in_features=embedding_dim, out_features=vocab.get_vocab_size('tokens')) if tie_weights: self._vocab_projection.weight = self._text_field_embedder._token_embedders['tokens'].weight # pylint: disable=W0212 - # self._perplexity = Perplexity() - # self._unknown_penalized_perplexity = UnknownPenalizedPerplexity(self.vocab) self._entity_type_accuracy = CategoricalAccuracy() self._entity_id_accuracy = CategoricalAccuracy() self._mention_length_accuracy = CategoricalAccuracy() @@ -227,7 +222,7 @@ def _forward_loop(self, contexts = self._state['prev_contexts'] sequence_length += 1 else: - contexts = self._dummy_context_embedding.repeat(batch_size, 1) + contexts = tokens['tokens'].new_zeros(batch_size, self._embedding_dim, dtype=torch.float32) # Embed tokens and get RNN hidden state. mask = get_text_field_mask(tokens).byte() @@ -343,17 +338,23 @@ def _forward_loop(self, # Always predict the next word. This is done using the hidden state and contextual bias. entity_embeddings = self._dynamic_embeddings.embeddings[next_entity_types, next_entity_ids[next_entity_types]] - entity_embeddings = self._entity_output_projection(entity_embeddings) + # entity_embeddings = self._entity_output_projection(entity_embeddings) context_embeddings = contexts[~next_entity_types] - context_embeddings = self._context_output_projection(context_embeddings) + # context_embeddings = self._context_output_projection(context_embeddings) - # The checks in the following block of code are required to prevent adding empty - # tensors to vocab_features (which causes a floating point error). - vocab_features = current_hidden.clone() + # Combine entity and context embeddings + combined_embeddings = torch.zeros_like(current_hidden) if next_entity_types.any(): - vocab_features[next_entity_types] = vocab_features[next_entity_types] + entity_embeddings + combined_embeddings[next_entity_types] = entity_embeddings if (~next_entity_types).any(): - vocab_features[~next_entity_types] = vocab_features[~next_entity_types] + context_embeddings + combined_embeddings[~next_entity_types] = context_embeddings + + # Project + combined_embeddings_proj = self._entity_output_projection(combined_embeddings) + + # The checks in the following block of code are required to prevent adding empty + # tensors to vocab_features (which causes a floating point error). 
+ vocab_features = current_hidden + combined_embeddings_proj vocab_logits = self._vocab_projection(vocab_features[next_mask & current_mask]) vocab_logp = F.log_softmax(vocab_logits, -1) _vocab_loss = -vocab_logp.gather(-1, next_tokens[next_mask & current_mask].unsqueeze(-1)) @@ -372,7 +373,7 @@ def _forward_loop(self, # mask=next_mask.float()) # Lastly update contexts - contexts = current_hidden + contexts = combined_embeddings self._perplexity(vocab_loss, mask.sum()) @@ -420,7 +421,7 @@ def reset_states(self, reset: torch.ByteTensor) -> None: self._state['prev_entity_types'][reset] = 0 self._state['prev_entity_ids'][reset] = 0 self._state['prev_mention_lengths'][reset] = 0 - self._state['prev_contexts'][reset] = 0 + self._state['prev_contexts'][reset] = 0.0 # Reset the dynamic embeddings self._dynamic_embeddings.reset_states(reset) @@ -437,13 +438,11 @@ def train(self, mode=True): # batch sizes (e.g. the `reset` tensor will not be the right size). In future # implementations this should be handled more robustly. super().train(mode) - self._state = None @overrides def eval(self): # TODO: See train. super().eval() - self._state = None @overrides def get_metrics(self, reset: bool = False) -> Dict[str, float]: @@ -456,4 +455,4 @@ def get_metrics(self, reset: bool = False) -> Dict[str, float]: 'ppl': self._perplexity.get_metric(reset) } - hidden = self._rnn(embeddings) \ No newline at end of file + hidden = self._rnn(embeddings) From 340f85079acf2680251e050854f514dcc8d2c76e Mon Sep 17 00:00:00 2001 From: rloganiv Date: Sun, 8 Dec 2019 01:41:52 -0800 Subject: [PATCH 24/35] Updated experiment parameters --- experiments/conll_2012_vocab.jsonnet | 17 +++ experiments/entity_disc.jsonnet | 4 +- experiments/entity_disc_conll2012.jsonnet | 6 - .../entity_disc_conll2012_no_peeking.jsonnet | 24 ++-- experiments/entity_disc_conll2012_prp.jsonnet | 64 ++++++++++ experiments/entity_nlm.jsonnet | 2 +- experiments/entity_nlm_conll2012.jsonnet | 34 +++-- experiments/entity_nlm_conll2012_prp.jsonnet | 65 ++++++++++ experiments/kglm-copy.jsonnet | 116 ++++++++++++++++++ 9 files changed, 288 insertions(+), 44 deletions(-) create mode 100644 experiments/conll_2012_vocab.jsonnet create mode 100644 experiments/entity_disc_conll2012_prp.jsonnet create mode 100644 experiments/entity_nlm_conll2012_prp.jsonnet create mode 100644 experiments/kglm-copy.jsonnet diff --git a/experiments/conll_2012_vocab.jsonnet b/experiments/conll_2012_vocab.jsonnet new file mode 100644 index 0000000..e1c9738 --- /dev/null +++ b/experiments/conll_2012_vocab.jsonnet @@ -0,0 +1,17 @@ +{ + "vocabulary": { + "type": "extended", + "max_vocab_size": {"tokens": 10000} + }, + "datasets_for_vocab_creation": ["train"], + "dataset_reader": { + "type": "conll2012_jsonl", + "token_indexers": { + "tokens": { + "type": "single_id", + "lowercase_tokens": false + } + } + }, + "train_data_path": "data/conll-2012/processed/train.jsonl", +} diff --git a/experiments/entity_disc.jsonnet b/experiments/entity_disc.jsonnet index 75c6f73..4b9e9e7 100644 --- a/experiments/entity_disc.jsonnet +++ b/experiments/entity_disc.jsonnet @@ -37,7 +37,7 @@ "trainer": { "type": "lm", "cuda_device": 0, - "num_epochs": 750, + "num_epochs": 13, "optimizer": { "type": "adam", "lr": 0.0003 @@ -48,4 +48,4 @@ "directory_path": "data/enhanced-wikitext-2/vocab", "extend": false } -} \ No newline at end of file +} diff --git a/experiments/entity_disc_conll2012.jsonnet b/experiments/entity_disc_conll2012.jsonnet index 2cee49d..b5fef64 100644 --- 
a/experiments/entity_disc_conll2012.jsonnet +++ b/experiments/entity_disc_conll2012.jsonnet @@ -6,12 +6,6 @@ }, "dataset_reader": { "type": "conll2012_jsonl", - "token_indexers": { - "tokens": { - "type": "single_id", - "lowercase_tokens": true - } - } }, "train_data_path": "data/conll-2012/processed/train.jsonl", "validation_data_path": "data/conll-2012/processed/dev.jsonl", diff --git a/experiments/entity_disc_conll2012_no_peeking.jsonnet b/experiments/entity_disc_conll2012_no_peeking.jsonnet index 5b9b5bf..ecbf5e1 100644 --- a/experiments/entity_disc_conll2012_no_peeking.jsonnet +++ b/experiments/entity_disc_conll2012_no_peeking.jsonnet @@ -6,13 +6,7 @@ }, "dataset_reader": { "type": "conll2012_jsonl", - "offset": 1, - "token_indexers": { - "tokens": { - "type": "single_id", - "lowercase_tokens": true - } - } + "offset": 1 }, "train_data_path": "data/conll-2012/processed/train.jsonl", "validation_data_path": "data/conll-2012/processed/dev.jsonl", @@ -22,13 +16,13 @@ "token_embedders": { "tokens": { "type": "embedding", - "embedding_dim": 128, + "embedding_dim": 256, "trainable": true }, }, }, - "embedding_dim": 128, - "hidden_size": 128, + "embedding_dim": 256, + "hidden_size": 256, "num_layers": 1, "max_mention_length": 100, "max_embeddings": 100, @@ -37,8 +31,8 @@ }, "iterator": { "type": "fancy", - "batch_size": 16, - "split_size": 15, + "batch_size": 343, + "split_size": 30, "splitting_keys": [ "source", "entity_types", @@ -48,8 +42,8 @@ }, "validation_iterator": { "type": "fancy", - "batch_size": 16, - "split_size": 15, + "batch_size": 343, + "split_size": 128, "splitting_keys": [ "source", "entity_types", @@ -64,7 +58,7 @@ "cuda_device": 0, "optimizer": { "type": "adam", - "lr": 1e-4 + "lr": 1e-3 }, "validation_metric": "+eid_acc" } diff --git a/experiments/entity_disc_conll2012_prp.jsonnet b/experiments/entity_disc_conll2012_prp.jsonnet new file mode 100644 index 0000000..5aaaae5 --- /dev/null +++ b/experiments/entity_disc_conll2012_prp.jsonnet @@ -0,0 +1,64 @@ +{ + "vocabulary": { + "type": "extended", + "extend": false, + "directory_path": "/kermit/rlogan/entity-nlm/data/vocabulary" + }, + "dataset_reader": { + "type": "conll2012_jsonl", + }, + "train_data_path": "/kermit/rlogan/entity-nlm/data/conll-2012/processed/train.jsonl", + "validation_data_path": "/kermit/rlogan/entity-nlm/data/conll-2012/processed/dev.jsonl", + "model": { + "type": "entitydisc", + "text_field_embedder": { + "token_embedders": { + "tokens": { + "type": "embedding", + "embedding_dim": 256, + "trainable": true + }, + }, + }, + "embedding_dim": 256, + "hidden_size": 256, + "num_layers": 1, + "max_mention_length": 100, + "max_embeddings": 100, + "dropout_rate": 0.4, + "variational_dropout_rate": 0.1 + }, + "iterator": { + "type": "fancy", + "batch_size": 343, + "split_size": 30, + "splitting_keys": [ + "source", + "entity_types", + "entity_ids", + "mention_lengths" + ], + }, + "validation_iterator": { + "type": "fancy", + "batch_size": 343, + "split_size": 128, + "splitting_keys": [ + "source", + "entity_types", + "entity_ids", + "mention_lengths" + ], + "truncate": false + }, + "trainer": { + "type": "lm", + "num_epochs": 400, + "cuda_device": 0, + "optimizer": { + "type": "adam", + "lr": 1e-3 + }, + "validation_metric": "+eid_acc" + } +} diff --git a/experiments/entity_nlm.jsonnet b/experiments/entity_nlm.jsonnet index 9bfc403..559e9ab 100644 --- a/experiments/entity_nlm.jsonnet +++ b/experiments/entity_nlm.jsonnet @@ -53,7 +53,7 @@ "num_epochs": 750, "optimizer": { "type": "adam", - "lr": 0.0003 + "lr": 
0.0001 } }, "vocabulary": { diff --git a/experiments/entity_nlm_conll2012.jsonnet b/experiments/entity_nlm_conll2012.jsonnet index 609151b..07b1c56 100644 --- a/experiments/entity_nlm_conll2012.jsonnet +++ b/experiments/entity_nlm_conll2012.jsonnet @@ -1,17 +1,11 @@ { "vocabulary": { "type": "extended", - "extend": false, - "directory_path": "data/vocabulary" + "directory_path": "data/vocabulary", + "extend": false }, "dataset_reader": { "type": "conll2012_jsonl", - "token_indexers": { - "tokens": { - "type": "single_id", - "lowercase_tokens": true - } - } }, "train_data_path": "data/conll-2012/processed/train.jsonl", "validation_data_path": "data/conll-2012/processed/dev.jsonl", @@ -22,24 +16,24 @@ "token_embedders": { "tokens": { "type": "embedding", - "embedding_dim": 256, + "embedding_dim": 300, "trainable": true }, }, }, - "embedding_dim": 256, - "hidden_size": 256, + "embedding_dim": 300, + "hidden_size": 300, "num_layers": 1, "max_mention_length": 100, "max_embeddings": 100, - "tie_weights": true, - "dropout_rate": 0.4, - "variational_dropout_rate": 0.1 + "tie_weights": false, + "dropout_rate": 0.1, + "variational_dropout_rate": 0.2 }, "iterator": { "type": "fancy", - "batch_size": 512, - "split_size": 15, + "batch_size": 256, + "split_size": 120, "splitting_keys": [ "source", "entity_types", @@ -49,8 +43,8 @@ }, "validation_iterator": { "type": "fancy", - "batch_size": 512, - "split_size": 15, + "batch_size": 343, + "split_size": 128, "splitting_keys": [ "source", "entity_types", @@ -61,11 +55,11 @@ }, "trainer": { "type": "lm", - "num_epochs": 40, + "num_epochs": 400, "cuda_device": 0, "optimizer": { "type": "adam", - "lr": 1e-3 + "lr": 1e-3, } } } diff --git a/experiments/entity_nlm_conll2012_prp.jsonnet b/experiments/entity_nlm_conll2012_prp.jsonnet new file mode 100644 index 0000000..f49ab2c --- /dev/null +++ b/experiments/entity_nlm_conll2012_prp.jsonnet @@ -0,0 +1,65 @@ +{ + "vocabulary": { + "type": "extended", + "directory_path": "/kermit/rlogan/entity-nlm/data/vocabulary", + "extend": false + }, + "dataset_reader": { + "type": "conll2012_jsonl", + }, + "train_data_path": "/kermit/rlogan/entity-nlm/data/conll-2012/processed/train.jsonl", + "validation_data_path": "/kermit/rlogan/entity-nlm/data/conll-2012/processed/dev.jsonl", + "datasets_for_vocab_creation": ["train"], + "model": { + "type": "entitynlm", + "text_field_embedder": { + "token_embedders": { + "tokens": { + "type": "embedding", + "embedding_dim": 200, + "trainable": true + }, + }, + }, + "embedding_dim": 200, + "hidden_size": 200, + "num_layers": 1, + "max_mention_length": 100, + "max_embeddings": 100, + "tie_weights": false, + "dropout_rate": 0.2, + "variational_dropout_rate": 0.2 + }, + "iterator": { + "type": "fancy", + "batch_size": 60, + "split_size": 70, + "splitting_keys": [ + "source", + "entity_types", + "entity_ids", + "mention_lengths" + ], + }, + "validation_iterator": { + "type": "fancy", + "batch_size": 60, + "split_size": 70, + "splitting_keys": [ + "source", + "entity_types", + "entity_ids", + "mention_lengths" + ], + "truncate": false + }, + "trainer": { + "type": "lm", + "num_epochs": 400, + "cuda_device": 0, + "optimizer": { + "type": "adam", + "lr": 3e-4, + } + } +} diff --git a/experiments/kglm-copy.jsonnet b/experiments/kglm-copy.jsonnet new file mode 100644 index 0000000..490234c --- /dev/null +++ b/experiments/kglm-copy.jsonnet @@ -0,0 +1,116 @@ +{ + "vocabulary": { + "type": "extended", + "extend": false, + "directory_path": "data/linked-wikitext-2/vocab" + }, + "dataset_reader": { + 
"type": "enhanced-wikitext-kglm", + "alias_database_path": "data/linked-wikitext-2/alias.pkl" + }, + "train_data_path": "data/linked-wikitext-2/train.jsonl", + "validation_data_path": "data/linked-wikitext-2/valid.jsonl", + "model": { + "type": "kglm", + "token_embedder": { + "token_embedders": { + "tokens": { + "type": "embedding", + "embedding_dim": 400, + "trainable": true + } + } + }, + "entity_embedder": { + "token_embedders": { + "entity_ids": { + "type": "embedding", + "pretrained_file": "data/linked-wikitext-2/embeddings.entities.txt", + "embedding_dim": 256, + "trainable": false, + "vocab_namespace": "entity_ids" + } + } + }, + "relation_embedder": { + "token_embedders": { + "relations": { + "type": "embedding", + "pretrained_file": "data/linked-wikitext-2/embeddings.relations.txt", + "embedding_dim": 256, + "trainable": true, + "vocab_namespace": "relations" + } + } + }, + "alias_encoder": { + "type": "lstm", + "input_size": 400, + "hidden_size": 400 + }, + "knowledge_graph_path": "data/linked-wikitext-2/knowledge_graph.pkl", + "use_shortlist": false, + "hidden_size": 1150, + "num_layers": 3, + "cutoff": 30, + "tie_weights": true, + "initializer": [ + ["token_embedder.weight", {"type": "uniform", "a": -0.1, "b": 0.1}], + ["decoder.bias", {"type": "constant", "val": 0.0}] + ] + }, + "iterator": { + "type": "fancy", + "batch_size": 60, + "split_size": 70, + "splitting_keys": [ + "source", + "target", + "mention_type", + "raw_entity_ids", + "entity_ids", + "parent_ids", + "relations", + "shortlist_inds", + "alias_copy_inds" + ] + }, + "validation_iterator": { + "type": "fancy", + "batch_size": 60, + "split_size": 70, + "splitting_keys": [ + "source", + "target", + "mention_type", + "raw_entity_ids", + "entity_ids", + "parent_ids", + "relations", + "shortlist_inds", + "alias_copy_inds" + ], + "truncate": false + }, + "trainer": { + "type": "lm", + "num_epochs": 500, + "cuda_device": 0, + // "grad_clipping": 0.25, + // "optimizer": { + // "type": "nt-asgd", + // "lr": 22.5, + // "weight_decay": 1.2e-6 + // }, + // "learning_rate_scheduler": { + // "type": "nt-asgd", + // "non_monotone_interval": 5 + // }, + "optimizer": { + "type": "adam", + "lr": 3e-4, + "weight_decay": 1.2e-6 + }, + "validation_metric": "-ppl" + } +} From 397a5121b403d2886e9df951a3eb55a2037d8029 Mon Sep 17 00:00:00 2001 From: rloganiv Date: Sun, 8 Dec 2019 01:44:21 -0800 Subject: [PATCH 25/35] Sentence lengths + importance sampling granularity * No longer returning sentence lengths from the fancy iterator * Affected trainer/commands updated accordingly * Importance sampling now samples batch in outer loop, to do instance-level evaluation. 
--- kglm/commands/beamsum.py | 16 +- kglm/commands/evaluate_perplexity.py | 247 ++++++++++++++------------ kglm/data/iterators/fancy_iterator.py | 2 +- kglm/training/trainer.py | 12 +- 4 files changed, 144 insertions(+), 133 deletions(-) diff --git a/kglm/commands/beamsum.py b/kglm/commands/beamsum.py index d4c5c61..7d5b38f 100644 --- a/kglm/commands/beamsum.py +++ b/kglm/commands/beamsum.py @@ -97,7 +97,7 @@ def evaluate_perplexity(model: Model, held_over_data = None - for batch, _ in generator_tqdm: + for batch in generator_tqdm: # We need sequence length to help compute perplexity batch_size, _ = batch['source']['tokens'].shape @@ -112,25 +112,30 @@ def evaluate_perplexity(model: Model, # Draw a sample with torch.no_grad(): - sample = sampler.beam_search(batch['source'], - batch['reset'], - beam_width) + sample = sampler.beam_search(source=batch['source'], + reset=batch['reset'], + k=beam_width) # Evaluate on sample with torch.no_grad(): model_output = model(**sample) + # gold_output = model(**batch) model_logp = model_output['logp'] + # logger.debug(model_logp) + # logger.debug(gold_output['logp']) model_logp = model_logp.view(batch_size, beam_width) model_logp = torch.logsumexp(model_logp, -1) - print(torch.exp(-model_logp.sum() / n_tokens.sum())) + # logger.debug(torch.exp(-model_logp.sum() / n_tokens.sum())) if summand is None: summand = model_logp.sum() else: summand += model_logp.sum() + logger.debug(torch.exp(-summand / denom)) + ppl = torch.exp(-summand / denom) metrics = { @@ -156,7 +161,6 @@ def evaluate_from_args(args: argparse.Namespace) -> Dict[str, Any]: sampler = sampler_archive.model sampler.eval() - # Load the evaluation data. NOTE: We are using the model's reader! validation_dataset_reader_params = config.pop('validation_dataset_reader', None) if validation_dataset_reader_params is not None: diff --git a/kglm/commands/evaluate_perplexity.py b/kglm/commands/evaluate_perplexity.py index d609a08..e71caa6 100644 --- a/kglm/commands/evaluate_perplexity.py +++ b/kglm/commands/evaluate_perplexity.py @@ -34,7 +34,7 @@ def add_subparser(self, name: str, parser: argparse._SubParsersAction) -> argpar subparser.add_argument('input_file', type=str, help='path to the file containing the evaluation data') - subparser.add_argument('--output-file', type=str, help='path to output file') + subparser.add_argument('--out', type=str, help='prefix of output files') subparser.add_argument('--weights-file', type=str, @@ -53,12 +53,12 @@ def add_subparser(self, name: str, parser: argparse._SubParsersAction) -> argpar subparser.add_argument('--batch-size', type=int, - default=None, + default=1, help='Batch size (default: whatever iterator was set to)') subparser.add_argument('--split-size', type=int, - default=None, + default=1e10, help='Split size (default: whatever iterator was set to)') subparser.add_argument('--num-samples', @@ -83,24 +83,44 @@ def add_subparser(self, name: str, parser: argparse._SubParsersAction) -> argpar return subparser -PRESERVED_FIELDS = {'source', 'reset'} - - -def _offset(sample, held_over_data): - batch_size = sample['reset'].size(0) - new_sample = {'source': sample['source'], - 'reset': sample['reset']} - new_held_over_data = {} - for field in sample: - if field in PRESERVED_FIELDS: - continue - if held_over_data is None: - prefix = sample[field].new_zeros(batch_size) - else: - prefix = held_over_data[field] - new_sample[field] = torch.cat((prefix.unsqueeze(1), sample[field][:,:-1]), dim=1) - new_held_over_data[field] = sample[field][:,-1] - return new_sample, 
new_held_over_data +# TODO: Make sure this still makes sense... +# PRESERVED_FIELDS = {'source', 'reset'} +# +# +# def _offset(sample, held_over_data): +# batch_size = sample['reset'].size(0) +# new_sample = {'source': sample['source'], +# 'reset': sample['reset']} +# new_held_over_data = {} +# for field in sample: +# if field in PRESERVED_FIELDS: +# continue +# if held_over_data is None: +# prefix = sample[field].new_zeros(batch_size) +# else: +# prefix = held_over_data[field] +# new_sample[field] = torch.cat((prefix.unsqueeze(1), sample[field][:,:-1]), dim=1) +# new_held_over_data[field] = sample[field][:,-1] +# return new_sample, new_held_over_data + +UNSPLIT_FIELDS = {'reset', 'metadata', 'shortlist'} +def split(batch, split_size: int): + sequence_length = batch['source']['tokens'].shape[1] + num_splits = sequence_length // split_size + + def _chunk(x, start, stop): + if isinstance(x, dict): + return {k: v if k in UNSPLIT_FIELDS else _chunk(v, start, stop) for k, v in x.items()} + if isinstance(x, torch.Tensor): + return x[:, start:stop].contiguous() + + for i in range(num_splits): + chunk = _chunk(batch, i * split_size, (i + 1) * split_size) + + if i > 0: + chunk['reset'] = torch.zeros_like(chunk['reset']) + + yield chunk def tile(t, amount): @@ -110,6 +130,8 @@ def tile(t, amount): return t.repeat(*args) elif isinstance(t, dict): return {k: tile(v, amount) for k, v in t.items()} + elif isinstance(t, list): + return [x for x in t for _ in range(amount)] def logsumexp(prev: torch.FloatTensor, @@ -133,114 +155,107 @@ def evaluate_perplexity(model: Model, cuda_device: int, temperature: float = 1.0, offset: bool = False, - samples_per_batch: int = 1) -> Dict[str, Any]: - check_for_gpu(cuda_device) + samples_per_batch: int = 1, + split_size: int = 1e10) -> Dict[str, Any]: + check_for_gpu(cuda_device) logger.info('Iterating over dataset') + # weight = None - # summands = [] - # penalized_summands = [] - trajectory = np.zeros(num_samples // samples_per_batch) - individual_estimates = np.zeros(num_samples // samples_per_batch) + model.eval() + sampler.eval() - weight = None + iterator = data_iterator(instances, num_epochs=1, shuffle=False) + generator_tqdm = Tqdm.tqdm(iterator, total=0) - for i in range(num_samples // samples_per_batch): - iterator = data_iterator(instances, num_epochs=1, shuffle=False) - generator_tqdm = Tqdm.tqdm(iterator, total=0) + summand = 0.0 + denom = 0.0 + fp = [] + q = [] + all_weights = [] - model.eval() - sampler.eval() - sampler._state = None + for batch in generator_tqdm: - summand = None - denom = None - #summand = torch.tensor(0.0) - # penalized_summand = torch.tensor(0.0) + batch_size = batch['reset'].shape[0] - held_over_data = None + n_tokens = util.get_text_field_mask(batch['source']).float().sum() + denom += n_tokens - for batch, _ in generator_tqdm: + epoch_weights = [] + epoch_fp = [] + epoch_q = [] - # We need sequence length to help compute perplexity - n_tokens = util.get_text_field_mask(batch['source']).float().sum(dim=-1) - if denom is None: - denom = n_tokens - else: - denom += n_tokens + batch = util.move_to_device(batch, cuda_device) - summand = util.move_to_device(summand, cuda_device) - batch = util.move_to_device(batch, cuda_device) + # Tile if that's what we're doing + if samples_per_batch > 1: + batch = tile(batch, samples_per_batch) - # Tile if that's what we're doing - if samples_per_batch > 1: - batch = tile(batch, samples_per_batch) + for i in range(num_samples // samples_per_batch): - # Draw a sample - with torch.no_grad(): - 
sampler_output = sampler.sample(**batch, - temperature=temperature, - offset=offset) - sample_logp = sampler_output['logp'] - sample = sampler_output['sample'] + logger.info(f'i={i}') - if offset: - sample, held_over_data = _offset(sample, held_over_data) + # summand = util.move_to_device(summand, cuda_device) + # batch = util.move_to_device(batch, cuda_device) - # Evaluate on sample - with torch.no_grad(): - model_output = model(**sample) + weights = None + for chunk in split(batch, split_size): + logger.info('next_chunk') - model_logp = model_output['logp'] - if summand is None: - summand = (model_logp - sample_logp) - else: - summand += (model_logp - sample_logp) + # Draw a sample + with torch.no_grad(): + sampler_output = sampler.sample(**chunk, + temperature=temperature, + offset=offset) + sample_logp = sampler_output['logp'] + sample = sampler_output['sample'] - # model_penalized_logp = model_output['penalized_logp'] - # penalized_summand += (model_penalized_logp - sample_logp) + # if offset: + # sample, held_over_data = _offset(sample, held_over_data) - # generator_tqdm.set_description('Instantaneous PPL: %0.4f' % torch.exp((sample_logp - model_logp) / n_tokens).item()) + with torch.no_grad(): + model_output = model(**sample) + model_logp = model_output['logp'] + split_weights = (model_logp - sample_logp).view(batch_size, samples_per_batch) - current_avg = summand.view(samples_per_batch, -1).sum(dim=-1).logsumexp(dim=0) - np.log(samples_per_batch).item() - instance_ppl = torch.exp(-current_avg.sum() / denom.sum()) - print(denom.sum()) + if weights is None: + weights = split_weights + else: + weights += split_weights + logger.debug(torch.exp(-split_weights/split_size)) + epoch_weights.append(weights.cpu()) + epoch_fp.append(model_logp.view(batch_size, samples_per_batch).cpu()) + epoch_q.append(sample_logp.view(batch_size, samples_per_batch).cpu()) - weight = logsumexp(weight, summand, i, samples_per_batch) - ppl = torch.exp(-weight / denom.sum()) + # Combine all the epoch weights + combined_weights = torch.cat(epoch_weights, dim=1) + combined_fp = torch.cat(epoch_fp, dim=1) + combined_q = torch.cat(epoch_q, dim=1) + all_weights.append(combined_weights) + fp.append(combined_fp) + q.append(combined_q) - individual_estimates[i] = instance_ppl.item() - trajectory[i] = ppl.item() + # Compute importance sampled logp of the sequences in the batch + logp_hat = combined_weights.logsumexp(dim=1) - math.log(samples_per_batch) + summand += logp_hat.sum() - # summands.append(summand) - # # penalized_summands.append(penalized_summand) - # # if i == 0: - # # t = summand.unsqueeze(0) - # # p = penalized_summand.unsqueeze(0) - # # else: - # # t = torch.stack(summands, dim=0) - # # # p = torch.stack(penalized_summands, dim=0) - # t = torch.cat(summands, dim=0) - # t_sum = torch.logsumexp(t, dim=0) - # # p_sum = torch.logsumexp(p, dim=0) - # sum_logp = (t_sum - math.log((i+1)*1000)).item() - # # sum_logp_penalized = (p_sum - math.log((i+1)*1000)).item() - # ppl = math.exp(-sum_logp / 659) - # # upp = math.exp(-sum_logp_penalized / denom) + logger.info(f'PPL: {torch.exp(-summand / denom)}') - # trajectory[i] = ppl - # # individual_estimates[i] = math.exp(-summand.item() / denom) + # Create array of all the weights + all_weights_array = torch.cat(all_weights, dim=0).numpy() + fp_array = torch.cat(fp, dim=0).numpy() + q_array = torch.cat(q, dim=0).numpy() - # print('PPL: %f' % ppl) - # # print('UPP: %f' % upp) + # Compute perplexity + ppl = torch.exp(-summand / denom) metrics = { 'ppl': ppl, - # 'upp': 
upp, - 'trajectory': trajectory, - 'individual_estimates': individual_estimates + 'weights': all_weights_array, + 'fp': fp_array, + 'q': q_array } return metrics @@ -249,6 +264,7 @@ def evaluate_from_args(args: argparse.Namespace) -> Dict[str, Any]: logging.getLogger('allennlp.common.params').disabled = True logging.getLogger('allennlp.nn.initializers').disabled = True logging.getLogger('allennlp.modules.token_embedders.embedding').setLevel(logging.INFO) + logger.warning('This code will return improper results if sequences are split') # Load model from archive model_archive = load_archive(args.model_archive_file, args.cuda_device, args.overrides, args.weights_file) @@ -272,28 +288,27 @@ def evaluate_from_args(args: argparse.Namespace) -> Dict[str, Any]: logger.info('Reading evaluation data from: %s', evaluation_data_path) instances = dataset_reader.read(evaluation_data_path) - # To avoid hairy issues with splitting, we opt to use a basic iterator so that we can - # generate samples for entire sequences. iterator_params = config.pop('iterator', 'None') - if args.batch_size is not None: - iterator_params['batch_size'] = args.batch_size - if args.split_size is not None: - iterator_params['split_size'] = args.split_size - iterator_params['truncate'] = False + iterator_params['batch_size'] = args.batch_size + # Make split size really large to prevent splits (otherwise we'd have to + # deal with averaging the importance samples across splits ... + # if args.split_size is not None: + # iterator_params['split_size'] = args.split_size + iterator_params['split_size'] = 1e10 + iterator_params['truncate'] = False # TODO: Shouldn't need this anymore... iterator = DataIterator.from_params(iterator_params) iterator.index_with(model.vocab) metrics = evaluate_perplexity(model, sampler, args.num_samples, instances, iterator, args.cuda_device, args.temperature, - args.offset, args.samples_per_batch) + args.offset, args.samples_per_batch, + args.split_size) logger.info('Finished evaluating.') - logger.info('Metrics:') - for key, metric in metrics.items(): - logger.info('%s: %s', key, metric) - - output_file = args.output_file - if output_file: - np.save(output_file + '.trajectory.npy', metrics['trajectory']) - np.save(output_file + '.individual_estimates.npy', metrics['individual_estimates']) + + if args.out: + np.save(args.out + '_weights.npy', metrics['weights']) + np.save(args.out + '_fp.npy', metrics['fp']) + np.save(args.out + '_q.npy', metrics['q']) + return metrics diff --git a/kglm/data/iterators/fancy_iterator.py b/kglm/data/iterators/fancy_iterator.py index 2c6a5df..1186eae 100644 --- a/kglm/data/iterators/fancy_iterator.py +++ b/kglm/data/iterators/fancy_iterator.py @@ -105,7 +105,7 @@ def __call__(self, batch.index_instances(self.vocab) padding_lengths = batch.get_padding_lengths() - yield batch.as_tensor_dict(padding_lengths), 1 + yield batch.as_tensor_dict(padding_lengths) self._epochs[key] = epoch + 1 diff --git a/kglm/training/trainer.py b/kglm/training/trainer.py index 8c00caa..5bd6c33 100644 --- a/kglm/training/trainer.py +++ b/kglm/training/trainer.py @@ -288,7 +288,7 @@ def _train_epoch(self, epoch: int) -> Dict[str, float]: train_generator_tqdm = Tqdm.tqdm(raw_train_generator, total=num_training_batches) cumulative_batch_size = 0 - for batch, lr_mult in train_generator_tqdm: + for batch in train_generator_tqdm: batches_this_epoch += 1 self._batch_num_total += 1 batch_num_total = self._batch_num_total @@ -314,12 +314,6 @@ def _train_epoch(self, epoch: int) -> Dict[str, float]: if 
self._learning_rate_scheduler: self._learning_rate_scheduler.step_batch(batch_num_total) - # We dynamically adjust the learning rate to account for slight variations in the input - # sequences - original_lr = self.optimizer.param_groups[0]['lr'] - batch_lr = original_lr * lr_mult - self.optimizer.param_groups[0]['lr'] = batch_lr - if self._tensorboard.should_log_histograms_this_batch(): # get the magnitude of parameter updates for logging # We need a copy of current parameters to compute magnitude of updates, @@ -336,8 +330,6 @@ def _train_epoch(self, epoch: int) -> Dict[str, float]: else: self.optimizer.step() - self.optimizer.param_groups[0]['lr'] = original_lr - # Update moving averages if self._moving_average is not None: self._moving_average.apply(batch_num_total) @@ -401,7 +393,7 @@ def _validation_loss(self) -> Tuple[float, int]: total=num_validation_batches) batches_this_epoch = 0 val_loss = 0 - for batch, _ in val_generator_tqdm: + for batch in val_generator_tqdm: loss = self.batch_loss(batch, for_training=False) if loss is not None: From 6505edb2c6d717c9a68d1290d7a34ee9eb4e548d Mon Sep 17 00:00:00 2001 From: rloganiv Date: Sun, 8 Dec 2019 01:48:27 -0800 Subject: [PATCH 26/35] BUGFIX: Unnecc. @@START@@ and @@END@@ tokens. --- kglm/data/dataset_readers/enhanced_wikitext.py | 1 - 1 file changed, 1 deletion(-) diff --git a/kglm/data/dataset_readers/enhanced_wikitext.py b/kglm/data/dataset_readers/enhanced_wikitext.py index dd5d1ec..d646e47 100644 --- a/kglm/data/dataset_readers/enhanced_wikitext.py +++ b/kglm/data/dataset_readers/enhanced_wikitext.py @@ -362,7 +362,6 @@ def _read(self, file_path: str) -> Iterable[Instance]: def text_to_instance(self, data: Dict[str, Any]) -> Instance: # pylint: disable=arguments-differ # Flatten and pad tokens tokens = _flatten(data['tokens']) - tokens = ['@@START@@', *tokens, '@@END@@'] source = [Token(x) for x in tokens[:-1]] target = [Token(x) for x in tokens[1:]] fields = { From b9ac8b2c6f600a2287e23127bf29af7abde99ecd Mon Sep 17 00:00:00 2001 From: rloganiv Date: Sun, 8 Dec 2019 01:48:59 -0800 Subject: [PATCH 27/35] Updated .gitignore and Docker stuff (for efficiency) --- .dockerignore | 5 +++-- .gitignore | 7 ++++++- Dockerfile | 46 ++++++++++++++++++++-------------------------- 3 files changed, 29 insertions(+), 29 deletions(-) diff --git a/.dockerignore b/.dockerignore index bf6d83d..2de1d56 100644 --- a/.dockerignore +++ b/.dockerignore @@ -5,10 +5,11 @@ Dockerfile old-results/ .git/ .gitignore +.ipynb_checkpoints/ .mypy_cache .pytest_cache/ -results -scripts +results/ +scripts/ .travis.yml .venv .vscode diff --git a/.gitignore b/.gitignore index a691629..2e04592 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,7 @@ *.pyc __pycache__ .venv/ +.ipynb_checkpoints # Testing @@ -14,5 +15,9 @@ __pycache__ .vscode/ -# AlleNLP +# Experiment-related results/ +data/ +*.yaml +*.npy +*.jinja2 diff --git a/Dockerfile b/Dockerfile index 6205775..16239fd 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,33 +1,27 @@ -FROM nvidia/cuda:10.0-cudnn7-devel-ubuntu16.04 -RUN echo "deb-src http://archive.ubuntu.com/ubuntu/ xenial main" | tee -a /etc/apt/sources.list -RUN apt-get update && apt-get install -y --no-install-recommends \ - build-essential \ - cmake \ - git \ - curl \ - vim \ - ca-certificates \ - libjpeg-dev \ - libpng-dev &&\ - rm -rf /var/lib/apy/lists/* +FROM python:3.6.8-jessie -RUN curl -o ~/miniconda.sh -O https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh && \ - chmod +x ~/miniconda.sh && \ - ~/miniconda.sh -b -p /opt/conda && 
\ - rm ~/miniconda.sh && \ - /opt/conda/bin/conda install -y python=$PYTHON_VERSION numpy pyyaml scipy ipython mkl mkl-include cython typing && \ - /opt/conda/bin/conda install -y -c pytorch magma-cuda100 && \ - /opt/conda/bin/conda clean -ya -ENV PATH /opt/conda/bin:$PATH +ENV LC_ALL=C.UTF-8 +ENV LANG=C.UTF-8 + +ENV PATH /usr/local/nvidia/bin/:$PATH +ENV LD_LIBRARY_PATH /usr/local/nvidia/lib:/usr/local/nvidia/lib64 + +# Tell nvidia-docker the driver spec that we need as well as to +# use all available devices, which are mounted at /usr/local/nvidia. +# The LABEL supports an older version of nvidia-docker, the env +# variables a newer one. +ENV NVIDIA_VISIBLE_DEVICES all +ENV NVIDIA_DRIVER_CAPABILITIES compute,utility +LABEL com.nvidia.volumes.needed="nvidia_driver" WORKDIR /workspace +RUN chmod -R a+w /workspace + +COPY requirements.txt . +RUN pip install -r requirements.txt -COPY experiments/ experiments/ -COPY kglm/ kglm/ COPY .pylintrc .pylintrc COPY pytest.ini pytest.ini COPY README.md README.md -COPY requirements.txt . - -RUN pip install -r requirements.txt -RUN chmod -R a+w /workspace +COPY kglm/ kglm/ +COPY experiments/ experiments/ From 7175609917c6203c112f24d73a924448fbc8b819 Mon Sep 17 00:00:00 2001 From: rloganiv Date: Sun, 8 Dec 2019 01:49:48 -0800 Subject: [PATCH 28/35] BUGFIX: Timestep miscalculation --- kglm/models/entity_disc.py | 40 +++++++++++++++++++++--------- kglm/models/entity_nlm.py | 19 +++++++++++--- kglm/modules/dynamic_embeddings.py | 31 +++++++++++++---------- 3 files changed, 61 insertions(+), 29 deletions(-) diff --git a/kglm/models/entity_disc.py b/kglm/models/entity_disc.py index cc5c418..12dd790 100644 --- a/kglm/models/entity_disc.py +++ b/kglm/models/entity_disc.py @@ -91,7 +91,8 @@ def __init__(self, out_features=2, bias=False) self._dynamic_embeddings = DynamicEmbedding(embedding_dim=embedding_dim, - max_embeddings=max_embeddings) + max_embeddings=max_embeddings, + tied_weight=self._entity_type_projection.weight) # For mention length prediction self._mention_length_projection = torch.nn.Linear(in_features=2*embedding_dim, @@ -197,9 +198,11 @@ def sample(self, # pylint: disable=unused-argument self.reset_states(reset) if self._state is None: - prev_mention_lengths = source['tokens'].new_ones(batch_size) + prev_mention_lengths = source['tokens'].new_zeros(batch_size) + prev_t = source['tokens'].new_zeros(batch_size) else: prev_mention_lengths = self._state['prev_mention_lengths'] + prev_t = self._state['prev_t'] # Embed tokens and get RNN hidden state. mask = get_text_field_mask(source) @@ -248,7 +251,7 @@ def sample(self, # pylint: disable=unused-argument if predict_em.any(): # Predict entity ids entity_id_prediction_outputs = self._dynamic_embeddings(hidden=current_hidden, - timestep=timestep, + timestep=prev_t, mask=predict_em) entity_id_logits = entity_id_prediction_outputs['logits'] / temperature entity_id_mask = entity_id_prediction_outputs['logit_mask'] @@ -284,7 +287,7 @@ def sample(self, # pylint: disable=unused-argument self._dynamic_embeddings.add_embeddings(timestep, new_entities) self._dynamic_embeddings.update_embeddings(hidden=current_hidden, update_indices=entity_ids[:, timestep], - timestep=timestep, + timestep=prev_t, mask=predict_em) # If the previous mentions are ongoing, we assign the output deterministically. 
Mention @@ -300,9 +303,11 @@ def sample(self, # pylint: disable=unused-argument # Update mention lengths for next timestep prev_mention_lengths = mention_lengths[:, timestep] + prev_t += 1 # Update state - self._state = {'prev_mention_lengths': prev_mention_lengths.detach()} + self._state = {'prev_mention_lengths': prev_mention_lengths.detach(), + 'prev_t': prev_t.detach()} return { 'logp': logp, @@ -337,7 +342,7 @@ def mention_length_lookup(self): def _annotation_logp(self, hidden: torch.FloatTensor, - timestep: int, + timestep: torch.LongTensor, beam_states: List[Dict[str, Any]]) -> torch.Tensor: """Computes the log-probability of all possible annotations for a single beam state. @@ -361,6 +366,8 @@ def _annotation_logp(self, entity_type_logp = F.log_softmax(entity_type_logits, -1) # Entity id log probabilities: (batch_size, max_embeddings) + # Should be okay to use timestep instead of prev_t since there's no + # splitting in beam search. entity_id_logits = self._dynamic_embeddings(hidden, timestep)['logits'] entity_id_logp = F.log_softmax(entity_id_logits, -1) @@ -452,7 +459,7 @@ def _top_k_annotations(self, def _update_beam_states(self, hidden: torch.FloatTensor, - timestep: int, + timestep: torch.LongTensor, beam_states: List[Dict[str, Any]], output: Dict[str, torch.Tensor]) -> List[Dict[str, Any]]: """ @@ -493,6 +500,8 @@ def _update_beam_states(self, output['entity_ids'][:, i] = entity_ids # Now do this right... + # Should be okay to use timestep instead of prev_t since there's no + # splitting in beam search. self._dynamic_embeddings.add_embeddings(timestep, new_entities) self._dynamic_embeddings.update_embeddings(hidden=hidden, update_indices=entity_ids, @@ -585,6 +594,7 @@ def beam_search(self, 'this setting!') self.reset_states(reset) prev_mention_lengths = source['tokens'].new_zeros(batch_size) + prev_t = source['tokens'].new_zeros(batch_size) # Embed and encode the tokens up front. embeddings = self._text_field_embedder(source) @@ -597,7 +607,7 @@ def beam_search(self, for timestep in range(sequence_length): # Get log probabilities of annotations # (batch_size, k, num_annotations) - logp = self._annotation_logp(hidden[:, timestep], timestep, beam_states) + logp = self._annotation_logp(hidden[:, timestep], prev_t, beam_states) # Accout for ongoing mentions logp = self._adjust_for_ongoing_mentions(logp, output) # Add to cumulative log probabilities of beams (which have shape (batch_size, k)) @@ -605,8 +615,9 @@ def beam_search(self, logp += output['logp'].unsqueeze(-1) output = self._top_k_annotations(logp, k) - beam_states = self._update_beam_states(hidden[:, timestep], timestep, beam_states, output) + beam_states = self._update_beam_states(hidden[:, timestep], prev_t, beam_states, output) predictions.append(output) + prev_t = prev_t + 1 # Trace backpointers to get annotation. annotation = self._trace_backpointers(source, reset, k, predictions) @@ -653,8 +664,10 @@ def _forward_loop(self, # Need to track previous mention lengths in order to know when to measure loss. if self._state is None: prev_mention_lengths = mention_lengths.new_zeros(batch_size) + prev_t = mention_lengths.new_zeros(batch_size) else: prev_mention_lengths = self._state['prev_mention_lengths'] + prev_t = self._state['prev_t'] # Embed tokens and get RNN hidden state. 
mask = get_text_field_mask(tokens) @@ -701,7 +714,7 @@ def _forward_loop(self, modified_entity_ids = current_entity_ids.clone() modified_entity_ids[modified_entity_ids == self._dynamic_embeddings.num_embeddings] = 0 entity_id_prediction_outputs = self._dynamic_embeddings(hidden=current_hidden, - timestep=timestep, + timestep=prev_t, target=modified_entity_ids, mask=predict_em) _entity_id_loss = -entity_id_prediction_outputs['loss'] @@ -730,10 +743,11 @@ def _forward_loop(self, # We also perform updates of the currently observed entities. self._dynamic_embeddings.update_embeddings(hidden=current_hidden, update_indices=current_entity_ids, - timestep=timestep, + timestep=prev_t, mask=current_entity_types) prev_mention_lengths = current_mention_lengths + prev_t += 1 # Normalize the losses entity_type_loss = entity_type_loss / mask.sum() @@ -752,7 +766,8 @@ def _forward_loop(self, # Update the model state self._state = { - 'prev_mention_lengths': mention_lengths[:, -1].detach() + 'prev_mention_lengths': mention_lengths[:, -1].detach(), + 'prev_t': prev_t.detach() } return output_dict @@ -762,6 +777,7 @@ def reset_states(self, reset: torch.ByteTensor) -> None: if reset.any() and (self._state is not None): # Zero out any previous elements self._state['prev_mention_lengths'][reset] = 0 + self._state['prev_t'][reset] = 0 # Reset the dynamic embeddings and lstm self._dynamic_embeddings.reset_states(reset) diff --git a/kglm/models/entity_nlm.py b/kglm/models/entity_nlm.py index de27d5b..2b699c1 100644 --- a/kglm/models/entity_nlm.py +++ b/kglm/models/entity_nlm.py @@ -221,14 +221,16 @@ def _forward_loop(self, mention_lengths = torch.cat((self._state['prev_mention_lengths'], mention_lengths), dim=1) contexts = self._state['prev_contexts'] sequence_length += 1 + prev_t = self._state['prev_t'] else: contexts = tokens['tokens'].new_zeros(batch_size, self._embedding_dim, dtype=torch.float32) + prev_t = tokens['tokens'].new_zeros(batch_size) # Embed tokens and get RNN hidden state. mask = get_text_field_mask(tokens).byte() embeddings = self._text_field_embedder(tokens) embeddings = self._variational_dropout(embeddings) - hidden = self._rnn(embeddings) + hidden = self._rnn(embeddings[:,:-1]) # Otherwise will double count on splits # Initialize losses entity_type_loss = 0.0 @@ -264,7 +266,7 @@ def _forward_loop(self, # We also perform updates of the currently observed entities. self._dynamic_embeddings.update_embeddings(hidden=current_hidden, update_indices=current_entity_ids, - timestep=timestep, + timestep=prev_t, mask=current_entity_types) # This part is a little counter-intuitive. Because the above code adds a new embedding @@ -302,7 +304,7 @@ def _forward_loop(self, if predict_em.any(): # Equation 4 in the paper. entity_id_prediction_outputs = self._dynamic_embeddings(hidden=current_hidden, - timestep=timestep, + timestep=prev_t, target=next_entity_ids, mask=predict_em) _entity_id_loss = -entity_id_prediction_outputs['loss'] @@ -374,6 +376,12 @@ def _forward_loop(self, # Lastly update contexts contexts = combined_embeddings + prev_t += 1 + + # And to be super careful, we want to reset any rnn hidden states + # if the current token is padding (this could impact performance at + # the start of a sequence). 
+ self._rnn.reset(~current_mask) self._perplexity(vocab_loss, mask.sum()) @@ -390,6 +398,7 @@ def _forward_loop(self, logger.debug('Vocab loss: %0.4f', vocab_loss) total_loss = entity_type_loss + entity_id_loss + mention_length_loss + vocab_loss + output_dict = { 'entity_type_loss': entity_type_loss, 'entity_id_loss': entity_id_loss, @@ -405,7 +414,8 @@ def _forward_loop(self, 'prev_entity_types': entity_types[:, -1].unsqueeze(1).detach(), 'prev_entity_ids': entity_ids[:, -1].unsqueeze(1).detach(), 'prev_mention_lengths': mention_lengths[:, -1].unsqueeze(1).detach(), - 'prev_contexts': contexts.detach() + 'prev_contexts': contexts.detach(), + 'prev_t': prev_t.detach() } return output_dict @@ -421,6 +431,7 @@ def reset_states(self, reset: torch.ByteTensor) -> None: self._state['prev_entity_types'][reset] = 0 self._state['prev_entity_ids'][reset] = 0 self._state['prev_mention_lengths'][reset] = 0 + self._state['prev_t'][reset] = 0 self._state['prev_contexts'][reset] = 0.0 # Reset the dynamic embeddings diff --git a/kglm/modules/dynamic_embeddings.py b/kglm/modules/dynamic_embeddings.py index 6be3c33..d2ac131 100644 --- a/kglm/modules/dynamic_embeddings.py +++ b/kglm/modules/dynamic_embeddings.py @@ -31,14 +31,16 @@ class DynamicEmbedding(Module): """ def __init__(self, embedding_dim: int, - max_embeddings: int) -> None: + max_embeddings: int, + tied_weight: Optional[torch.nn.Parameter]) -> None: super(DynamicEmbedding, self).__init__() self._embedding_dim = embedding_dim self._max_embeddings = max_embeddings - self._initial_embedding = Parameter(F.normalize(torch.randn(embedding_dim), dim=0)) + self._initial_embedding = tied_weight + # self._initial_embedding = Parameter(F.normalize(torch.randn(embedding_dim), dim=0)) - self._distance_scalar = Parameter(torch.tensor(1e-6)) # pylint: disable=E1102 + self._distance_scalar = Parameter(torch.tensor(1e-3)) # pylint: disable=E1102 self._embedding_projection = torch.nn.Linear(in_features=embedding_dim, out_features=embedding_dim, bias=False) @@ -69,10 +71,10 @@ def reset_states(self, reset: torch.ByteTensor) -> None: # This simplifies the case where the batch_size has been if reset.all(): self.embeddings = self._initial_embedding.new_zeros(batch_size, self._max_embeddings, - self._embedding_dim) + self._embedding_dim) self.num_embeddings = self._initial_embedding.new_zeros(batch_size, dtype=torch.int64) self.last_seen = self._initial_embedding.new_zeros(batch_size, self._max_embeddings, - dtype=torch.int64) + dtype=torch.int64) else: self.embeddings[reset] = 0 self.num_embeddings[reset] = 0 @@ -88,7 +90,7 @@ def detach_states(self) -> None: self.embeddings = self.embeddings.detach() def add_embeddings(self, - timestep: int, + timestep: torch.LongTensor, mask: Optional[torch.Tensor] = None) -> None: """ Adds new embeddings to the current collection of embeddings. @@ -110,7 +112,7 @@ def add_embeddings(self, # Embeddings are initialized by adding a small amount of random noise to the initial # embedding tensor then normalizing. 
- initial = self._initial_embedding.repeat((mask.sum(), 1, 1)) + initial = self._initial_embedding[1].repeat((mask.sum(), 1, 1)) noise = 1e-4 * torch.randn_like(initial) # 1e-4 is a magic number from the original implementation unnormalized = initial + noise normalized = F.normalize(unnormalized, dim=-1) @@ -122,11 +124,10 @@ def add_embeddings(self, if self.num_embeddings.max() == (self._max_embeddings - 1): logger.warning('Embeddings full') - def update_embeddings(self, hidden: torch.Tensor, update_indices: torch.Tensor, - timestep: int, + timestep: torch.LongTensor, mask: Optional[torch.Tensor] = None) -> None: """ Updates existing embeddings. @@ -170,7 +171,7 @@ def update_embeddings(self, # dimension when accessing self.embeddings. Accordingly, the batch dimension of # normalized needs to be dropped in this case in order for assignment to work. self.embeddings[mask, update_indices[mask]] = normalized.squeeze(0) - self.last_seen[mask, update_indices[mask]] = timestep + self.last_seen[mask, update_indices[mask]] = timestep[mask] @overrides def forward(self, # pylint: disable=arguments-differ @@ -219,7 +220,9 @@ def forward(self, # pylint: disable=arguments-differ bilinear = bilinear.view(batch_size, -1) # Second half of equation 4. - distance_score = torch.exp(self._distance_scalar * (self.last_seen[mask].float() - timestep)) + distance_score = self._distance_scalar * (timestep[mask].float().unsqueeze(-1) - self.last_seen[mask].float()) + assert not (self.last_seen[mask] - timestep[mask].unsqueeze(-1)).gt(0).any() + logits = bilinear + distance_score # Since we pre-allocate the embedding array, logits includes scores for all of the @@ -228,11 +231,12 @@ def forward(self, # pylint: disable=arguments-differ num_embeddings = self.num_embeddings[mask].unsqueeze(1) arange = torch.arange(self._max_embeddings, device=num_embeddings.device).repeat(mask.sum(), 1) logit_mask = arange.lt(num_embeddings) - logits[logit_mask != 1] = 1e-34 + logits[logit_mask != 1] = -1e34 out = { 'logits': logits, - 'logit_mask': logit_mask + 'logit_mask': logit_mask, + 'logp': F.log_softmax(logits, -1) } if target is not None: @@ -255,3 +259,4 @@ def load_beam_state(self, beam_state): self.embeddings = beam_state.get('embeddings', None) self.num_embeddings = beam_state.get('num_embeddings', None) self.last_seen = beam_state.get('last_seen', None) + From ea77e027ce7fc5e96e1ea12a7601f3790803ddb7 Mon Sep 17 00:00:00 2001 From: rloganiv Date: Sun, 8 Dec 2019 01:50:50 -0800 Subject: [PATCH 29/35] Added logging --- kglm/modules/weight_drop.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/kglm/modules/weight_drop.py b/kglm/modules/weight_drop.py index 8d4a924..0bfef8a 100644 --- a/kglm/modules/weight_drop.py +++ b/kglm/modules/weight_drop.py @@ -1,3 +1,4 @@ +import logging from typing import Dict, List, Optional, Tuple from overrides import overrides @@ -5,6 +6,8 @@ from torch.nn import Parameter import torch.nn.functional as F +logger = logging.getLogger(__name__) + LstmState = Tuple[torch.FloatTensor, torch.FloatTensor] StateDict = Dict[str, LstmState] @@ -101,6 +104,10 @@ def reset(self, reset: torch.ByteTensor = None) -> None: """Resets the internal hidden states""" # pylint: disable=invalid-name if reset is None: + logger.debug('Fully resetting LSTM state') + self._state = None + elif reset.all(): + logger.debug('Fully resetting LSTM state') self._state = None if self._state is None: return @@ -109,3 +116,4 @@ def reset(self, reset: torch.ByteTensor = None) -> None: h[:, reset, :] = torch.zeros_like(h[:, 
reset, :]) c[:, reset, :] = torch.zeros_like(c[:, reset, :]) self._state['layer_%i' % layer] = (h, c) + From 483f547d39439de117e567f5205570015e6d349d Mon Sep 17 00:00:00 2001 From: rloganiv Date: Sun, 8 Dec 2019 01:51:49 -0800 Subject: [PATCH 30/35] Kglm and KglmDisc now output vector logp's --- kglm/models/kglm.py | 44 +++++++++++++----------- kglm/models/kglm_disc.py | 73 ++++++++++++++++++++++------------------ 2 files changed, 65 insertions(+), 52 deletions(-) diff --git a/kglm/models/kglm.py b/kglm/models/kglm.py index 074bdcc..27ade21 100644 --- a/kglm/models/kglm.py +++ b/kglm/models/kglm.py @@ -446,8 +446,8 @@ def _forward_loop(self, # Predict whether or not the next token will be an entity mention, and if so which type. mention_type_loss = self._mention_type_loss(encoded_token, mention_type, target_mask) - self._avg_mention_type_loss(float(mention_type_loss)) - logger.debug('mention type loss: %0.4f', mention_type_loss) + logger.debug('mention loss: %0.4f', mention_type_loss.sum() / (target_mask.sum().float() + 1e-13)) + self._avg_mention_type_loss(float(mention_type_loss.sum() / (target_mask.sum().float() + 1e-13))) # For new mentions, predict which entity (among those in the supplied shortlist) will be # mentioned. @@ -464,8 +464,8 @@ def _forward_loop(self, None, target_mask) - self._avg_new_entity_loss(float(new_entity_loss)) - logger.debug('new entity loss: %0.4f', new_entity_loss) + logger.debug('new ent loss: %0.4f', new_entity_loss.sum() / (target_mask.sum().float() + 1e-13)) + self._avg_new_entity_loss(float(new_entity_loss.sum() / (target_mask.sum().float() + 1e-13))) # For derived mentions, first predict which parent(s) to expand... knowledge_graph_entity_loss = self._knowledge_graph_entity_loss(encoded_head, @@ -474,8 +474,8 @@ def _forward_loop(self, entity_ids, parent_ids, target_mask) - self._avg_knowledge_graph_entity_loss(float(knowledge_graph_entity_loss)) - logger.debug('kg entity loss: %0.4f', knowledge_graph_entity_loss) + self._avg_knowledge_graph_entity_loss(float(knowledge_graph_entity_loss.sum() / (target_mask.sum().float() + 1e-13))) + logger.debug('kg loss: %0.4f', knowledge_graph_entity_loss.sum() / (target_mask.sum().float() + 1e-13)) # Predict generation-mode scores. Note: these are W.R.T to entity_ids since we need the embedding. generate_scores = self._generate_scores(encoded_token, entity_ids) @@ -492,11 +492,13 @@ def _forward_loop(self, target_mask, alias_inds, entity_ids.gt(0)) + logger.debug('vocab loss: %0.4f', vocab_loss.sum() / (target_mask.sum().float() + 1e-13)) # Compute total loss. Also compute logp (needed for importance sampling evaluation). - loss = vocab_loss + mention_type_loss + new_entity_loss + knowledge_graph_entity_loss - logp = -(vocab_loss + mention_type_loss + new_entity_loss + knowledge_graph_entity_loss) * target_mask.sum() - penalized_logp = -(penalized_vocab_loss + mention_type_loss + new_entity_loss + knowledge_graph_entity_loss) * target_mask.sum() + loss = (vocab_loss + mention_type_loss + new_entity_loss + knowledge_graph_entity_loss).sum() / (target_mask.sum().float() + 1e-13) + logger.debug('loss: %0.4f', loss) + logp = -(vocab_loss + mention_type_loss + new_entity_loss + knowledge_graph_entity_loss) + penalized_logp = -(penalized_vocab_loss + mention_type_loss + new_entity_loss + knowledge_graph_entity_loss) # Activation regularization if self._alpha: @@ -681,8 +683,11 @@ def _mention_type_loss(self, entity mention. 
""" logits = self._fc_mention_type(encoded) - mention_loss = sequence_cross_entropy_with_logits(logits, mention_type, mask, - average='token') + mention_logp = F.log_softmax(logits, -1) + mention_loss = -mention_logp.gather(-1, mention_type.unsqueeze(-1)).squeeze() + mention_loss = mention_loss * mask.float() + # mention_loss = sequence_cross_entropy_with_logits(logits, mention_type, mask, + # average='token') # if not self.training: self._new_mention_f1(predictions=logits, @@ -692,7 +697,7 @@ def _mention_type_loss(self, gold_labels=mention_type, mask=mask) - return mention_loss + return mention_loss.sum(-1) def _new_entity_logits(self, encoded: torch.Tensor, @@ -730,18 +735,17 @@ def _new_entity_loss(self, log_probs = masked_log_softmax(logits, shortlist_mask) else: log_probs = F.log_softmax(logits, dim=-1) - target_log_probs = torch.gather(log_probs, -1, target_inds.unsqueeze(-1)).squeeze(-1) - target_log_probs = target_log_probs * target_mask.float() - # Also don't predict on non-mentions + target_loss = -log_probs.gather( -1, target_inds.unsqueeze(-1)).squeeze(-1) + target_loss = target_loss * target_mask.float() mentions = ~entity_ids.eq(0) - target_log_probs = target_log_probs * mentions.float() + target_loss = target_loss * mentions.float() # self._new_entity_accuracy(predictions=log_probs[mask], # gold_labels=target_inds[mask]) # self._new_entity_accuracy20(predictions=log_probs[mask], # gold_labels=target_inds[mask]) - return -target_log_probs.sum() / (target_mask.sum() + 1e-13) + return target_loss.sum(-1) # / (target_mask.sum(-1).float() + 1e-13) def _parent_log_probs(self, encoded_head: torch.Tensor, @@ -854,7 +858,7 @@ def _knowledge_graph_entity_loss(self, self._parent_ppl(-torch.logsumexp(parent_log_probs, dim=-1)[mask].sum(), mask.float().sum()) self._relation_ppl(-torch.logsumexp(relation_log_probs, dim=-1)[mask].sum(), mask.float().sum()) # Lastly return the tokenwise average loss - return -target_log_probs.sum() / (target_mask.sum() + 1e-13) + return -target_log_probs.sum(-1) # / (target_mask.sum(-1) + 1e-13) def _generate_scores(self, encoded: torch.Tensor, @@ -966,13 +970,13 @@ def _vocab_loss(self, flattened_mask = flattened_mask.squeeze() # Zero out padding loss combined_log_probs_extended_vocab = combined_log_probs_extended_vocab * flattened_mask.float() - vocab_loss = -combined_log_probs_extended_vocab.sum() / (mask.sum() + 1e-13) + vocab_loss = -combined_log_probs_extended_vocab.view(batch_size, sequence_length).sum(-1)# / (mask.sum(-1) + 1e-13) # Unknown penalty - only applies to non-copied unks true_unks = unks.squeeze() & ~copied.squeeze() & flattened_mask penalized_log_probs = combined_log_probs_extended_vocab - self._unk_penalty * true_unks.float() penalized_log_probs[~flattened_mask] = 0 - penalized_vocab_loss = -penalized_log_probs.sum() / (mask.sum() + 1e-13) + penalized_vocab_loss = -penalized_log_probs.view(batch_size, sequence_length).sum(-1)# / (mask.sum(-1) + 1e-13) # PERPLEXITY ### # Our perplexity terms are computed using the log probs computed w.r.t the source diff --git a/kglm/models/kglm_disc.py b/kglm/models/kglm_disc.py index ef76e96..3d28b3c 100644 --- a/kglm/models/kglm_disc.py +++ b/kglm/models/kglm_disc.py @@ -187,9 +187,8 @@ def sample(self, # Reset the model if needed self.reset_states(reset) - logp = 0.0 - mask = get_text_field_mask(target).byte() + batch_size = mask.shape[0] # We encode the target tokens (**not** source) since the discriminitative model makes # predictions on the current token, but the generative model expects labels 
for the
         # **next** (e.g. target) token!
@@ -197,13 +196,16 @@
         splits = [self.token_embedding_dim] + [self.entity_embedding_dim] * 2
         encoded_token, encoded_head, encoded_relation = encoded.split(splits, dim=-1)
 
+        # logp = 0.0
+        logp = encoded.new_zeros(batch_size)
+
         # Compute new mention logits
         mention_logits = self._fc_mention_type(encoded_token)
         mention_probs = F.softmax(mention_logits, dim=-1)
         mention_type = parallel_sample(mention_probs)
-        mention_logp = mention_probs.gather(-1, mention_type.unsqueeze(-1)).log()
-        mention_logp[~mask] = 0
-        mention_logp = mention_logp.sum()
+        _mention_logp = mention_probs.gather(-1, mention_type.unsqueeze(-1)).log()
+        _mention_logp[~mask] = 0
+        mention_logp = _mention_logp.view(batch_size, -1).sum(-1)
 
         # Compute entity logits
         new_entity_mask = mention_type.eq(1)
@@ -221,7 +223,7 @@
             _new_entity_logp = new_entity_probs.gather(-1, shortlist_inds.unsqueeze(-1)).log()
             new_entity_samples = shortlist['entity_ids'].gather(1, shortlist_inds)
         else:
-            new_entity_logits = new_entity_logits
+            new_entity_logits[:,:,:4] = -1e32 # A new entity mustn't be padding, unknown, or a literal
             # If not using shortlist, then samples are indexed w.r.t to the global vocab
             new_entity_probs = F.softmax(new_entity_logits, dim=-1)
             new_entity_samples = parallel_sample(new_entity_probs)
@@ -230,7 +232,7 @@
         # Zero out masked tokens and non-new entity predictions
         _new_entity_logp[~mask] = 0
         _new_entity_logp[~new_entity_mask] = 0
-        new_entity_logp = _new_entity_logp.sum()
+        new_entity_logp = _new_entity_logp.view(batch_size, -1).sum(-1)
 
         # Start filling in the entity ids
         entity_ids = torch.zeros_like(target['tokens'])
@@ -246,7 +248,7 @@
         # Derived mentions need to be computed sequentially.
         parent_ids = torch.zeros_like(target['tokens']).unsqueeze(-1)
         derived_entity_mask = mention_type.eq(2)
-        derived_entity_logp = 0.0
+        derived_entity_logp = torch.zeros_like(new_entity_logp)
 
         sequence_length = target['tokens'].shape[1]
         for i in range(sequence_length):
@@ -287,7 +289,7 @@
                 parent_logp[viable_candidate_mask] = viable_logp.squeeze(-1)
             parent_ids[current_mask, i] = _parent_ids[current_mask] # TODO: Double-check
-            derived_entity_logp += parent_logp[current_mask].sum()
+            derived_entity_logp[current_mask] += parent_logp[current_mask].squeeze(-1)
 
             ## SAMPLE RELATIONS ##
 
@@ -311,7 +313,7 @@
             # Get logp. Ignoring the current_mask here is **super** dodgy, but since we forced
             # null parents to zero we shouldn't be accumulating probabilities for unused predictions.
             tail_logp = tail_probs.gather(-1, tail_sample).log()
-            derived_entity_logp += tail_logp.sum() # Sum is redundant, just need it to make logp a scalar
+            derived_entity_logp[index[:-1]] += tail_logp.sum() # Sum is redundant, just need it to make logp a scalar
 
             # Map back to raw id
             raw_tail_id = tail_id_lookup[tail_sample]
@@ -403,8 +405,12 @@ def _mention_type_loss(self,
         entity mention.
""" logits = self._fc_mention_type(encoded) - mention_type_loss = sequence_cross_entropy_with_logits(logits, mention_type, mask, - average='token') + mention_logp = F.log_softmax(logits, -1) + mention_loss = -mention_logp.gather(-1, mention_type.unsqueeze(-1)).squeeze() + mention_loss = mention_loss * mask.float() + # mention_loss = sequence_cross_entropy_with_logits(logits, mention_type, mask, + # average='token') + # if not self.training: self._new_mention_f1(predictions=logits, gold_labels=mention_type, @@ -413,7 +427,7 @@ def _mention_type_loss(self, gold_labels=mention_type, mask=mask) - return mention_type_loss + return mention_loss.sum(-1) def _new_entity_logits(self, encoded: torch.Tensor, @@ -449,23 +463,17 @@ def _new_entity_loss(self, shortlist_mask = get_text_field_mask(shortlist) log_probs = masked_log_softmax(logits, shortlist_mask) else: - logits = logits log_probs = F.log_softmax(logits, dim=-1) - num_categories = log_probs.shape[-1] - log_probs = log_probs.view(-1, num_categories) - target_inds = target_inds.view(-1) - target_log_probs = torch.gather(log_probs, -1, target_inds.unsqueeze(-1)).squeeze(-1) - - mask = ~target_inds.eq(0) - target_log_probs[~mask] = 0 + loss = -log_probs.gather(-1, target_inds.unsqueeze(-1)).squeeze(-1) + loss = loss * target_mask.float() - if mask.any(): - self._new_entity_accuracy(predictions=log_probs[mask], - gold_labels=target_inds[mask]) - self._new_entity_accuracy20(predictions=log_probs[mask], - gold_labels=target_inds[mask]) + if target_mask.any(): + self._new_entity_accuracy(predictions=log_probs[target_mask], + gold_labels=target_inds[target_mask]) + self._new_entity_accuracy20(predictions=log_probs[target_mask], + gold_labels=target_inds[target_mask]) - return -target_log_probs.sum() / (target_mask.sum() + 1e-13) + return loss.sum(-1) # / (target_mask.sum() + 1e-13) def _parent_log_probs(self, encoded_head: torch.Tensor, @@ -577,7 +585,7 @@ def _knowledge_graph_entity_loss(self, self._parent_ppl(-torch.logsumexp(parent_log_probs, dim=-1)[mask].sum(), mask.float().sum()) self._relation_ppl(-torch.logsumexp(relation_log_probs, dim=-1)[mask].sum(), mask.float().sum()) # Lastly return the tokenwise average loss - return -target_log_probs.sum() / (target_mask.sum() + 1e-13) + return -target_log_probs.sum(-1) # / (target_mask.sum() + 1e-13) def _forward_loop(self, source: Dict[str, torch.Tensor], @@ -610,7 +618,7 @@ def _forward_loop(self, # Predict whether or not the next token will be an entity mention, and if so which type. mention_type_loss = self._mention_type_loss(encoded_token, mention_type, target_mask) - self._avg_mention_type_loss(float(mention_type_loss)) + self._avg_mention_type_loss(float(mention_type_loss.sum()/target_mask.sum())) # For new mentions, predict which entity (among those in the supplied shortlist) will be # mentioned. @@ -624,8 +632,9 @@ def _forward_loop(self, entity_ids, None, target_mask) + logger.debug('new_entity_loss: %s', new_entity_loss) - self._avg_new_entity_loss(float(new_entity_loss)) + self._avg_new_entity_loss(float(new_entity_loss.sum()/target_mask.sum())) # For derived mentions, first predict which parent(s) to expand... 
knowledge_graph_entity_loss = self._knowledge_graph_entity_loss(encoded_head, @@ -634,10 +643,10 @@ def _forward_loop(self, entity_ids, parent_ids, target_mask) - self._avg_knowledge_graph_entity_loss(float(knowledge_graph_entity_loss)) + self._avg_knowledge_graph_entity_loss(float(knowledge_graph_entity_loss.sum()/target_mask.sum())) # Compute total loss - loss = mention_type_loss + new_entity_loss + knowledge_graph_entity_loss + loss = (mention_type_loss + new_entity_loss + knowledge_graph_entity_loss).sum() / target_mask.sum() # Activation regularization if self._alpha: From b3270481e6ed20086822973f38cc691a40c472e3 Mon Sep 17 00:00:00 2001 From: rloganiv Date: Sun, 8 Dec 2019 01:51:59 -0800 Subject: [PATCH 31/35] Updated tests --- kglm/tests/fixtures/conll2012.jsonl | 4 ++-- kglm/tests/models/kglm_test.py | 9 ++++----- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/kglm/tests/fixtures/conll2012.jsonl b/kglm/tests/fixtures/conll2012.jsonl index 3d5e39c..a0bc1fa 100644 --- a/kglm/tests/fixtures/conll2012.jsonl +++ b/kglm/tests/fixtures/conll2012.jsonl @@ -1,2 +1,2 @@ -{"tokens": ["Jesus", "left", "and", "went", "back", "to", "his", "hometown", ".", "His", "followers", "went", "with", "him", ".", "On", "the", "Sabbath", "day", "Jesus", "taught", "in", "the", "synagogue", ",", "and", "many", "people", "heard", "him", ".", "They", "were", "amazed", "and", "said", ",", "``", "Where", "did", "this", "man", "get", "this", "teaching", "?", "How", "did", "he", "get", "such", "wisdom", "?", "Who", "gave", "it", "to", "him", "?", "And", "where", "did", "he", "get", "the", "power", "to", "do", "miracles", "?", "Is", "n't", "he", "just", "the", "carpenter", "we", "know", "--", "Mary", "'s", "son", ",", "the", "brother", "of", "James", ",", "Joses", ",", "Judas", ",", "and", "Simon", "?", "And", "do", "n't", "his", "sisters", "still", "live", "here", "in", "town", "?", "''", "So", "they", "had", "a", "problem", "accepting", "him", ".", "Then", "Jesus", "said", "to", "them", ",", "``", "People", "everywhere", "give", "honor", "to", "a", "prophet", ",", "except", "in", "his", "own", "town", ",", "with", "his", "own", "people", ",", "or", "in", "his", "home", ".", "''", "Jesus", "was", "not", "able", "to", "do", "any", "miracles", "there", "except", "the", "healing", "of", "some", "sick", "people", "by", "laying", "his", "hands", "on", "them", ".", "He", "was", "surprised", "that", "the", "people", "there", "had", "no", "faith", ".", "Then", "he", "went", "to", "other", "villages", "in", "that", "area", "and", "taught", ".", "Jesus", "called", "his", "twelve", "apostles", "together", ".", "He", "sent", "them", "out", "in", "groups", "of", "two", "and", "gave", "them", "power", "over", "evil", "spirits", ".", "This", "is", "what", "he", "told", "them", ":", "``", "Take", "nothing", "for", "your", "trip", "except", "a", "stick", "for", "walking", ".", "Take", "no", "bread", ",", "no", "bag", ",", "and", "no", "money", ".", "You", "can", "wear", "sandals", ",", "but", "do", "n't", "take", "extra", "clothes", ".", "When", "you", "enter", "a", "house", ",", "stay", "there", "until", "you", "leave", "that", "town", ".", "If", "any", "town", "refuses", "to", "accept", "you", "or", "refuses", "to", "listen", "to", "you", ",", "then", "leave", "that", "town", "and", "shake", "the", "dust", "off", "your", "feet", "as", "a", "warning", "to", "them", ".", "''", "The", "apostles", "left", "and", "went", "to", "other", "places", ".", "They", "talked", "to", "the", "people", "and", "told", "them", "to", 
"change", "their", "hearts", "and", "lives", ".", "They", "forced", "many", "demons", "out", "of", "people", "and", "put", "olive", "oil", "on", "many", "who", "were", "sick", "and", "healed", "them", ".", "King", "Herod", "heard", "about", "Jesus", ",", "because", "Jesus", "was", "now", "famous", ".", "Some", "people", "said", ",", "``", "He", "is", "John", "the", "Baptizer", ".", "He", "must", "have", "risen", "from", "death", ",", "and", "that", "is", "why", "he", "can", "do", "these", "miracles", ".", "''", "Other", "people", "said", ",", "``", "He", "is", "Elijah", ".", "''", "And", "others", "said", ",", "``", "He", "is", "a", "prophet", ".", "He", "is", "like", "the", "prophets", "who", "lived", "long", "ago", ".", "''"], "clusters": {"2": [[0, 1], [6, 7], [13, 14], [19, 20], [29, 30], [40, 42], [48, 49], [57, 58], [62, 63], [72, 73], [98, 99], [113, 114], [116, 117], [147, 148], [165, 166], [170, 171], [182, 183], [193, 194], [200, 201], [219, 220], [352, 353], [355, 356], [365, 366], [371, 372], [382, 383], [394, 395], [404, 405], [409, 410]], "23": [[9, 11], [195, 198], [202, 203], [210, 211], [221, 222], [227, 228], [246, 247], [259, 260], [267, 268], [278, 279], [284, 285], [295, 296], [304, 306], [313, 314], [328, 329]], "8": [[26, 28], [31, 32], [76, 77], [108, 109], [119, 120]], "7": [[50, 52], [55, 56]], "33": [[127, 129], [132, 133], [137, 138], [143, 144]], "29": [[160, 163], [168, 169]], "35": [[273, 275], [288, 290]], "10": [[316, 318], [320, 321], [323, 324]], "15": [[340, 344], [346, 347]], "3": [[374, 375], [379, 380]]}} -{"tokens": ["Herod", "heard", "these", "things", "about", "Jesus", ".", "He", "said", ",", "``", "I", "killed", "John", "by", "cutting", "off", "his", "head", ".", "Now", "he", "has", "been", "raised", "from", "death", "!", "''", "Herod", "himself", "had", "ordered", "his", "soldiers", "to", "arrest", "John", "and", "put", "him", "in", "prison", ".", "Herod", "did", "this", "to", "please", "his", "wife", "Herodias", ".", "She", "had", "been", "married", "to", "Herod", "'s", "brother", "Philip", ",", "but", "then", "Herod", "married", "her", ".", "John", "told", "Herod", ",", "``", "It", "is", "not", "right", "for", "you", "to", "be", "married", "to", "your", "brother", "'s", "wife", ".", "''", "So", "Herodias", "hated", "John", ".", "She", "wanted", "him", "dead", ",", "but", "she", "was", "not", "able", "to", "persuade", "Herod", "to", "kill", "him", ".", "Herod", "was", "afraid", "to", "kill", "John", ",", "because", "he", "knew", "that", "he", "was", "a", "good", "and", "holy", "man", ".", "So", "he", "protected", "him", ".", "He", "liked", "listening", "to", "John", ",", "although", "what", "John", "said", "left", "him", "with", "so", "many", "questions", ".", "Then", "the", "right", "time", "came", "for", "Herodias", "to", "cause", "John", "'s", "death", ".", "It", "happened", "on", "Herod", "'s", "birthday", ".", "Herod", "gave", "a", "dinner", "party", "for", "the", "most", "important", "government", "leaders", ",", "the", "commanders", "of", "his", "army", ",", "and", "the", "most", "important", "people", "in", "Galilee", ".", "The", "daughter", "of", "Herodias", "came", "to", "the", "party", "and", "danced", ".", "When", "she", "danced", ",", "Herod", "and", "the", "people", "eating", "with", "him", "were", "very", "pleased", ".", "So", "King", "Herod", "said", "to", "the", "girl", ",", "``", "I", "will", "give", "you", "anything", "you", "want", ".", "''", "He", "promised", "her", ",", "``", "Anything", "you", "ask", "for", "I", "will", 
"give", "to", "you", "--", "even", "half", "of", "my", "kingdom", ".", "''", "The", "girl", "went", "to", "her", "mother", "and", "asked", ",", "``", "What", "should", "I", "ask", "King", "Herod", "to", "give", "me", "?", "''", "Her", "mother", "answered", ",", "``", "Ask", "for", "the", "head", "of", "John", "the", "Baptizer", ".", "''", "So", "right", "then", "the", "girl", "went", "back", "in", "to", "the", "king", ".", "She", "said", "to", "him", ",", "``", "Please", "give", "me", "the", "head", "of", "John", "the", "Baptizer", ".", "Bring", "it", "to", "me", "now", "on", "a", "plate", ".", "''", "King", "Herod", "was", "very", "sad", ",", "but", "he", "did", "n't", "want", "to", "break", "the", "promise", "he", "had", "made", "to", "her", "in", "front", "of", "his", "guests", ".", "So", "he", "sent", "a", "soldier", "to", "cut", "off", "John", "'s", "head", "and", "bring", "it", "to", "him", ".", "The", "soldier", "went", "and", "cut", "off", "John", "'s", "head", "in", "the", "prison", ".", "He", "brought", "the", "head", "back", "on", "a", "plate", "and", "gave", "it", "to", "the", "girl", ",", "and", "the", "girl", "gave", "it", "to", "her", "mother", ".", "John", "'s", "followers", "heard", "about", "what", "happened", ",", "so", "they", "came", "and", "got", "John", "'s", "body", "and", "put", "it", "in", "a", "tomb", ".", "The", "apostles", "Jesus", "had", "sent", "out", "came", "back", "to", "him", ".", "They", "gathered", "around", "him", "and", "told", "him", "about", "all", "they", "had", "done", "and", "taught", ".", "Jesus", "and", "his", "followers", "were", "in", "a", "very", "busy", "place", ".", "There", "were", "so", "many", "people", "that", "he", "and", "his", "followers", "did", "not", "even", "have", "time", "to", "eat", ".", "He", "said", "to", "them", ",", "``", "Come", "with", "me", ".", "We", "will", "go", "to", "a", "quiet", "place", "to", "be", "alone", ".", "There", "we", "will", "get", "some", "rest", ".", "''"], "clusters": {"2": [[0, 1], [7, 8], [11, 12], [29, 31], [33, 34], [44, 45], [65, 66], [71, 72], [79, 80], [107, 108], [112, 113], [120, 121], [132, 133], [136, 137], [147, 148], [169, 171], [173, 174], [214, 215], [226, 228], [234, 235], [243, 244], [252, 253], [261, 262], [279, 281], [310, 312], [316, 317], [339, 341], [346, 347], [354, 355], [366, 367], [380, 381]], "19": [[5, 6], [451, 452], [456, 457], [459, 460], [497, 498], [505, 506]], "32": [[13, 14], [21, 22], [37, 38], [40, 41], [69, 70], [93, 94], [97, 98], [110, 111], [117, 118], [123, 124], [134, 135], [140, 141], [144, 145], [162, 164]], "27": [[17, 19], [293, 299], [322, 328], [330, 331], [373, 376], [378, 379], [388, 391], [397, 399], [405, 406], [414, 415]], "26": [[32, 33], [46, 47]], "17": [[49, 52], [53, 54], [67, 68], [84, 88], [91, 92], [95, 96], [101, 102], [159, 160], [269, 271], [286, 288], [416, 418]], "13": [[58, 62]], "0": [[175, 178], [205, 207]], "31": [[179, 198], [216, 221], [362, 364]], "14": [[199, 203], [211, 212], [230, 232], [237, 238], [239, 240], [245, 246], [249, 250], [256, 257], [265, 267], [277, 278], [283, 284], [304, 306], [313, 314], [321, 322], [332, 333], [358, 359], [407, 409], [411, 413]], "9": [[368, 370], [382, 384], [395, 396]], "22": [[419, 422], [428, 429]], "30": [[432, 435], [437, 438]], "18": [[442, 448], [453, 454], [462, 463], [500, 501]], "1": [[468, 472], [485, 489], [507, 508], [519, 520]]}} +{"tokens": ["@@START@@", "in", "the", "summer", "of", "@@NUM@@", "a", "picture", "that", "people", "have", "long", "been", "looking", "forward", 
"to", "started", "emerging", "with", "frequency", "in", "various", "major", "hong", "kong", "media", "@@END@@", "@@START@@", "with", "their", "unique", "charm", "these", "well", "-", "known", "cartoon", "images", "once", "again", "caused", "hong", "kong", "to", "be", "a", "focus", "of", "worldwide", "attention", "@@END@@", "@@START@@", "the", "world", "'s", "fifth", "disney", "park", "will", "soon", "open", "to", "the", "public", "here", "@@END@@", "@@START@@", "the", "most", "important", "thing", "about", "disney", "is", "that", "it", "is", "a", "global", "brand", "@@END@@", "@@START@@", "well", "for", "several", "years", "although", "it", "was", "still", "under", "construction", "and", "er", "not", "yet", "open", "it", "can", "be", "said", "that", "many", "people", "have", "viewed", "hong", "kong", "with", "new", "respect", "@@END@@", "@@START@@", "then", "welcome", "to", "the", "official", "writing", "ceremony", "of", "hong", "kong", "disneyland", "@@END@@", "@@START@@", "the", "construction", "of", "hong", "kong", "disneyland", "began", "two", "years", "ago", "in", "@@NUM@@", "@@END@@", "@@START@@", "in", "january", "of", "that", "year", "the", "hong", "kong", "government", "turned", "over", "to", "disney", "corporation", "@@NUM@@", "hectares", "of", "land", "at", "the", "foot", "of", "lantau", "island", "that", "was", "obtained", "following", "the", "largest", "land", "reclamation", "project", "in", "recent", "years", "@@END@@", "@@START@@", "one", "@@END@@", "@@START@@", "since", "then", "this", "area", "has", "become", "a", "prohibited", "zone", "in", "hong", "kong", "@@END@@", "@@START@@", "as", "its", "neighbor", "on", "lantau", "island", "hong", "kong", "international", "airport", "had", "to", "change", "its", "flight", "routes", "to", "make", "this", "area", "a", "no", "fly", "zone", "@@END@@", "@@START@@", "mickey", "mouse", "'s", "new", "home", "settling", "on", "chinese", "land", "for", "the", "first", "time", "has", "captured", "worldwide", "attention", "@@END@@", "@@START@@", "there", "'s", "only", "one", "month", "left", "before", "the", "opening", "of", "hong", "kong", "disneyland", "on", "september", "@@NUM@@", "@@END@@", "@@START@@", "the", "subway", "to", "disney", "has", "already", "been", "constructed", "@@END@@", "@@START@@", "at", "subway", "stations", "passengers", "will", "frequently", "press", "the", "station", "for", "disney", "on", "ticket", "machines", "trying", "to", "purchase", "tickets", "to", "enjoy", "the", "park", "when", "it", "first", "opens", "@@END@@", "@@START@@", "meanwhile", "the", "disney", "subway", "station", "is", "scheduled", "to", "open", "on", "the", "same", "day", "as", "the", "park", "@@END@@", "@@START@@", "for", "two", "years", "disney", "has", "constantly", "maintained", "its", "mystery", "@@END@@", "@@START@@", "no", "media", "have", "been", "allowed", "to", "enter", "for", "photos", "@@END@@", "@@START@@", "we", "took", "a", "taxi", "along", "the", "path", "of", "the", "highway", "that", "heads", "toward", "disney", "trying", "to", "experience", "this", "mysterious", "park", "from", "close", "by", "@@END@@", "@@START@@", "however", "before", "any", "of", "the", "disney", "symbols", "were", "in", "sight", "the", "car", "was", "stopped", "by", "a", "security", "guard", "at", "the", "intersection", "of", "the", "road", "towards", "disney", "@@END@@", "@@START@@", "on", "our", "way", "back", "the", "taxi", "driver", "gave", "us", "an", "explanation", "after", "understanding", "our", "intentions", "@@END@@", "@@START@@", "er", 
"according", "to", "what", "the", "security", "guard", "said", "for", "the", "time", "before", "everything", "is", "officially", "opened", "no", "cars", "can", "enter", "unless", "they", "have", "special", "permission", "@@END@@", "@@START@@", "no", "one", "can", "enter", "otherwise", "@@END@@", "@@START@@", "video", "recording", "is", "especially", "forbidden", "@@END@@", "@@START@@", "ah", "everything", "is", "top", "secret", "@@END@@", "@@START@@", "if", "pictures", "are", "taken", "without", "permission", "that", "is", "to", "say", "it", "will", "at", "all", "times", "be", "pursued", "by", "legal", "action", "a", "big", "hassle", "@@END@@", "@@START@@", "although", "disney", "corporation", "chose", "hong", "kong", "as", "the", "venue", "for", "the", "chinese", "disney", "park", "what", "they", "are", "actually", "most", "excited", "about", "is", "the", "mainland", "china", "tourist", "market", "@@END@@"], "clusters": {"23": [[23, 25], [41, 43], [106, 108], [146, 148], [191, 193], [483, 485]], "18": [[29, 30], [32, 38]], "12": [[52, 58], [87, 88], [121, 124], [129, 132], [221, 226], [250, 253], [261, 262], [278, 279], [288, 290], [291, 292], [298, 299], [317, 318], [321, 322], [349, 350], [353, 356], [386, 387], [489, 493]], "50": [[72, 73], [75, 76], [152, 154], [366, 367], [480, 482], [494, 495]], "13": [[137, 138], [143, 145]], "14": [[154, 176], [183, 185], [196, 197], [213, 215]], "8": [[199, 201]], "29": [[201, 205], [208, 209]], "4": [[254, 256], [306, 312]], "40": [[336, 337], [390, 391], [397, 398], [402, 403]], "21": [[338, 340], [371, 373]], "31": [[376, 379], [410, 413]]}} +{"tokens": ["@@START@@", "since", "the", "implementation", "of", "the", "individual", "visit", "scheme", "between", "hong", "kong", "and", "the", "mainland", "more", "and", "more", "mainland", "tourists", "are", "coming", "to", "visit", "hong", "kong", "@@END@@", "@@START@@", "from", "the", "beginning", "up", "till", "now", "more", "than", "seven", "million", "individual", "tourists", "have", "come", "to", "hong", "kong", "@@END@@", "@@START@@", "well", "we", "now", "er", "believe", "more", "will", "be", "coming", "@@END@@", "@@START@@", "at", "this", "point", "it", "has", "been", "about", "two", "years", "@@END@@", "@@START@@", "also", "the", "current", "number", "of", "@@NUM@@", "cities", "will", "be", "increased", "@@END@@", "@@START@@", "hong", "kong", "was", "developed", "from", "a", "fishing", "harbor", "one", "hundred", "years", "ago", "to", "become", "today", "'s", "international", "metropolis", "@@END@@", "@@START@@", "here", "eastern", "and", "western", "cultures", "have", "gathered", "and", "the", "new", "and", "the", "old", "coexist", "@@END@@", "@@START@@", "when", "in", "hong", "kong", "you", "can", "wander", "among", "skyscrapers", "heartily", "enjoy", "shopping", "sprees", "in", "well", "known", "stores", "and", "malls", "for", "goods", "from", "various", "countries", "and", "taste", "delicious", "snacks", "from", "all", "over", "the", "world", "at", "tea", "shops", "or", "at", "street", "stands", "in", "mong", "kok", "@@END@@", "@@START@@", "you", "can", "go", "to", "burn", "incense", "and", "make", "a", "vow", "at", "the", "repulse", "bay", "where", "all", "deities", "gather", "@@END@@", "@@START@@", "you", "can", "enjoy", "the", "most", "charming", "sun", "filled", "sandy", "beaches", "in", "hong", "kong", "@@END@@", "@@START@@", "you", "can", "ascend", "victoria", "peak", "to", "get", "a", "panoramic", "view", "of", "victoria", "harbor", "'s", "beautiful", "scenery", "@@END@@", 
"@@START@@", "or", "hop", "onto", "a", "trolley", "with", "over", "a", "century", "of", "history", "and", "feel", "the", "city", "'s", "blend", "of", "the", "old", "and", "the", "modern", "in", "slow", "motion", "@@END@@"], "clusters": {"19": [[10, 12], [24, 26], [43, 45], [81, 83], [119, 121], [193, 195], [228, 231]]}} diff --git a/kglm/tests/models/kglm_test.py b/kglm/tests/models/kglm_test.py index 82991da..c7a2c91 100644 --- a/kglm/tests/models/kglm_test.py +++ b/kglm/tests/models/kglm_test.py @@ -53,7 +53,7 @@ def test_sample(self): instances = list(reader.read(dataset_file)) iterator = DataIterator.from_params(generator_params['iterator']) iterator.index_with(self.model.vocab) - batch, _ = next(iterator(instances, shuffle=False)) + batch = next(iterator(instances, shuffle=False)) self.model.sample(**batch) @@ -80,7 +80,7 @@ def test_sample(self): iterator = DataIterator.from_params(generator_params['iterator']) iterator.index_with(self.model.vocab) - batch, _ = next(iterator(instances, shuffle=False)) + batch = next(iterator(instances, shuffle=False)) # Samples should match (we'll test by comparing logp) torch.manual_seed(123) @@ -101,7 +101,7 @@ def test_beam_search(self): iterator = DataIterator.from_params(generator_params['iterator']) iterator.index_with(self.model.vocab) - batch, _ = next(iterator(instances, shuffle=False)) + batch = next(iterator(instances, shuffle=False)) # Just want to check that function does not raise an error for now. self.model.beam_search(batch['source'], @@ -151,8 +151,7 @@ def test_next_new_entity_logp(self): recent_entities_state_1 = {'remaining': [{}]} beam_states = [ KglmBeamState(recent_entities=recent_entities_state_0, ongoing=ongoing), - KglmBeamState(recent_entities=recent_entities_state_1, ongoing=ongoing) - ] + KglmBeamState(recent_entities=recent_entities_state_1, ongoing=ongoing)] next_new_entity_logp = self.model._next_new_entity_logp(next_new_entity_logits, beam_states) # Log probabilities should be different on first beam, and same on second. From 6c11685b42091b3c4293ec009b5f486bd64b0e6f Mon Sep 17 00:00:00 2001 From: rloganiv Date: Sun, 8 Dec 2019 02:18:01 -0800 Subject: [PATCH 32/35] Added temperature to KglmDisc --- kglm/models/kglm_disc.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/kglm/models/kglm_disc.py b/kglm/models/kglm_disc.py index 3d28b3c..46c231d 100644 --- a/kglm/models/kglm_disc.py +++ b/kglm/models/kglm_disc.py @@ -140,7 +140,7 @@ def forward(self, # pylint: disable=arguments-differ parent_ids: Dict[str, torch.Tensor] = None, relations: Dict[str, torch.Tensor] = None, shortlist: Dict[str, torch.Tensor] = None, - shortlist_inds: torch.Tensor = None) -> Dict[str, torch.Tensor]: + shortlist_inds: torch.Tensor = None, **kwargs) -> Dict[str, torch.Tensor]: # Tensorize the alias_database - this will only perform the operation once. alias_database = metadata[0]['alias_database'] @@ -174,6 +174,7 @@ def sample(self, metadata: Dict[str, Any], alias_copy_inds: torch.Tensor, shortlist: Dict[str, torch.Tensor] = None, + temperature: float, **kwargs) -> Dict[str, Any]: # **kwargs intended to eat the other fields if they are provided. """ Sampling annotations for the generative model. 
Note that unlike forward, this function @@ -200,7 +201,7 @@ def sample(self, logp = encoded.new_zeros(batch_size) # Compute new mention logits - mention_logits = self._fc_mention_type(encoded_token) + mention_logits = self._fc_mention_type(encoded_token) / temperature mention_probs = F.softmax(mention_logits, dim=-1) mention_type = parallel_sample(mention_probs) _mention_logp = mention_probs.gather(-1, mention_type.unsqueeze(-1)).log() @@ -209,7 +210,7 @@ def sample(self, # Compute entity logits new_entity_mask = mention_type.eq(1) - new_entity_logits = self._new_entity_logits(encoded_head + encoded_relation, shortlist) + new_entity_logits = self._new_entity_logits(encoded_head + encoded_relation, shortlist) / temperature if self._use_shortlist: # If using shortlist, then samples are indexed w.r.t the shortlist and entity_ids must be looked up shortlist_mask = get_text_field_mask(shortlist) @@ -270,7 +271,7 @@ def sample(self, # Compute logits w.r.t **current** hidden state only current_head_encoding = encoded_head[:, i].unsqueeze(1) - selection_logits = torch.bmm(current_head_encoding, candidate_embeddings.transpose(1, 2)) + selection_logits = torch.bmm(current_head_encoding, candidate_embeddings.transpose(1, 2)) / temperature selection_probs = masked_softmax(selection_logits, candidate_mask) # Only sample if the is at least one viable candidate (e.g. if a sampling distribution @@ -305,7 +306,7 @@ def sample(self, # Compute the score for each relation w.r.t the current encoding. NOTE: In the loss # code index has a slice. We don't need that here since there is always a # **single** parent. - logits = torch.mv(relation_embedding, current_relation_encoding[index]) + logits = torch.mv(relation_embedding, current_relation_encoding[index]) / temperature # Convert to probability tail_probs = F.softmax(logits, dim=-1) # Sample @@ -467,11 +468,11 @@ def _new_entity_loss(self, loss = -log_probs.gather(-1, target_inds.unsqueeze(-1)).squeeze(-1) loss = loss * target_mask.float() - if target_mask.any(): - self._new_entity_accuracy(predictions=log_probs[target_mask], - gold_labels=target_inds[target_mask]) - self._new_entity_accuracy20(predictions=log_probs[target_mask], - gold_labels=target_inds[target_mask]) + # if target_mask.any(): + # self._new_entity_accuracy(predictions=log_probs[target_mask], + # gold_labels=target_inds[target_mask]) + # self._new_entity_accuracy20(predictions=log_probs[target_mask], + # gold_labels=target_inds[target_mask]) return loss.sum(-1) # / (target_mask.sum() + 1e-13) From 39303d54a30b79496b0af7a11c8bf5725d049131 Mon Sep 17 00:00:00 2001 From: rloganiv Date: Sun, 8 Dec 2019 02:35:40 -0800 Subject: [PATCH 33/35] BUGFIX: Temperature default parm --- kglm/models/kglm_disc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kglm/models/kglm_disc.py b/kglm/models/kglm_disc.py index 46c231d..aa24148 100644 --- a/kglm/models/kglm_disc.py +++ b/kglm/models/kglm_disc.py @@ -174,7 +174,7 @@ def sample(self, metadata: Dict[str, Any], alias_copy_inds: torch.Tensor, shortlist: Dict[str, torch.Tensor] = None, - temperature: float, + temperature: float = 1.0, **kwargs) -> Dict[str, Any]: # **kwargs intended to eat the other fields if they are provided. """ Sampling annotations for the generative model. 
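A note on the two patches above: as introduced in patch 32, `temperature: float` had no default and followed the defaulted `shortlist` argument, which Python rejects at import time ("non-default argument follows default argument"); patch 33's one-line fix gives it a default of 1.0. The substance of the change is that every logit tensor used for sampling is divided by the temperature before the softmax. A minimal, self-contained sketch of that effect (the toy logits and the helper name are illustrative, not from the patch):

    import torch
    import torch.nn.functional as F

    def sample_with_temperature(logits: torch.Tensor, temperature: float = 1.0) -> torch.Tensor:
        # temperature < 1 sharpens the distribution toward the argmax;
        # temperature > 1 flattens it toward uniform; 1.0 leaves it unchanged.
        probs = F.softmax(logits / temperature, dim=-1)
        # One draw per row, mirroring the per-token sampling in KglmDisc.sample().
        return torch.multinomial(probs, num_samples=1)

    toy_logits = torch.tensor([[2.0, 1.0, 0.1]])
    near_greedy = sample_with_temperature(toy_logits, temperature=0.5)
    near_uniform = sample_with_temperature(toy_logits, temperature=2.0)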
Note that unlike forward, this function From 453189bf90a4596fe27c605d555c3c416f0f372c Mon Sep 17 00:00:00 2001 From: rloganiv Date: Sun, 8 Dec 2019 23:48:26 -0800 Subject: [PATCH 34/35] Cleaner importance sampling logging + added offset option to KglmDisc --- kglm/commands/evaluate_perplexity.py | 21 ++++++++++++++------- kglm/models/kglm_disc.py | 6 +++++- 2 files changed, 19 insertions(+), 8 deletions(-) diff --git a/kglm/commands/evaluate_perplexity.py b/kglm/commands/evaluate_perplexity.py index e71caa6..6d0fe25 100644 --- a/kglm/commands/evaluate_perplexity.py +++ b/kglm/commands/evaluate_perplexity.py @@ -58,7 +58,7 @@ def add_subparser(self, name: str, parser: argparse._SubParsersAction) -> argpar subparser.add_argument('--split-size', type=int, - default=1e10, + default=int(1e10), help='Split size (default: whatever iterator was set to)') subparser.add_argument('--num-samples', @@ -107,6 +107,10 @@ def add_subparser(self, name: str, parser: argparse._SubParsersAction) -> argpar def split(batch, split_size: int): sequence_length = batch['source']['tokens'].shape[1] num_splits = sequence_length // split_size + if not ((sequence_length % split_size) == 0): + num_splits += 1 + else: + logger.warning('Perfect split') def _chunk(x, start, stop): if isinstance(x, dict): @@ -156,7 +160,7 @@ def evaluate_perplexity(model: Model, temperature: float = 1.0, offset: bool = False, samples_per_batch: int = 1, - split_size: int = 1e10) -> Dict[str, Any]: + split_size: int = int(1e10)) -> Dict[str, Any]: check_for_gpu(cuda_device) logger.info('Iterating over dataset') @@ -193,14 +197,17 @@ def evaluate_perplexity(model: Model, for i in range(num_samples // samples_per_batch): - logger.info(f'i={i}') - # summand = util.move_to_device(summand, cuda_device) # batch = util.move_to_device(batch, cuda_device) weights = None - for chunk in split(batch, split_size): - logger.info('next_chunk') + for j, chunk in enumerate(split(batch, split_size)): + generator_tqdm.set_description(f"i={i} j={j}") + + chunk_tokens = util.get_text_field_mask(batch['source']).int().sum().item() + if chunk_tokens == 0: + logger.debug('Zero chunk, skipping') + continue # Draw a sample with torch.no_grad(): @@ -294,7 +301,7 @@ def evaluate_from_args(args: argparse.Namespace) -> Dict[str, Any]: # deal with averaging the importance samples across splits ... # if args.split_size is not None: # iterator_params['split_size'] = args.split_size - iterator_params['split_size'] = 1e10 + iterator_params['split_size'] = int(1e10) iterator_params['truncate'] = False # TODO: Shouldn't need this anymore... iterator = DataIterator.from_params(iterator_params) iterator.index_with(model.vocab) diff --git a/kglm/models/kglm_disc.py b/kglm/models/kglm_disc.py index aa24148..ef81587 100644 --- a/kglm/models/kglm_disc.py +++ b/kglm/models/kglm_disc.py @@ -175,6 +175,7 @@ def sample(self, alias_copy_inds: torch.Tensor, shortlist: Dict[str, torch.Tensor] = None, temperature: float = 1.0, + offset: bool = False, **kwargs) -> Dict[str, Any]: # **kwargs intended to eat the other fields if they are provided. """ Sampling annotations for the generative model. Note that unlike forward, this function @@ -193,7 +194,10 @@ def sample(self, # We encode the target tokens (**not** source) since the discriminitative model makes # predictions on the current token, but the generative model expects labels for the # **next** (e.g. target) token! 
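Two small details in patch 34 before the next hunk. The split() change above is ceiling division written out: one extra chunk is emitted whenever the sequence length is not a multiple of split_size. And the hunk immediately below makes the encoder input configurable: by default sample() keeps encoding the target tokens, as the docstring explains, while offset=True switches it to the source tokens (in effect shifting the predictions by one position, matching the offset flag that evaluate_perplexity already accepts). A quick check of the chunk-count arithmetic, with toy numbers:

    sequence_length, split_size = 70, 32
    num_splits = sequence_length // split_size
    if sequence_length % split_size != 0:
        num_splits += 1
    # Equivalent one-liner: ceil(sequence_length / split_size)
    assert num_splits == (sequence_length + split_size - 1) // split_size == 3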
- encoded, *_ = self._encode_source(target['tokens']) + if not offset: + encoded, *_ = self._encode_source(target['tokens']) + else: + encoded, *_ = self._encode_source(source['tokens']) splits = [self.token_embedding_dim] + [self.entity_embedding_dim] * 2 encoded_token, encoded_head, encoded_relation = encoded.split(splits, dim=-1) From ef6b352c9a54ba78a7725dac534903875688c126 Mon Sep 17 00:00:00 2001 From: rloganiv Date: Sun, 12 Jul 2020 19:48:31 -0700 Subject: [PATCH 35/35] ACL 2020 paper updates --- Dockerfile | 5 ++-- kglm/commands/beamsum.py | 9 ++++-- kglm/commands/evaluate_perplexity.py | 42 +++++++++++++++------------- kglm/data/__init__.py | 1 + kglm/models/__init__.py | 2 ++ kglm/models/entity_disc.py | 3 +- kglm/models/kglm_disc.py | 17 ++++++----- 7 files changed, 46 insertions(+), 33 deletions(-) diff --git a/Dockerfile b/Dockerfile index 16239fd..dbc7ddc 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,5 +1,5 @@ -FROM python:3.6.8-jessie - +# FROM python:3.6.8-jessie +FROM pytorch/pytorch:1.1.0-cuda10.0-cudnn7.5-runtime ENV LC_ALL=C.UTF-8 ENV LANG=C.UTF-8 @@ -18,6 +18,7 @@ WORKDIR /workspace RUN chmod -R a+w /workspace COPY requirements.txt . +RUN pip install --upgrade pip RUN pip install -r requirements.txt COPY .pylintrc .pylintrc diff --git a/kglm/commands/beamsum.py b/kglm/commands/beamsum.py index 7d5b38f..9b0bbeb 100644 --- a/kglm/commands/beamsum.py +++ b/kglm/commands/beamsum.py @@ -112,9 +112,12 @@ def evaluate_perplexity(model: Model, # Draw a sample with torch.no_grad(): - sample = sampler.beam_search(source=batch['source'], - reset=batch['reset'], - k=beam_width) + # sample = sampler.beam_search(source=batch['source'], + # reset=batch['reset'], + # target=batch['target'], + # metadata=batch['metadata'], + # k=beam_width) + sample = sampler.beam_search(**batch, k=beam_width) # Evaluate on sample with torch.no_grad(): diff --git a/kglm/commands/evaluate_perplexity.py b/kglm/commands/evaluate_perplexity.py index 6d0fe25..2300776 100644 --- a/kglm/commands/evaluate_perplexity.py +++ b/kglm/commands/evaluate_perplexity.py @@ -118,13 +118,15 @@ def _chunk(x, start, stop): if isinstance(x, torch.Tensor): return x[:, start:stop].contiguous() + chunks = [] for i in range(num_splits): chunk = _chunk(batch, i * split_size, (i + 1) * split_size) if i > 0: chunk['reset'] = torch.zeros_like(chunk['reset']) - yield chunk + chunks.append(chunk) + return chunks def tile(t, amount): @@ -138,17 +140,17 @@ def tile(t, amount): return [x for x in t for _ in range(amount)] -def logsumexp(prev: torch.FloatTensor, - current: torch.FloatTensor, - i: int, - samples_per_batch: int): - # NOTE: n is number of samples - current_avg = current.view(samples_per_batch, -1).sum(dim=-1).logsumexp(dim=0) - np.log(samples_per_batch).item() - if prev is None: - return current_avg - a = torch.max(prev, current_avg) - sumexp = torch.exp(prev - a) * i / (i + 1) + torch.exp(current_avg - a) / (i + 1) - return a + torch.log(sumexp) +# def logsumexp(prev: torch.FloatTensor, +# current: torch.FloatTensor, +# i: int, +# samples_per_batch: int): +# # NOTE: n is number of samples +# current_avg = current.view(samples_per_batch, -1).sum(dim=-1).logsumexp(dim=0) - np.log(samples_per_batch).item() +# if prev is None: +# return current_avg +# a = torch.max(prev, current_avg) +# sumexp = torch.exp(prev - a) * i / (i + 1) + torch.exp(current_avg - a) / (i + 1) +# return a + torch.log(sumexp) def evaluate_perplexity(model: Model, @@ -204,7 +206,7 @@ def evaluate_perplexity(model: Model, for j, chunk in 
enumerate(split(batch, split_size)): generator_tqdm.set_description(f"i={i} j={j}") - chunk_tokens = util.get_text_field_mask(batch['source']).int().sum().item() + chunk_tokens = util.get_text_field_mask(batch['source']).int().sum() if chunk_tokens == 0: logger.debug('Zero chunk, skipping') continue @@ -230,11 +232,11 @@ def evaluate_perplexity(model: Model, weights = split_weights else: weights += split_weights - logger.debug(torch.exp(-split_weights/split_size)) + # logger.debug(torch.exp(-split_weights/split_size)) - epoch_weights.append(weights.cpu()) - epoch_fp.append(model_logp.view(batch_size, samples_per_batch).cpu()) - epoch_q.append(sample_logp.view(batch_size, samples_per_batch).cpu()) + epoch_weights.append(weights) #.cpu()) + epoch_fp.append(model_logp.view(batch_size, samples_per_batch))# .cpu()) + epoch_q.append(sample_logp.view(batch_size, samples_per_batch))# .cpu()) # Combine all the epoch weights combined_weights = torch.cat(epoch_weights, dim=1) @@ -251,9 +253,9 @@ def evaluate_perplexity(model: Model, logger.info(f'PPL: {torch.exp(-summand / denom)}') # Create array of all the weights - all_weights_array = torch.cat(all_weights, dim=0).numpy() - fp_array = torch.cat(fp, dim=0).numpy() - q_array = torch.cat(q, dim=0).numpy() + all_weights_array = torch.cat(all_weights, dim=0).cpu().numpy() + fp_array = torch.cat(fp, dim=0).cpu().numpy() + q_array = torch.cat(q, dim=0).cpu().numpy() # Compute perplexity ppl = torch.exp(-summand / denom) diff --git a/kglm/data/__init__.py b/kglm/data/__init__.py index 3a4cecd..b17416b 100644 --- a/kglm/data/__init__.py +++ b/kglm/data/__init__.py @@ -2,3 +2,4 @@ from .fields import SequentialArrayField from .iterators import SplitIterator from .extended_vocabulary import ExtendedVocabulary +from .dataset_readers import * diff --git a/kglm/models/__init__.py b/kglm/models/__init__.py index e69de29..e6ff16c 100644 --- a/kglm/models/__init__.py +++ b/kglm/models/__init__.py @@ -0,0 +1,2 @@ +from .entity_disc import EntityNLMDiscriminator +from .entity_nlm import EntityNLM diff --git a/kglm/models/entity_disc.py b/kglm/models/entity_disc.py index 12dd790..3893119 100644 --- a/kglm/models/entity_disc.py +++ b/kglm/models/entity_disc.py @@ -556,7 +556,8 @@ def _trace_backpointers(source: Dict[str, torch.Tensor], def beam_search(self, source: Dict[str, torch.Tensor], reset: torch.ByteTensor, - k: int) -> Tuple[torch.Tensor, torch.Tensor]: + k: int, + **kwargs) -> Tuple[torch.Tensor, torch.Tensor]: """ Obtain the top-k (approximately) most likely predictions from the model using beam search. Unlike typical beam search all of the beam states are returned instead of just diff --git a/kglm/models/kglm_disc.py b/kglm/models/kglm_disc.py index ef81587..da9c0f8 100644 --- a/kglm/models/kglm_disc.py +++ b/kglm/models/kglm_disc.py @@ -755,8 +755,9 @@ def _next_related_entity_logp(self, next_encoded_head, next_encoded_relation, be self._recent_entities.load_beam_state(beam_state.recent_entities) for i, candidate_ids in enumerate(self._recent_entities._remaining): # Cast candidate ids to a tensor, lookup embeddings, and compute score. 
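The hunks just below replace the legacy tensor constructors with the torch.tensor factory. torch.LongTensor(data, device=...) rejects CUDA devices (the legacy constructors only build CPU tensors), so the old calls break as soon as next_encoded_head lives on a GPU; torch.tensor takes explicit dtype and device arguments and handles both the populated and the empty cases. A minimal illustration (toy ids; the device choice is hypothetical):

    import torch

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # The torch.tensor factory accepts dtype and device directly,
    # including for empty tensors.
    candidate_ids = torch.tensor([3, 14, 15], dtype=torch.int64, device=device)
    empty_logp = torch.tensor([], dtype=torch.float32, device=device)
    assert candidate_ids.device == empty_logp.device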
- candidate_ids = torch.LongTensor(list(candidate_ids.keys()), - device=next_encoded_head.device) + candidate_ids = torch.tensor(list(candidate_ids.keys()), + dtype=torch.int64, + device=next_encoded_head.device) candidate_embeddings = self._entity_embedder(candidate_ids) candidate_logits = torch.mv(candidate_embeddings, next_encoded_head[i]) candidate_logp = F.log_softmax(candidate_logits) @@ -767,10 +768,10 @@ def _next_related_entity_logp(self, next_encoded_head, next_encoded_relation, be # Stop early if node is isolated if not s: - logp_arr[i, j] = torch.FloatTensor([], device=next_encoded_head.device) - parent_ids_arr[i, j] = torch.LongTensor([], device=next_encoded_head.device) - relations_arr[i, j] = torch.LongTensor([], device=next_encoded_head.device) - raw_entity_ids_arr[i, j] = torch.LongTensor([], device=next_encoded_head.device) + logp_arr[i, j] = torch.tensor([], dtype=torch.float32, device=next_encoded_head.device) + parent_ids_arr[i, j] = torch.tensor([], dtype=torch.int64, device=next_encoded_head.device) + relations_arr[i, j] = torch.tensor([], dtype=torch.int64, device=next_encoded_head.device) + raw_entity_ids_arr[i, j] = torch.tensor([], dtype=torch.int64, device=next_encoded_head.device) continue # Otherwise compute relation probabilities for each parent and combine @@ -1020,7 +1021,8 @@ def beam_search(self, target: Dict[str, torch.Tensor], reset: torch.ByteTensor, metadata: Dict[str, Any], - k: int) -> Tuple[torch.Tensor, torch.Tensor]: + k: int, + **kwargs) -> Tuple[torch.Tensor, torch.Tensor]: """ Obtain the top-k (approximately) most likely predictions from the model using beam search. Unlike typical beam search all of the beam states are returned instead of just @@ -1084,6 +1086,7 @@ def beam_search(self, output = None for timestep in range(sequence_length): + logger.debug(timestep) # Get log probabilities of all next states next_mention_type_logp = self._next_mention_type_logp(mention_type_logits[:, timestep], beam_states)
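Taken together, the evaluate_perplexity changes in patches 34 and 35 serve one estimator: for each batch, annotations are sampled from the discriminative proposal, scored under the generative model, and the per-sample log importance weight (model log-probability minus proposal log-probability) is accumulated across the split chunks. The commented-out logsumexp helper and the torch.exp(-summand / denom) lines both reduce those weights to a perplexity. A self-contained sketch of that final reduction, assuming weights holds the log weights for n samples of one sequence and num_tokens counts its target tokens (the values are toy numbers):

    import math
    import torch

    def importance_sampled_ppl(weights: torch.Tensor, num_tokens: int) -> torch.Tensor:
        # weights[i] = log p(x, z_i) - log q(z_i | x) for each sampled annotation z_i.
        # Monte Carlo estimate of log p(x): log((1/n) * sum_i exp(weights[i])).
        log_marginal = torch.logsumexp(weights, dim=0) - math.log(weights.shape[0])
        # Perplexity is the exponentiated negative per-token log-likelihood.
        return torch.exp(-log_marginal / num_tokens)

    weights = torch.tensor([-120.3, -118.9, -119.5])  # 3 samples, toy values
    print(importance_sampled_ppl(weights, num_tokens=50))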