From ee51238e7a25659ba688cc93eb3d11576470fe29 Mon Sep 17 00:00:00 2001
From: mehrad
Date: Sat, 2 Oct 2021 23:17:02 -0700
Subject: [PATCH 1/7] almond_task: minor fix

---
 genienlp/tasks/almond_task.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/genienlp/tasks/almond_task.py b/genienlp/tasks/almond_task.py
index f605dc02..c8960e5d 100644
--- a/genienlp/tasks/almond_task.py
+++ b/genienlp/tasks/almond_task.py
@@ -402,7 +402,7 @@ def batch_postprocess_prediction_ids(self, batch_example_ids, batch_src_ids, bat
         cross_att = cross_att[: len(tgt_tokens), : len(src_tokens)]
 
         # plot cross-attention heatmap
-        if self.args.plot_heatmaps:
+        if getattr(self.args, 'plot_heatmaps', False):
             import matplotlib.pyplot as plt
             import seaborn as sns
 

From cfa84ab39c8669ce0c54d58fbdf85a296fff9e05 Mon Sep 17 00:00:00 2001
From: mehrad
Date: Sun, 3 Oct 2021 00:23:41 -0700
Subject: [PATCH 2/7] server: support custom generation args

---
 genienlp/data_utils/numericalizer.py   | 42 ++++++++++++++------------
 genienlp/models/transformer_seq2seq.py | 41 +++++++++++++------------
 genienlp/server.py                     | 40 +++++++++++++++++-------
 genienlp/util.py                       |  6 ++--
 4 files changed, 78 insertions(+), 51 deletions(-)

diff --git a/genienlp/data_utils/numericalizer.py b/genienlp/data_utils/numericalizer.py
index a65fce33..9e6c1f42 100644
--- a/genienlp/data_utils/numericalizer.py
+++ b/genienlp/data_utils/numericalizer.py
@@ -112,6 +112,7 @@ def __init__(
         self.max_generative_vocab = max_generative_vocab
         self._cache = args.embeddings
         self._tokenizer = None
+        self.config = config
 
         self._preprocess_special_tokens = args.preprocess_special_tokens
 
@@ -126,6 +127,8 @@ def __init__(
 
         self._init_tokenizer(save_dir, config, src_lang, tgt_lang)
 
+        self.update_language_dependent_properties(src_lang, tgt_lang)
+
         if save_dir is not None:
             logger.info(f'Loading the accompanying numericalizer from {save_dir}')
             self.load_extras(save_dir)
@@ -179,25 +182,6 @@ def _init_tokenizer(self, save_dir, config, src_lang, tgt_lang):
 
         self._tokenizer = AutoTokenizer.from_pretrained(**tokenizer_args)
 
-        # some tokenizers like Mbart do not set src_lang and tgt_lang when initialized; take care of it here
-        self._tokenizer.src_lang = src_lang
-        self._tokenizer.tgt_lang = tgt_lang
-
-        # define input prefix to add before every input text
-        input_prefix = ''
-        if isinstance(config, MarianConfig) and tgt_lang:
-            input_prefix = f'>>{tgt_lang}<< '
-        # only older T5 models need task-specific input prefix
-        elif self._pretrained_name in T5_PRETRAINED_CONFIG_ARCHIVE_MAP.keys():
-            assert src_lang == 'en'
-            if tgt_lang == 'en':
-                t5_task = 'summarization'
-            else:
-                t5_task = f'translation_en_to_{tgt_lang}'
-            input_prefix = config.task_specific_params[t5_task]['prefix']
-
-        self.input_prefix = input_prefix
-
         # We only include the base tokenizers since `isinstance` checks for inheritance
         if isinstance(self._tokenizer, (BertTokenizer, BertTokenizerFast)):
             self._tokenizer.is_piece_fn = lambda wp: wp.startswith('##')
@@ -223,6 +207,26 @@ def _init_tokenizer(self, save_dir, config, src_lang, tgt_lang):
         # make sure we assigned is_piece_fn
         assert self._tokenizer.is_piece_fn
 
+    def update_language_dependent_properties(self, src_lang, tgt_lang):
+        # some tokenizers like Mbart do not set src_lang and tgt_lang when initialized; take care of it here
+        self._tokenizer.src_lang = src_lang
+        self._tokenizer.tgt_lang = tgt_lang
+
+        # define input prefix to add before every input text
+        input_prefix = ''
+        if isinstance(self.config, MarianConfig) and tgt_lang:
+            input_prefix = f'>>{tgt_lang}<< '
+        # only older T5 models need task-specific input prefix
+        elif self._pretrained_name in T5_PRETRAINED_CONFIG_ARCHIVE_MAP.keys():
+            assert src_lang == 'en'
+            if tgt_lang == 'en':
+                t5_task = 'summarization'
+            else:
+                t5_task = f'translation_en_to_{tgt_lang}'
+            input_prefix = self.config.task_specific_params[t5_task]['prefix']
+
+        self.input_prefix = input_prefix
+
     def load_extras(self, save_dir):
         if self.max_generative_vocab is not None:
             with open(os.path.join(save_dir, 'decoder-vocab.txt'), 'r') as fp:

diff --git a/genienlp/models/transformer_seq2seq.py b/genienlp/models/transformer_seq2seq.py
index 713427b3..cfeb0c41 100644
--- a/genienlp/models/transformer_seq2seq.py
+++ b/genienlp/models/transformer_seq2seq.py
@@ -48,6 +48,7 @@ def __init__(self, config=None, *inputs, args, tasks, vocab_sets, save_directory
         If `save_directory` is None, will initialize a new model and numericalizer, otherwise, will load them from `save_directory`
         """
         config = AutoConfig.from_pretrained(args.pretrained_model, cache_dir=args.embeddings)
+        self.config = config
         super().__init__(config)
         self.args = args
         args.dimension = config.d_model
@@ -57,7 +58,7 @@ def __init__(self, config=None, *inputs, args, tasks, vocab_sets, save_directory
         # call this function after task is recognized
         if tasks:
             self.set_generation_output_options(tasks)
-
+        # only used for Marian models. adjusted language codes passed to numericalizer will be None for models trained on single language pairs
         self.orig_src_lang, self.orig_tgt_lang = kwargs.get('src_lang', 'en'), kwargs.get('tgt_lang', 'en')
         self.src_lang, self.tgt_lang = adjust_language_code(
@@ -81,26 +82,9 @@ def __init__(self, config=None, *inputs, args, tasks, vocab_sets, save_directory
             tasks=tasks,
         )
 
+        self.update_language_dependent_configs(self.tgt_lang)
         self.model.resize_token_embeddings(self.numericalizer.num_tokens)
 
-        # set decoder_start_token_id for mbart
-        if self.model.config.decoder_start_token_id is None and isinstance(
-            self.numericalizer._tokenizer, (MBartTokenizer, MBartTokenizerFast)
-        ):
-            if isinstance(self.numericalizer._tokenizer, MBartTokenizer):
-                self.model.config.decoder_start_token_id = self.numericalizer._tokenizer.lang_code_to_id[self.tgt_lang]
-            else:
-                self.model.config.decoder_start_token_id = self.numericalizer._tokenizer.convert_tokens_to_ids(self.tgt_lang)
-
-        # check decoder_start_token_id is set
-        if self.model.config.decoder_start_token_id is None:
-            raise ValueError("Make sure that decoder_start_token_id for the model is defined")
-
-        # set forced_bos_token_id for certain multilingual models
-        if isinstance(self.numericalizer._tokenizer, MULTILINGUAL_TOKENIZERS):
-            forced_bos_token_id = self.numericalizer._tokenizer.lang_code_to_id[self.tgt_lang]
-            self.model.config.forced_bos_token_id = forced_bos_token_id
-
         if args.dropper_ratio > 0:
             # lazy import since dropper is an optional dependency
             from loss_dropper import LossDropper
@@ -115,6 +99,25 @@ def add_new_vocab_from_data(self, tasks, resize_decoder=False):
         super().add_new_vocab_from_data(tasks, resize_decoder)
         self.model.resize_token_embeddings(self.numericalizer.num_tokens)
 
+    def update_language_dependent_configs(self, tgt_lang):
+        # set decoder_start_token_id for mbart
+        if self.config.decoder_start_token_id is None and isinstance(
+            self.numericalizer._tokenizer, (MBartTokenizer, MBartTokenizerFast)
+        ):
+            if isinstance(self.numericalizer._tokenizer, MBartTokenizer):
+                self.config.decoder_start_token_id = self.numericalizer._tokenizer.lang_code_to_id[tgt_lang]
+            else:
+                self.config.decoder_start_token_id = self.numericalizer._tokenizer.convert_tokens_to_ids(tgt_lang)
+
+        # check decoder_start_token_id is set
+        if self.config.decoder_start_token_id is None:
+            raise ValueError("Make sure that decoder_start_token_id for the model is defined")
+
+        # set forced_bos_token_id for certain multilingual models
+        if isinstance(self.numericalizer._tokenizer, MULTILINGUAL_TOKENIZERS):
+            forced_bos_token_id = self.numericalizer._tokenizer.lang_code_to_id[tgt_lang]
+            self.config.forced_bos_token_id = forced_bos_token_id
+
     def forward(self, *input, **kwargs):
         if self.training or kwargs.get('train', False):
             batch = input[0]

diff --git a/genienlp/server.py b/genienlp/server.py
index 65d1f82d..e25b157d 100644
--- a/genienlp/server.py
+++ b/genienlp/server.py
@@ -30,6 +30,7 @@
 
 
 import asyncio
+import copy
 import json
 import logging
 import os
@@ -44,7 +45,7 @@
 from .data_utils.example import Example, NumericalizedExamples
 from .ned.ned_utils import init_ned_model
 from .tasks.registry import get_tasks
-from .util import get_devices, load_config_json, log_model_size, set_seed
+from .util import adjust_language_code, get_devices, load_config_json, log_model_size, set_seed
 from .validate import generate_with_model
 
 logger = logging.getLogger(__name__)
@@ -63,8 +64,8 @@ def parse_argv(parser):
     parser.add_argument('--port', default=8401, type=int, help='TCP port to listen on')
     parser.add_argument('--stdin', action='store_true', help='Interact on stdin/stdout instead of TCP')
    parser.add_argument('--database_dir', type=str, help='Database folder containing all relevant files')
-    parser.add_argument('--src_locale', default='en', help='locale tag of the input language to parse')
-    parser.add_argument('--tgt_locale', default='en', help='locale tag of the target language to generate')
+    parser.add_argument('--src_locale', help='locale tag of the input language to parse')
+    parser.add_argument('--tgt_locale', help='locale tag of the target language to generate')
     parser.add_argument('--inference_name', default='nlp', help='name used by kfserving inference service, alphanumeric only')
 
     # These are generation hyperparameters. Each one can be a list of values in which case, we generate `num_outputs` outputs for each set of hyperparameters.
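
With the handle_request changes in the next hunk, these hyperparameters become overridable per request instead of requiring a server restart. As a minimal, hypothetical client sketch for the JSON-over-TCP interface (only 'id', 'task', and 'options' are grounded in this series; the shape of 'instances' is an assumption for illustration):

    # Hypothetical client; 'options' carries the per-request generation
    # overrides handled by the hunk below. The 'instances' field shape is
    # an illustrative assumption, not taken from this patch series.
    import json
    import socket

    request = {
        'id': '1',
        'task': 'almond',
        'instances': [{'example_id': '0', 'context': 'show me nearby restaurants', 'question': ''}],
        'options': {'num_beams': 4, 'temperature': 0.7},
    }

    with socket.create_connection(('127.0.0.1', 8401)) as sock:
        # the server consumes one JSON request per line and answers in kind
        sock.sendall((json.dumps(request) + '\n').encode('utf-8'))
        print(sock.makefile().readline())
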
@@ -120,8 +121,21 @@ def numericalize_examples(self, ex):
 
         return NumericalizedExamples.collate_batches(all_features, self.numericalizer, device=self.device)
 
     def handle_request(self, request):
+        args = copy.deepcopy(self.args)
+        generation_options = request.get('options', {})
+        for k, v in generation_options.items():
+            setattr(args, k, v)
+
+        # TODO handle this better by decoupling numericalizer and model
+        if hasattr(args, 'src_locale') and hasattr(args, 'tgt_locale'):
+            src_locale, tgt_locale = adjust_language_code(
+                self.model.config, self.args.pretrained_model, args.src_locale, args.tgt_locale
+            )
+            self.numericalizer.update_language_dependent_properties(src_locale, tgt_locale)
+            self.model.update_language_dependent_configs(tgt_locale)
+
         task_name = request['task'] if 'task' in request else 'generic'
-        task = list(get_tasks([task_name], self.args, self._cached_task_names).values())[0]
+        task = list(get_tasks([task_name], args, self._cached_task_names).values())[0]
         if task_name not in self._cached_task_names:
             self._cached_task_names[task_name] = task
 
@@ -151,7 +165,7 @@ def handle_request(self, request):
                 question = task.default_question
 
             ex = Example.from_raw(
-                str(example_id), context, question, answer, preprocess=task.preprocess_field, lower=self.args.lower
+                str(example_id), context, question, answer, preprocess=task.preprocess_field, lower=args.lower
             )
             examples.append(ex)
 
@@ -165,18 +179,18 @@ def handle_request(self, request):
 
         try:
             with torch.no_grad():
-                if self.args.calibrator_paths is not None:
+                if args.calibrator_paths is not None:
                     output = generate_with_model(
                         self.model,
                         [batch],
                         self.numericalizer,
                         task,
-                        self.args,
+                        args,
                         output_predictions_only=True,
                         confidence_estimators=self.confidence_estimators,
                     )
                     response = []
-                    if sum(self.args.num_outputs) > 1:
+                    if sum(args.num_outputs) > 1:
                         for idx, predictions in enumerate(output.predictions):
                             candidates = []
                             for cand in predictions:
@@ -193,9 +207,9 @@ def handle_request(self, request):
                         response.append(instance)
                 else:
                     output = generate_with_model(
-                        self.model, [batch], self.numericalizer, task, self.args, output_predictions_only=True
+                        self.model, [batch], self.numericalizer, task, args, output_predictions_only=True
                     )
-                    if sum(self.args.num_outputs) > 1:
+                    if sum(args.num_outputs) > 1:
                         response = []
                         for idx, predictions in enumerate(output.predictions):
                             candidates = []
@@ -222,7 +236,7 @@ def handle_json_request(self, line: str) -> str:
             assert len(response) == 1
             response = response[0]
             response['id'] = request['id']
-        return json.dumps(response) + '\n'
+        return json.dumps(response, ensure_ascii=False) + '\n'
 
     async def handle_client(self, client_reader, client_writer):
         try:
@@ -274,6 +288,10 @@ def run(self):
 def init(args):
     load_config_json(args)
     check_and_update_generation_args(args)
+    if not args.src_locale:
+        args.src_locale = args.eval_src_languages
+    if not args.tgt_locale:
+        args.tgt_locale = args.eval_tgt_languages
     set_seed(args)
     devices = get_devices()
 
diff --git a/genienlp/util.py b/genienlp/util.py
index b508446c..b58c9244 100644
--- a/genienlp/util.py
+++ b/genienlp/util.py
@@ -728,7 +728,7 @@ def adjust_language_code(config, pretrained_model, src_lang, tgt_lang):
             src_lang = 'pes'
         else:
             raise ValueError(
-                'Source language is not in this Marian model group languages, please specify the correct source language.'
+                f'Source language "{src_lang}" is not in this Marian model group languages, please specify the correct source language.'
             )
 
     if model_is_marian and pretrained_model.rsplit('-', 1)[1] in MARIAN_GROUP_MEMBERS:
@@ -739,7 +739,7 @@ def adjust_language_code(config, pretrained_model, src_lang, tgt_lang):
             tgt_lang = 'pes'
         else:
             raise ValueError(
-                'Target language is not in this Marian model group languages, please specify the correct target language.'
+                f'Target language "{tgt_lang}" is not in this Marian model group languages, please specify the correct target language.'
             )
 
     if model_is_marian and pretrained_model.rsplit('-', 2)[1] not in MARIAN_GROUP_MEMBERS:
@@ -817,6 +817,8 @@ def load_config_json(args):
             'crossner_domains',
             'hf_test_overfit',
             'override_valid_metrics',
+            'eval_src_languages',
+            'eval_tgt_languages',
         ]
 
     # train and predict scripts have these arguments in common. We use the values from train only if they are not provided in predict

From 699b7844836014c722f94d7be62e160f3cf25459 Mon Sep 17 00:00:00 2001
From: mehrad
Date: Wed, 6 Oct 2021 13:55:09 -0700
Subject: [PATCH 3/7] predict: assign eval_lang to pred_lang if not provided

---
 genienlp/predict.py                    | 11 +++++++----
 tests/test_main_almond_multilingual.sh |  4 ++--
 2 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/genienlp/predict.py b/genienlp/predict.py
index b16b48e7..5950c82b 100644
--- a/genienlp/predict.py
+++ b/genienlp/predict.py
@@ -110,7 +110,6 @@ def parse_argv(parser):
         type=str,
         nargs='+',
         dest='pred_src_languages',
-        default=['en'],
         help='Specify dataset source languages used during prediction for multilingual tasks'
         'multiple languages for each task should be concatenated with +',
     )
@@ -118,7 +117,6 @@ def parse_argv(parser):
         '--pred_tgt_languages',
         type=str,
         nargs='+',
-        default=['en'],
         help='Specify dataset target languages used during prediction for multilingual tasks'
         'multiple languages for each task should be concatenated with +',
     )
@@ -254,10 +252,15 @@ def set_default_values(args):
 
 
 def check_args(args):
+    if not args.pred_src_languages:
+        setattr(args, 'pred_src_languages', [args.eval_src_languages])
+    if not args.pred_tgt_languages:
+        setattr(args, 'pred_tgt_languages', [args.eval_tgt_languages])
+
     if len(args.task_names) != len(args.pred_src_languages):
         raise ValueError(
-            'You have to define prediction languages for each task'
-            'Use None for single language tasks. Also provide languages in the same order you provided the tasks.'
+            'You have to define prediction languages for each task.'
+            ' Use None for single language tasks. Also provide languages in the same order you provided the tasks.'
         )
 
     if getattr(args, 'do_ned', False) and getattr(args, 'ned_retrieve_method', None) == 'bootleg':

diff --git a/tests/test_main_almond_multilingual.sh b/tests/test_main_almond_multilingual.sh
index 94f84ba4..290fd661 100755
--- a/tests/test_main_almond_multilingual.sh
+++ b/tests/test_main_almond_multilingual.sh
@@ -15,9 +15,9 @@ do
     # greedy decode
     # combined evaluation
-    genienlp predict --tasks almond_multilingual --pred_languages fa+en --evaluate test --path $workdir/model_$i --overwrite --eval_dir $workdir/model_$i/eval_results/ --data $SRCDIR/dataset/ --embeddings $EMBEDDING_DIR --skip_cache
+    genienlp predict --tasks almond_multilingual --pred_languages fa+en --pred_tgt_languages en --evaluate test --path $workdir/model_$i --overwrite --eval_dir $workdir/model_$i/eval_results/ --data $SRCDIR/dataset/ --embeddings $EMBEDDING_DIR --skip_cache
     # separate evaluation
-    genienlp predict --tasks almond_multilingual --separate_eval --pred_languages fa+en --evaluate test --path $workdir/model_$i --overwrite --eval_dir $workdir/model_$i/eval_results/ --data $SRCDIR/dataset/ --embeddings $EMBEDDING_DIR --skip_cache
+    genienlp predict --tasks almond_multilingual --separate_eval --pred_languages fa+en --pred_tgt_languages en --evaluate test --path $workdir/model_$i --overwrite --eval_dir $workdir/model_$i/eval_results/ --data $SRCDIR/dataset/ --embeddings $EMBEDDING_DIR --skip_cache
 
     # check if result file exists
     if test ! -f $workdir/model_$i/eval_results/test/almond_multilingual_en.tsv || test ! -f $workdir/model_$i/eval_results/test/almond_multilingual_fa.tsv || test ! -f $workdir/model_$i/eval_results/test/almond_multilingual_fa+en.tsv; then

From b73312ba4033ae8c48e41a90ded018de3c244a64 Mon Sep 17 00:00:00 2001
From: mehrad
Date: Wed, 6 Oct 2021 13:56:13 -0700
Subject: [PATCH 4/7] example: fix context_plus_question construction if either context or question is empty

---
 genienlp/data_utils/example.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/genienlp/data_utils/example.py b/genienlp/data_utils/example.py
index 8d7cec2c..1891b399 100644
--- a/genienlp/data_utils/example.py
+++ b/genienlp/data_utils/example.py
@@ -150,7 +150,13 @@ def from_examples(examples: Iterable[Example], numericalizer):
 
         all_context_plus_question_features = []
         for ex in examples:
-            context_plus_question = ex.context + sep_token + ex.question if len(ex.question) else ex.context
+            if not len(ex.question):
+                context_plus_question = ex.context
+            elif not len(ex.context):
+                context_plus_question = ex.question
+            else:
+                context_plus_question = ex.context + sep_token + ex.question
+
             all_context_plus_questions.append(context_plus_question)
 
             # concatenate question and context features with a separator, but no need for a separator if there are no features to begin with

From 122928f813c44d146e3a7d3a24aeb6868cc31a05 Mon Sep 17 00:00:00 2001
From: mehrad
Date: Wed, 6 Oct 2021 16:21:37 -0700
Subject: [PATCH 5/7] almond_translate: accommodate enforcement of unique ids differently; instead, change ids on the fly to be unique, lifting the burden from the user or dataset

---
 genienlp/data_utils/almond_utils.py |   1 +
 genienlp/tasks/almond_task.py       | 189 ++++++++++++++--------------
 2 files changed, 94 insertions(+), 96 deletions(-)

diff --git a/genienlp/data_utils/almond_utils.py b/genienlp/data_utils/almond_utils.py
index e70cf748..9233071a 100644
--- a/genienlp/data_utils/almond_utils.py
+++ b/genienlp/data_utils/almond_utils.py
@@ -245,6 +245,7 @@ def return_sentences(text, regex_pattern, src_char_spans, is_cjk=False):
 def split_text_into_sentences(text, lang, src_char_spans):
+    # text = '''the . " ${field} " . of . " ${value} " .'''
     if lang in ['en']:
         sentences = return_sentences(text, '(?
+        if len(contexts) > 1:
+            examples = []
+            for i, text in enumerate(contexts):
+                ex_id, text = self.construct_id2span_mapping(self.name + '/' + example_id + f'@{i}', text, 'context')
+                examples.append(
+                    Example.from_raw(
+                        ex_id,
+                        text,
+                        question,
+                        answer,
+                        preprocess=self.preprocess_field,
+                        lower=False,
+                    )
+                )
+        else:
+            ex_id, context = self.construct_id2span_mapping(self.name + '/' + example_id, context, 'context')
+            examples = Example.from_raw(ex_id, context, question, answer, preprocess=self.preprocess_field, lower=False)
+
+        return examples
+
     def batch_postprocess_prediction_ids(self, batch_example_ids, batch_src_ids, batch_tgt_ids, **kwargs):
         numericalizer = kwargs.pop('numericalizer')
         cross_attentions = kwargs.pop('cross_attentions')
         tgt_lang = kwargs.pop('tgt_lang')
@@ -442,69 +502,6 @@ def batch_postprocess_prediction_ids(self, batch_example_ids, batch_src_ids, bat
 
         return partial_batch_prediction_ids, all_text_outputs
 
-    def _make_example(self, parts, dir_name=None, **kwargs):
-        # answer has to be provided by default unless doing prediction
-        no_answer = getattr(self.args, 'translate_no_answer', False)
-        split_sentence = getattr(self.args, 'translate_example_split', False)
-        src_lang = kwargs.get('src_lang', 'en')
-
-        example_id = 'id-null'
-        question = 'translate from input to output'
-
-        if no_answer:
-            if len(parts) == 1:
-                context = parts
-            elif len(parts) == 2:
-                example_id, context = parts
-            elif len(parts) == 3:
-                example_id, context, question = parts
-            elif len(parts) == 4:
-                raise ValueError(f'Input file contains a line with {len(parts)} parts: {str(parts)}')
-        else:
-            if len(parts) == 2:
-                context, answer = parts
-            elif len(parts) == 3:
-                example_id, context, answer = parts
-            elif len(parts) == 4:
-                example_id, context, question, answer = parts
-            else:
-                raise ValueError(f'Input file contains a line with {len(parts)} parts: {str(parts)}')
-
-        # no answer is provided
-        if no_answer:
-            answer = '.'
-
-        contexts = []
-        src_char_spans = None
-        if split_sentence:
-            if self.args.do_alignment:
-                src_quotation_symbol = '"'
-                src_char_spans_ind = [index for index, char in enumerate(context) if char == src_quotation_symbol]
-                src_char_spans = [
-                    (src_char_spans_ind[i], src_char_spans_ind[i + 1]) for i in range(0, len(src_char_spans_ind), 2)
-                ]
-            contexts = split_text_into_sentences(context, src_lang, src_char_spans)
-
-        if len(contexts) > 1:
-            examples = []
-            for i, text in enumerate(contexts):
-                examples.append(
-                    Example.from_raw(
-                        self.name + '/' + example_id + f'@{i}',
-                        text,
-                        question,
-                        answer,
-                        preprocess=self.preprocess_field,
-                        lower=False,
-                    )
-                )
-        else:
-            examples = Example.from_raw(
-                self.name + '/' + example_id, context, question, answer, preprocess=self.preprocess_field, lower=False
-            )
-
-        return examples
-
 
 @register_task('contextual_almond')
 class ContextualAlmond(BaseAlmondTask):

From 50f6954676c84098786cf170da65fb67ac6b4572 Mon Sep 17 00:00:00 2001
From: mehrad
Date: Sun, 10 Oct 2021 16:38:56 -0700
Subject: [PATCH 6/7] server: only allow certain args to be overridden

---
 genienlp/server.py | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/genienlp/server.py b/genienlp/server.py
index e25b157d..15941a80 100644
--- a/genienlp/server.py
+++ b/genienlp/server.py
@@ -50,6 +50,25 @@
 
 logger = logging.getLogger(__name__)
 
+GENERATION_ARGUMENTS = {
+    'num_beams',
+    'num_beam_groups',
+    'diversity_penalty',
+    'num_outputs',
+    'no_repeat_ngram_size',
+    'top_p',
+    'top_k',
+    'repetition_penalty',
+    'temperature',
+    'max_output_length',
+    'src_locale',
+    'tgt_locale',
+    'do_alignment',
+    'align_preserve_input_quotation',
+    'align_remove_output_quotation',
+    'translate_example_split',
+}
+
 
 def parse_argv(parser):
     parser.add_argument('--path', type=str, required=True)
@@ -124,6 +143,9 @@ def handle_request(self, request):
         args = copy.deepcopy(self.args)
         generation_options = request.get('options', {})
         for k, v in generation_options.items():
+            if k not in GENERATION_ARGUMENTS:
+                logger.warning(f'{k} is not a generation option and cannot be overridden')
+                continue
             setattr(args, k, v)
 
         # TODO handle this better by decoupling numericalizer and model

From 934f77cb98431ce1baea01399a473a265952c033 Mon Sep 17 00:00:00 2001
From: mehrad
Date: Mon, 11 Oct 2021 18:01:51 -0700
Subject: [PATCH 7/7] setup: pin ray to 1.6.0

Recent release of ray (1.7.0) is breaking our tests due to some import
errors. Will revert this change once a new patch is pushed.
---
 setup.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/setup.py b/setup.py
index e109bde1..d89e1c43 100644
--- a/setup.py
+++ b/setup.py
@@ -65,6 +65,7 @@
         'pathos==0.2.8',
         # for kf:
         'kfserving>=0.5.0',
+        'ray==1.6.0',
        # for NED:
         'bootleg==1.0.5',
         # for calibration:
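
Taken together, PATCH 2 and PATCH 6 reduce per-request overrides to a small gate: allowed keys are copied onto a per-request copy of the server arguments, and everything else is logged and ignored. A condensed, self-contained sketch of that behavior (the abridged allow-list and the standalone harness are illustrative; the gate logic itself follows the patches):

    # Sketch of the override gate from PATCH 2 + PATCH 6. Allowed keys are
    # applied to a deep copy of the server args; unknown keys are rejected
    # with a warning. The allow-list here is abridged for brevity.
    import copy
    import logging
    from argparse import Namespace

    logger = logging.getLogger(__name__)
    GENERATION_ARGUMENTS = {'num_beams', 'temperature', 'top_p', 'tgt_locale'}  # abridged

    def apply_overrides(server_args: Namespace, generation_options: dict) -> Namespace:
        args = copy.deepcopy(server_args)  # never mutate the server-wide defaults
        for k, v in generation_options.items():
            if k not in GENERATION_ARGUMENTS:
                logger.warning(f'{k} is not a generation option and cannot be overridden')
                continue
            setattr(args, k, v)
        return args

    server_args = Namespace(num_beams=1, temperature=1.0, path='model')
    per_request = apply_overrides(server_args, {'num_beams': 4, 'path': '/tmp/x'})
    assert per_request.num_beams == 4 and per_request.path == 'model'

Deep-copying the args per request keeps concurrent requests isolated, and the explicit allow-list prevents a client from overriding server-level settings such as paths or devices.
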