diff --git a/.travis.yml b/.travis.yml
index e98ce07..2bb6b76 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -12,7 +12,7 @@ matrix:
 
 install:
   - python setup.py -q install
-  - pip install pylint pydocstyle pytest pytest-cov coveralls
+  - pip install pylint pydocstyle pytest pytest-cov==2.5.0 coveralls
 
 script:
   - python -m pytest --cov=pyfn tests/
diff --git a/scripts/frameid.embed.sh b/scripts/frameid.embed.sh
new file mode 100644
index 0000000..a8b469a
--- /dev/null
+++ b/scripts/frameid.embed.sh
@@ -0,0 +1,177 @@
+#!/bin/bash
+#
+# Frame identification (train or decode) using a named embeddings file.
+# XP_DIR, RESOURCES_DIR, SCRIPTS_DIR, SIMFRAMEID_HOME and die() are not
+# defined here; they are presumably provided by the sourced setup.sh.
+
+source "$(dirname "${BASH_SOURCE[0]}")/setup.sh"
+
+# Print the command-line usage to stdout.
+show_help() {
+cat << EOF
+Usage: ${0##*/} [-h] -m {train,decode} -x XP_NUM -e EMBED [-p {semafor,open-sesame}]
+Perform frame identification.
+
+ -h, --help display this help and exit
+ -m, --mode train on all models or decode using a single model
+ -x, --xp XP_NUM xp number written as 3 digits (e.g. 001)
+ -p, --parser {semafor,open-sesame} formalize decoded frames for specified parser
+ -e, --embed name of embeddings to use
+EOF
+}
+
+is_xp_set=FALSE
+is_mode_set=FALSE
+is_parser_set=FALSE
+is_embed_set=FALSE
+
+# Options that take a value read it from "$2" and shift one extra time.
+while :; do
+    case "$1" in
+        -h|-\?|--help)
+            show_help
+            exit
+            ;;
+        -x|--xp)
+            if [ "$2" ]; then
+                is_xp_set=TRUE
+                xp="xp_$2"
+                shift
+            else
+                die "ERROR: '--xp' requires a non-empty option argument"
+            fi
+            ;;
+        -m|--mode)
+            if [ "$2" ]; then
+                is_mode_set=TRUE
+                mode=$2
+                shift
+            else
+                die "ERROR: '--mode' requires a non-empty option argument"
+            fi
+            ;;
+        -p|--parser)
+            if [ "$2" ]; then
+                is_parser_set=TRUE
+                parser=$2
+                shift
+            else
+                die "ERROR: '--parser' requires a non-empty option argument"
+            fi
+            ;;
+        -e|--embed)
+            if [ "$2" ]; then
+                is_embed_set=TRUE
+                embed=$2
+                shift
+            else
+                die "ERROR: '--embed' requires a non-empty option argument"
+            fi
+            ;;
+        --)
+            shift
+            break
+            ;;
+        -?*)
+            printf 'WARN: Unknown option (ignored): %s\n' "$1" >&2
+            ;;
+        *)
+            break
+    esac
+    shift
+done
+
+if [ "${is_xp_set}" = FALSE ]; then
+    die "ERROR: '--xp' parameter is required."
+fi
+
+if [ "${is_mode_set}" = FALSE ]; then
+    die "ERROR: '--mode' parameter is required."
+fi
+
+if [ "${is_embed_set}" = FALSE ]; then
+    die "ERROR: '--embed' parameter is required."
+fi
+
+# Reject unknown modes up front instead of silently doing nothing.
+case "${mode}" in
+    train|decode )
+        ;;
+    * )
+        die "Invalid mode '${mode}': should be 'train' or 'decode'"
+esac
+
+# Build the frameid working tree for the current xp: copy the corpora and the
+# embeddings file into it, then run flatten.sh on the test sentences and
+# generate.py on the training frame elements.
+prepare() {
+    local data_dir="${XP_DIR}/${xp}/data"
+    local frameid_data_dir="${XP_DIR}/${xp}/frameid/data"
+
+    echo "Preparing files for frame identification..."
+
+    # -p creates the intermediate frameid/ and frameid/data/ directories too.
+    mkdir -p "${frameid_data_dir}/embeddings" \
+             "${frameid_data_dir}/corpora" \
+             "${frameid_data_dir}/lexicons"
+
+    cp "${data_dir}/test.frames" "${frameid_data_dir}/corpora/"
+    cp "${data_dir}/test.sentences.conllx" "${frameid_data_dir}/corpora/"
+    cp "${data_dir}/train.frame.elements" "${frameid_data_dir}/corpora/"
+    cp "${data_dir}/train.sentences.conllx.flattened" "${frameid_data_dir}/corpora/"
+
+    cp "${RESOURCES_DIR}/${embed}" "${frameid_data_dir}/embeddings/"
+
+    mv "${frameid_data_dir}/corpora/test.frames" \
+        "${frameid_data_dir}/corpora/test.frame.elements"
+
+    bash "${SCRIPTS_DIR}/flatten.sh" -f "${frameid_data_dir}/corpora/test.sentences.conllx"
+
+    python3 "${SIMFRAMEID_HOME}/generate.py" \
+        "${frameid_data_dir}/corpora/train.frame.elements" \
+        "${frameid_data_dir}/lexicons/fn_lexicon"
+
+    echo "Done"
+}
+
+if [ "${mode}" = train ]; then
+    prepare
+    echo "Training frame identification on all models..."
+    python "${SIMFRAMEID_HOME}/simpleFrameId/main.py" train "${XP_DIR}/${xp}/frameid" "${embed}" \
+        || die "ERROR: frame identification training failed"
+    echo "Done"
+fi
+
+if [ "${mode}" = decode ]; then
+    if [ "${is_parser_set}" = FALSE ]; then
+        die "ERROR: '--parser' parameter is required."
+    fi
+    case "${parser}" in
+        semafor|open-sesame )
+            ;;
+        * )
+            die "Invalid frame semantic parser '${parser}': Should be 'semafor' or 'open-sesame'"
+    esac
+    prepare
+    echo "Predicting frames..."
+    # Abort on failure: the steps below overwrite test data with the predictions.
+    python "${SIMFRAMEID_HOME}/simpleFrameId/main.py" decode "${XP_DIR}/${xp}/frameid" "${embed}" \
+        || die "ERROR: frame identification decoding failed"
+    echo "Done"
+    if [ "${parser}" = semafor ]; then
+        cut -f 1-3 "${XP_DIR}/${xp}/data/test.frames" > "${XP_DIR}/${xp}/data/test.frames.cut.1.txt"
+        cut -f 5-8 "${XP_DIR}/${xp}/data/test.frames" > "${XP_DIR}/${xp}/data/test.frames.cut.2.txt"
+        paste "${XP_DIR}/${xp}/data/test.frames.cut.1.txt" "${XP_DIR}/${xp}/frameid/test.frames.predicted" "${XP_DIR}/${xp}/data/test.frames.cut.2.txt" \
+            | perl -pe "s/^\t+$//g" \
+            | cat -s > "${XP_DIR}/${xp}/data/test.frames"
+        rm "${XP_DIR}/${xp}/data/test.frames.cut.1.txt"
+        rm "${XP_DIR}/${xp}/data/test.frames.cut.2.txt"
+    fi
+    if [ "${parser}" = open-sesame ]; then
+        python3 CoNLLizer.py merger \
+            -c "${XP_DIR}/${xp}/data/test.bios.semeval" \
+            -P "${XP_DIR}/${xp}/frameid/test.frames.predicted" \
+            -n 14 -N 1 > "${XP_DIR}/${xp}/data/test.bios.semeval.merged"
+        mv "${XP_DIR}/${xp}/data/test.bios.semeval.merged" "${XP_DIR}/${xp}/data/test.bios.semeval"
+    fi
+fi
diff --git a/scripts/open-sesame.embed.sh b/scripts/open-sesame.embed.sh
new file mode 100644
index 0000000..ac28357
--- /dev/null
+++ b/scripts/open-sesame.embed.sh
@@ -0,0 +1,172 @@
+#!/bin/bash
+#
+# Train or decode with open-sesame's segrnn-argid.py using a named
+# embeddings file.
+# XP_DIR, RESOURCES_DIR, OPEN_SESAME_HOME and die() are not defined here;
+# they are presumably provided by the sourced setup.sh.
+
+source "$(dirname "${BASH_SOURCE[0]}")/setup.sh"
+
+# Print the command-line usage to stdout.
+show_help() {
+cat << EOF
+Usage: ${0##*/} [-h] -m {train,decode} -x XP_NUM -e EMBED [-s {dev,test}] [-d]
+Train or decode with the OPEN-SESAME parser.
+
+ -h, --help display this help and exit
+ -m, --mode {train,decode} open-sesame mode to use: train or decode
+ -x, --xp XP_NUM xp number written as 3 digits (e.g. 001)
+ -s, --splits {dev,test} which splits to use in decode mode: dev or test
+ -d, --with_dep_parses if specified, parser will use dependency parses
+ -e, --embed name of embeddings to use
+EOF
+}
+
+is_mode_set=FALSE
+is_xp_set=FALSE
+is_splits_set=FALSE
+with_dep_parses=FALSE
+is_embed_set=FALSE
+
+# Options that take a value read it from "$2" and shift one extra time.
+while :; do
+    case "$1" in
+        -h|-\?|--help)
+            show_help
+            exit
+            ;;
+        -m|--mode)
+            if [ "$2" ]; then
+                is_mode_set=TRUE
+                mode=$2
+                shift
+            else
+                die "ERROR: '--mode' requires a non-empty option argument"
+            fi
+            ;;
+        -x|--xp)
+            if [ "$2" ]; then
+                is_xp_set=TRUE
+                xp="xp_$2"
+                shift
+            else
+                die "ERROR: '--xp' requires a non-empty option argument"
+            fi
+            ;;
+        -s|--splits)
+            if [ "$2" ]; then
+                is_splits_set=TRUE
+                splits=$2
+                shift
+            else
+                die "ERROR: '--splits' requires a non-empty option argument"
+            fi
+            ;;
+        -d|--with_dep_parses)
+            with_dep_parses=TRUE
+            ;;
+        -e|--embed)
+            if [ "$2" ]; then
+                is_embed_set=TRUE
+                embed=$2
+                shift
+            else
+                die "ERROR: '--embed' requires a non-empty option argument"
+            fi
+            ;;
+        --)
+            shift
+            break
+            ;;
+        -?*)
+            printf 'WARN: Unknown option (ignored): %s\n' "$1" >&2
+            ;;
+        *)
+            break
+    esac
+    shift
+done
+
+if [ "${is_mode_set}" = FALSE ]; then
+    die "ERROR: '--mode' parameter is required"
+fi
+
+if [ "${is_xp_set}" = FALSE ]; then
+    die "ERROR: '--xp' parameter is required"
+fi
+
+if [ "${is_embed_set}" = FALSE ]; then
+    die "ERROR: '--embed' parameter is required"
+fi
+
+case "${mode}" in
+    train|decode )
+        ;;
+    * )
+        die "Invalid mode '${mode}': should be 'train' or 'decode'"
+esac
+
+if [ "${mode}" = decode ]; then
+    if [ "${is_splits_set}" = FALSE ]; then
+        die "ERROR: '--splits' parameter is required for decoding"
+    fi
+    case "${splits}" in
+        dev|test )
+            ;;
+        * )
+            die "Invalid splits '${splits}': should be 'dev' or 'test'"
+    esac
+fi
+
+mkdir -p "${XP_DIR}/${xp}/model"
+
+# Rebuild DECODED_FILE ($2) from columns 1-14 of BIOS_FILE ($1) and column 15
+# of DECODED_FILE. Lines made only of tabs are emptied and runs of blank
+# lines are squeezed into one (cat -s).
+postprocess_decoded_file() {
+    local BIOS_FILE=$1
+    local DECODED_FILE=$2
+    local OUTPUT_TMP_DIR
+
+    # Private mktemp directory rather than a fixed /tmp path, so concurrent
+    # runs cannot clobber each other's intermediate files.
+    OUTPUT_TMP_DIR="$(mktemp -d "${TMPDIR:-/tmp}/biospost.XXXXXX")" \
+        || die "ERROR: could not create a temporary directory"
+
+    cut -f 1-14 "${BIOS_FILE}" > "${OUTPUT_TMP_DIR}/cut.1.txt"
+    cut -f 15 "${DECODED_FILE}" > "${OUTPUT_TMP_DIR}/cut.2.txt"
+
+    paste "${OUTPUT_TMP_DIR}/cut.1.txt" "${OUTPUT_TMP_DIR}/cut.2.txt" \
+        | perl -pe "s/^\t+$//g" \
+        | cat -s > "${DECODED_FILE}"
+
+    rm -rf -- "${OUTPUT_TMP_DIR}"
+}
+
+# segrnn-argid.py gets "--syn dep" only when -d/--with_dep_parses was given.
+syn_args=()
+if [ "${with_dep_parses}" = TRUE ]; then
+    syn_args=(--syn dep)
+fi
+
+if [ "${mode}" = train ]; then
+    python "${OPEN_SESAME_HOME}/src/segrnn-argid.py" \
+        --model "${XP_DIR}/${xp}/model/segrnn.argid.model" \
+        --trainf "${XP_DIR}/${xp}/data/train.bios" \
+        --devf "${XP_DIR}/${xp}/data/dev.bios" \
+        --vecf "${RESOURCES_DIR}/${embed}" \
+        "${syn_args[@]}"
+fi
+
+if [ "${mode}" = decode ]; then
+    # Abort on failure so a missing or partial decoded file is not post-processed.
+    python "${OPEN_SESAME_HOME}/src/segrnn-argid.py" \
+        --mode test \
+        --model "${XP_DIR}/${xp}/model/segrnn.argid.model" \
+        --trainf "${XP_DIR}/${xp}/data/train.bios" \
+        --testf "${XP_DIR}/${xp}/data/${splits}.bios.semeval" \
+        --vecf "${RESOURCES_DIR}/${embed}" \
+        "${syn_args[@]}" \
+        || die "ERROR: open-sesame decoding failed"
+    postprocess_decoded_file "${XP_DIR}/${xp}/data/${splits}.bios.semeval" "${XP_DIR}/${xp}/data/${splits}.bios.semeval.decoded"
+fi
diff --git a/setup.py b/setup.py
index 5cbb393..a841012 100644
--- a/setup.py
+++ b/setup.py
@@ -16,7 +16,7 @@
     author_email='akb@3azouz.net',
     long_description=long_description,
     long_description_content_type='text/markdown',
-    version='1.2.6',
+    version='1.3.0',
     url='https://gitlab.com/akb89/pyfn',
     download_url='https://pypi.org/project/pyfn/#files',
     license='MIT',