Skip to content

Commit

Permalink
Merge pull request #8 from akb89/develop
Browse files Browse the repository at this point in the history
Develop
  • Loading branch information
akb89 authored Aug 30, 2018
2 parents 466a4af + c20c8ec commit 2276d30
Show file tree
Hide file tree
Showing 12 changed files with 644 additions and 171 deletions.
608 changes: 585 additions & 23 deletions README.md

Large diffs are not rendered by default.

135 changes: 0 additions & 135 deletions REPLICATION.md

This file was deleted.

3 changes: 2 additions & 1 deletion pyfn/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,6 @@ def _convert(args):
raise InvalidParameterError(
'Source and Target paths are the same! Please specify different '
'source/target paths')
# TODO: add validation for input directory structure
if args.source_format == 'fnxml':
annosets_dict = fnxml.get_annosets_dict(args.source_path,
args.splits,
Expand All @@ -70,6 +69,8 @@ def _convert(args):
'need to specify the --sent parameter pointing at the '
'.sentences file absolute filepath')
annosets = semaforu.unmarshall_annosets(args.source_path, args.sent)
## Starting marshalling
os.makedirs(args.target_path, exist_ok=True)
if args.target_format == 'bios':
biosm.marshall_annosets_dict(annosets_dict, args.target_path,
args.filter, args.output_sentences,
Expand Down
17 changes: 16 additions & 1 deletion pyfn/marshalling/marshallers/bios.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,22 @@ def _marshall_bios(annosets, filtering_options, sent_dict, bios_filepath,
def marshall_annosets_dict(annosets_dict, target_dirpath, filtering_options,
output_sentences, excluded_frames,
excluded_sentences, excluded_annosets):
"""Convert a dict of {splits:pyfn.AnnotationSet} to BIOS splits files."""
"""Convert a dict of {splits:pyfn.AnnotationSet} to BIOS splits files.
Args
----
annosets_dict: a splits-to-annosets dictionary (as generated by
the framenet unmarshaller).
target_dirpath: the absolute path to the target directory in which to
save the output file(s)
filtering_options: a list of options to pass to the pyfn.utils.filter.
('overlap_fes', 'disc_fes', 'disc_targets', 'no_fes', 'non_breaking_spaces')
output_sentences: True or False. Whether or not to also output a .sentences file
listing all sentences (string), one per line.
excluded_frames: a list of frame #id to exclude from the output
excluded_sentences: a list of sentence #id to exclude from the output
excluded_annosets: a list of annotationset #id to exclude from the output
"""
for splits_name, annosets in annosets_dict.items():
bios_filepath = files_utils.get_bios_filepath(target_dirpath,
splits_name)
Expand Down
14 changes: 14 additions & 0 deletions pyfn/marshalling/marshallers/semafor.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,20 @@ def marshall_annosets_dict(annosets_dict, target_dirpath, filtering_options,
both frame and frame element labels depending on filtering options.
The dev/test splits will be converted to a .frames file containing
frame labels only.
Args
----
annosets_dict: a splits-to-annosets dictionary (as generated by
the framenet unmarshaller).
target_dirpath: the absolute path to the target directory in which to
save the output file(s)
filtering_options: a list of options to pass to the pyfn.utils.filter.
('overlap_fes', 'disc_fes', 'disc_targets', 'no_fes', 'non_breaking_spaces')
output_sentences: True or False. Whether or not to also output a .sentences file
listing all sentences (string), one per line.
excluded_frames: a list of frame #id to exclude from the output
excluded_sentences: a list of sentence #id to exclude from the output
excluded_annosets: a list of annotationset #id to exclude from the output
"""
for splits_name, annosets in annosets_dict.items():
logger.info('Marshalling {} splits to semafor format'
Expand Down
9 changes: 8 additions & 1 deletion pyfn/marshalling/marshallers/semeval.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,14 @@ def _marshall_annosets(annosets, output_filepath, excluded_frames,

def marshall_annosets(annosets, output_filepath, excluded_frames,
excluded_sentences, excluded_annosets):
"""Marshall a list of pyfn.AnnotationSet objects to SEMEVAL XML."""
"""Marshall a list of pyfn.AnnotationSet objects to SEMEVAL XML.
annosets: a list of annosets to marshall.
output_filepath: the absolute path to the output .xml file
excluded_frames: a list of frame #id to exclude from the output
excluded_sentences: a list of sentence #id to exclude from the output
excluded_annosets: a list of annotationset #id to exclude from the output
"""
logger.info('Marshalling pyfn.AnnotationSet objects to SEMEVAL XML...')
if not annosets:
raise InvalidParameterError('Input pyfn.AnnotationSet list is empty')
Expand Down
9 changes: 8 additions & 1 deletion pyfn/marshalling/unmarshallers/framenet.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import pyfn.utils.filter as f_utils
import pyfn.utils.xml as xml_utils

from pyfn.exceptions.parameter import InvalidParameterError
from pyfn.exceptions.xml import XMLProcessingError

from pyfn.models.annotationset import AnnotationSet
Expand Down Expand Up @@ -310,7 +311,7 @@ def _extract_ft_annosets(ft_filepaths, fe_dict, flatten=False):


def extract_annosets(splits_dirpath, with_fulltexts, with_exemplars,
fe_dict, flatten=False):
fe_dict=None, flatten=False):
"""Return a list of pyfn.AnnotationSet extracted from splits paths.
The splits directory should contain two subdirectories named 'fulltext'
Expand All @@ -321,6 +322,8 @@ def extract_annosets(splits_dirpath, with_fulltexts, with_exemplars,
"""
logger.info('Extracting pyfn.AnnotationSet items from {}'
.format(splits_dirpath))
if fe_dict is None:
fe_dict = {}
ft_annosets = []
ex_annosets = []
if with_fulltexts:
Expand Down Expand Up @@ -387,6 +390,10 @@ def _get_fe_dict(frame_xml_filepaths):


def _get_annosets_dict_from_fn_xml(fn_splits_dirpath, splits, with_exemplars):
if splits not in ('train', 'dev', 'test'):
raise InvalidParameterError(
'Invalid splits name `{}`. Should be `train`, `dev` or `test`'
.format(splits))
fe_dict = _get_fe_dict(xml_utils.get_xml_filepaths(fn_splits_dirpath,
'frame'))
if splits == 'test':
Expand Down
6 changes: 4 additions & 2 deletions pyfn/marshalling/unmarshallers/semeval.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,12 @@

import pyfn.marshalling.unmarshallers.framenet as fn_unmarshaller

__all__ = ['unmarshall_semeval07_xml']
__all__ = ['unmarshall_annosets']

logger = logging.getLogger(__name__)


def unmarshall_semeval07_xml(xml_filepath, fe_dict, flatten=False):
def unmarshall_annosets(xml_filepath, fe_dict=None, flatten=False):
"""Unmarshall a SemEval 2007 FrameNet XML file from file path.
Return a generator of AnnotationSet instances extracted from the
Expand All @@ -24,6 +24,8 @@ def unmarshall_semeval07_xml(xml_filepath, fe_dict, flatten=False):
"""
logger.info('Unmarshalling SemEval FrameNet XML file: {}'
.format(xml_filepath))
if fe_dict is None:
fe_dict = {}
# pylint: disable=R1702
for documents_tag in etree.parse(xml_filepath).getroot().findall(
'documents'):
Expand Down
2 changes: 1 addition & 1 deletion pyfn/utils/files.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,5 +73,5 @@ def get_rolemappings_filepath(target_dirpath):


def get_fr_relation_xml_filepath(splits_dirpath):
"""Return the absolute path to the frRelation.xl file given splits_dirpath."""
"""Return the absolute path to the frRelation.xml file given splits_dirpath."""
return os.path.join(splits_dirpath, 'frRelation.xml')
2 changes: 1 addition & 1 deletion scripts/score.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ source "$(dirname "${BASH_SOURCE[0]}")/setup.sh"
show_help() {
cat << EOF
Usage: ${0##*/} [-h] -x XP_NUM -p {semafor,open-sesame} -s {dev,test} -f {gold,predicted}
Score frame semantic parsing with the SEMEVAL scoring scripts modified by Kshirsagar et al. (2015).
Score frame semantic parsing with a modified version of the SEMEVAL scoring script.
-h, --help display this help and exit
-x, --xp XP_NUM xp number written as 3 digits (e.g. 001)
Expand Down
8 changes: 4 additions & 4 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,13 @@

setup(
name='pyfn',
description='A python module to process FrameNet XML data',
description='A python module to process data for Frame Semantic Parsing',
author='Alexandre Kabbach',
author_email='[email protected]',
long_description=long_description,
long_description_content_type='text/markdown',
version='1.1.1',
url='https://gitlab.unige.ch/akb/pyfn',
version='1.2.0',
url='https://gitlab.com/akb89/pyfn',
download_url='https://pypi.org/project/pyfn/#files',
license='MIT',
keywords=['framenet', 'xml', 'marshalling', 'unmarshalling'],
Expand All @@ -37,7 +37,7 @@
'pyfn = pyfn.main:main'
],
},
tests_require=['pytest==3.7.2', 'pylint==2.1.1', 'pytest-cov==2.5.1',
tests_require=['pytest==3.7.4', 'pylint==2.1.1', 'pytest-cov==2.5.1',
'pydocstyle==2.1.1'],
install_requires=['PyYAML==3.13', 'mmh3==2.5.1', 'lxml==4.2.4',
'pytz==2018.5'],
Expand Down
2 changes: 1 addition & 1 deletion tests/test_unmarshallers_semeval.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

SEMEVAL_XML_FILE = os.path.join(os.path.dirname(__file__), 'resources', 'semeval.xml')

semeval_annosets_list = list(semeval_unmarshaller.unmarshall_semeval07_xml(SEMEVAL_XML_FILE, {}))
semeval_annosets_list = list(semeval_unmarshaller.unmarshall_annosets(SEMEVAL_XML_FILE, {}))


def test_semeval_annoset():
Expand Down

0 comments on commit 2276d30

Please sign in to comment.