From b9ac270dae6bc36ef247ffda658aa9dc6c08d286 Mon Sep 17 00:00:00 2001 From: Jelte van Boheemen Date: Wed, 15 May 2024 15:10:41 +0200 Subject: [PATCH] Remove manual override keys for uttid in ChatReader Resolves #30 --- corpus2alpino/readers/chat.py | 13 ++----------- tests/example_chat_expected.txt | 4 ++-- 2 files changed, 4 insertions(+), 13 deletions(-) diff --git a/corpus2alpino/readers/chat.py b/corpus2alpino/readers/chat.py index 6fe5111..3331ca7 100644 --- a/corpus2alpino/readers/chat.py +++ b/corpus2alpino/readers/chat.py @@ -2,18 +2,16 @@ """ Module for reading CHAT cha files to parsable utterances. """ -from typing import cast, Dict, Iterable, List, Tuple +from typing import cast, Dict, Iterable, List from chamd import ChatReader as ChatParser, ChatLine, ChatTier -import os -import re from corpus2alpino.abstracts import Reader from corpus2alpino.models import CollectedFile, Document, MetadataValue, Utterance -MANUAL_IDS = ['xsid', 'xuid'] UTTERANCE_NUMBER_ID = 'uttno' + class ChatReader(Reader): """ Class for converting a CHAT file to document. @@ -31,13 +29,6 @@ def parse_utterances(self, chat_lines: List[ChatLine]): number = 0 for line in chat_lines: number += 1 # start numbering utterances from 1 - for id_override_key in MANUAL_IDS: - try: - line.uttid = line.tiers[id_override_key].text - line.metadata['uttid'].text = line.uttid - break - except KeyError: - pass yield Utterance(line.text, str(line.uttid), diff --git a/tests/example_chat_expected.txt b/tests/example_chat_expected.txt index 9211a62..720ac96 100644 --- a/tests/example_chat_expected.txt +++ b/tests/example_chat_expected.txt @@ -26,11 +26,11 @@ ##META text origutt = toen [//] wou zij de auto maken . 21792_23475 ##META text parsefile = PRELAN_example_chat_u00000000002.xml ##META int uttendlineno = 10 -##META int uttid = 42 +##META int uttid = 2 ##META int uttno = 2 ##META int uttstartlineno = 9 ##META text xsid = 42 -42|toen wou zij de auto maken . +2|toen wou zij de auto maken . ##META text origutt = maar toen reed de auto er vandoor . ##META text parsefile = PRELAN_example_chat_u00000000003.xml