From 7a29c5c4d8817b8f069e3ae88610492ebbaa19b9 Mon Sep 17 00:00:00 2001 From: Maarten van Gompel Date: Wed, 1 Nov 2023 17:19:57 +0100 Subject: [PATCH] folia2stam: convert declarations as annotations on datasets --- foliatools/folia2stam.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/foliatools/folia2stam.py b/foliatools/folia2stam.py index 31f7a1d..2afab45 100644 --- a/foliatools/folia2stam.py +++ b/foliatools/folia2stam.py @@ -43,6 +43,19 @@ def convert(f, annotationstore: stam.AnnotationStore, **kwargs): for key, value in doc.metadata.items(): annotationstore.annotate(target=selector, data={"key":key,"value":value,"set":"metadata"}) #TODO: make metadata set configurable + for annotationtype, foliaset in doc.annotations: + if foliaset: + try: + dataset = annotationstore.dataset(foliaset) + except stam.StamError: + dataset = annotationstore.add_dataset(foliaset) + selector = stam.Selector.datasetselector(dataset) + value = folia.annotationtype2str(annotationtype) + if value: + value = value.lower() + annotationstore.annotate(target=selector, data=[{"key":"declaration", "value": f"{value}-annotation", "set": FOLIA_NAMESPACE},{"key":"annotationtype", "value": value, "set": FOLIA_NAMESPACE}]) + + def convert_tokens(doc: folia.Document, annotationstore: stam.AnnotationStore, **kwargs) -> stam.TextResource: """Convert FoLiA tokens (w) and text content to STAM. Returns a STAM resource"""