From 1232506a07737930023d8f08e4e19054dcf19990 Mon Sep 17 00:00:00 2001 From: mac-op Date: Thu, 18 Jul 2024 20:19:07 -0600 Subject: [PATCH 01/22] framework: initial implementation of FlowController and related classes --- src/framework/Makefile.am | 4 +- src/framework/flow.cpp | 77 +++++++++++++ src/framework/flow_controller.cpp | 107 +++++++++++++++++ src/framework/uima/flow.hpp | 153 +++++++++++++++++++++++++ src/framework/uima/flow_controller.hpp | 115 +++++++++++++++++++ 5 files changed, 455 insertions(+), 1 deletion(-) create mode 100644 src/framework/flow.cpp create mode 100644 src/framework/flow_controller.cpp create mode 100644 src/framework/uima/flow.hpp create mode 100644 src/framework/uima/flow_controller.hpp diff --git a/src/framework/Makefile.am b/src/framework/Makefile.am index 3bdaf9e..b3490c9 100644 --- a/src/framework/Makefile.am +++ b/src/framework/Makefile.am @@ -78,7 +78,9 @@ libuima_la_SOURCES += consoleui.cpp libuima_la_SOURCES += cp2ucnvrt.cpp libuima_la_SOURCES += dottypesystemwriter.cpp libuima_la_SOURCES += engine.cpp -libuima_la_SOURCES += exceptions.cpp +libuima_la_SOURCES += exceptions.cpp +libuima_la_SOURCES += flow_controller.cpp +libuima_la_SOURCES += flow.cpp libuima_la_SOURCES += ftools.cpp libuima_la_SOURCES += internal_aggregate_engine.cpp libuima_la_SOURCES += internal_capability_container.cpp diff --git a/src/framework/flow.cpp b/src/framework/flow.cpp new file mode 100644 index 0000000..3b6dfd2 --- /dev/null +++ b/src/framework/flow.cpp @@ -0,0 +1,77 @@ +/** \file flow_controller.cpp . +----------------------------------------------------------------------------- + + + + + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + +----------------------------------------------------------------------------- +----------------------------------------------------------------------------- + + +-------------------------------------------------------------------------- */ + +#include "uima/flow.hpp" + +namespace uima { + Step::Step(const internal::SimpleStep &simpleStep): step(simpleStep), + type(StepType::SIMPLESTEP) { + } + + Step::Step(const internal::ParallelStep &parallelStep): step(parallelStep), + type(StepType::PARALLELSTEP) { + } + + Step::Step(const internal::FinalStep &finalStep): step(finalStep), + type(StepType::FINALSTEP) { + } + + Step::~Step() { + switch (type) { + case StepType::SIMPLESTEP: + step.simpleStep.~SimpleStep(); + break; + case StepType::FINALSTEP: + step.finalStep.~FinalStep(); + break; + case StepType::PARALLELSTEP: + step.parallelStep.~ParallelStep(); + break; + } + } + + const internal::SimpleStep * Step::getSimpleStep() const { + if (type != StepType::SIMPLESTEP) return nullptr; + return &step.simpleStep; + } + + const internal::ParallelStep * Step::getParallelStep() const { + if (type != StepType::PARALLELSTEP) return nullptr; + return &step.parallelStep; + } + + const internal::FinalStep * Step::getFinalStep() const { + if (type != StepType::FINALSTEP) return nullptr; + return &step.finalStep; + } + + Step::StepType Step::getType() const { + return type; + } +} diff --git 
a/src/framework/flow_controller.cpp b/src/framework/flow_controller.cpp new file mode 100644 index 0000000..ae9c543 --- /dev/null +++ b/src/framework/flow_controller.cpp @@ -0,0 +1,107 @@ +/** \file flow_controller.cpp . +----------------------------------------------------------------------------- + + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ +----------------------------------------------------------------------------- + + Description: Implementation of FixedFlowController and FixedFlowObject + +----------------------------------------------------------------------------- */ + + +#include "uima/flow_controller.hpp" + +namespace uima { + Step FixedFlowObject::next() { + if (wasPassedToCASMultiplier) { + switch (flowController->getAction()) { + case FixedFlowController::ActionAfterCasMultiplier::CONTINUE: + break; + case FixedFlowController::ActionAfterCasMultiplier::STOP: + return Step(internal::FinalStep()); + case FixedFlowController::ActionAfterCasMultiplier::DROP: + return Step(internal::FinalStep(internallyCreatedCAS)); + case FixedFlowController::ActionAfterCasMultiplier::DROP_IF_NEW_CAS_PRODUCED: + if (newCASProduced) + return Step(internal::FinalStep(internallyCreatedCAS)); + break; + } + wasPassedToCASMultiplier = newCASProduced = false; + } + + const std::vector& delegateKeys = flowController->getDelegateKeys(); + if (currentStep >= delegateKeys.size()) + return Step(internal::FinalStep()); + + const icu::UnicodeString &engineName = delegateKeys[currentStep]; + const AnnotatorContext* engineContext = flowController->getDelegateSpecifierMap().at(engineName); + const AnalysisEngineMetaData* engineMetadata = engineContext->getTaeSpecifier().getAnalysisEngineMetaData(); + + if (engineMetadata->getOperationalProperties()->getOutputsNewCASes()) + wasPassedToCASMultiplier = true; + + return Step(internal::SimpleStep(delegateKeys[currentStep++])); + } + + std::unique_ptr FixedFlowObject::newCasProduced(const CAS &cas, const icu::UnicodeString &producedBy) { + newCASProduced = true; + const std::vector& delegateKeys = flowController->getDelegateKeys(); + + int i = 0; + for (; i < delegateKeys.size(); ++i) { + if (producedBy == delegateKeys[i]) break; + } + + return std::make_unique(flowController, i+1, true); + } + +/* 
-------------------------------------------------------------------------------------------------------------- */ +/* FixedFlowController implementation */ +/* -------------------------------------------------------------------------------------------------------------- */ + void FixedFlowController::initialize(const AnnotatorContext &anContext) { + annotatorContext = &anContext; + delegateSpecifierMap = &anContext.getDelegates(); + flowContraints = anContext.getTaeSpecifier().getAnalysisEngineMetaData()->getFixedFlow(); + } + + void FixedFlowController::destroy() { + } + + void FixedFlowController::reconfigure() { + if (annotatorContext) + initialize(*annotatorContext); + } + + std::unique_ptr FixedFlowController::computeFlow(const CAS&) { + return std::make_unique(this, 0); + } + + const std::vector & FixedFlowController::getDelegateKeys() const { + return flowContraints->getNodes(); + } + + const std::map & FixedFlowController::getDelegateSpecifierMap() const { + return *delegateSpecifierMap; + } + + FixedFlowController::ActionAfterCasMultiplier FixedFlowController::getAction() const { + return action; + } + +} diff --git a/src/framework/uima/flow.hpp b/src/framework/uima/flow.hpp new file mode 100644 index 0000000..cb943e5 --- /dev/null +++ b/src/framework/uima/flow.hpp @@ -0,0 +1,153 @@ +#ifndef UIMA_FLOW_HPP +#define UIMA_FLOW_HPP + +/** \file flow.hpp . +----------------------------------------------------------------------------- + + + + + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + +----------------------------------------------------------------------------- + + Description: Flow interface for the FlowController + +----------------------------------------------------------------------------- + + +-------------------------------------------------------------------------- */ +#include + +#include "uima/pragmas.hpp" +#include "uima/annotator.hpp" + +namespace uima { + namespace internal { + class UIMA_LINK_IMPORTSPEC SimpleStep { + icu::UnicodeString engineName; + /* ResultSpecification *resultSpec; */ + public: + SimpleStep(const icu::UnicodeString &name, ResultSpecification *resultSpec = nullptr) : engineName(name) { + } + + const icu::UnicodeString &getEngineName() const { + return engineName; + } + + /** Capability Language Flow Controller requires ResultSpecification, when it is eventually implemented + bool hasResultSpec() const { + return resultSpec != nullptr; + } + + ResultSpecification* getResultSpec() const { + return resultSpec; + } + */ + }; + + /** + * Not yet implemented + */ + class UIMA_LINK_IMPORTSPEC ParallelStep { + public: + ParallelStep() { + UIMA_EXC_THROW_NEW(NotYetImplementedException, + UIMA_ERR_NOT_YET_IMPLEMENTED, + UIMA_MSG_ID_EXC_NOT_YET_IMPLEMENTED, + ErrorMessage(UIMA_MSG_ID_EXCON_UNKNOWN_CONTEXT), + ErrorInfo::unrecoverable + ); + } + }; + + class UIMA_LINK_IMPORTSPEC FinalStep { + bool forceDropCAS; + + public: + FinalStep() : forceDropCAS(false) { + } + + explicit FinalStep(bool forceDropCAS) : forceDropCAS(forceDropCAS) { + } + + bool getForceDropCAS() const { + return forceDropCAS; + } + 
}; + } + + /* + * Class Step is a union type of possible step types: Simple Step, Parallel Step and Final Step. + */ + class UIMA_LINK_IMPORTSPEC Step { + public: + enum class StepType { SIMPLESTEP, FINALSTEP, PARALLELSTEP }; + + explicit Step(const internal::SimpleStep &simpleStep); + + explicit Step(const internal::ParallelStep &parallelStep); + + explicit Step(const internal::FinalStep &finalStep); + + ~Step(); + + const internal::SimpleStep *getSimpleStep() const; + + const internal::ParallelStep *getParallelStep() const; + + const internal::FinalStep *getFinalStep() const; + + StepType getType() const; + private: + union step_type { + internal::SimpleStep simpleStep; + internal::ParallelStep parallelStep; + internal::FinalStep finalStep; + + step_type(const internal::SimpleStep &simpleStep) : simpleStep(simpleStep) { + } + + step_type(const internal::ParallelStep &parallelStep) : parallelStep(parallelStep) { + } + + step_type(const internal::FinalStep &finalStep) : finalStep(finalStep) { + } + ~step_type() { } + } step; + + const StepType type; + }; + + /** + * Virtual base class for the Flow objects computed by the FlowController + * @see FlowController::computeFlow + */ + class UIMA_LINK_IMPORTSPEC Flow { + public: + virtual ~Flow(){}; + + virtual Step next()=0; + virtual std::unique_ptr newCasProduced(const CAS&, const icu::UnicodeString&)=0; + virtual bool continueOnFailure(const icu::UnicodeString&) { return false; } + virtual void aborted() { } + }; + + +} +#endif //UIMA_FLOW_HPP diff --git a/src/framework/uima/flow_controller.hpp b/src/framework/uima/flow_controller.hpp new file mode 100644 index 0000000..1a54f8f --- /dev/null +++ b/src/framework/uima/flow_controller.hpp @@ -0,0 +1,115 @@ +#ifndef UIMA_FLOW_CONTROLLER_HPP +#define UIMA_FLOW_CONTROLLER_HPP + +/** \file flow_controller.hpp . 
+----------------------------------------------------------------------------- + + + + + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + +----------------------------------------------------------------------------- + + Description: + +----------------------------------------------------------------------------- + + +-------------------------------------------------------------------------- */ + +#include "uima/annotator_context.hpp" +#include "uima/flow.hpp" +#include "uima/pragmas.hpp" +#include "uima/engine.hpp" + +namespace uima { + class FixedFlowController; + + /** + * A FlowController dictates how CASes are routed within Aggregate Analysis Engines. + */ + class UIMA_LINK_IMPORTSPEC FlowController { + public: + virtual ~FlowController() = default; + + /** + * Initializes this Flow Controller and any related members. + * @param anContext + * Description of the AnalysisEngine that this Flow Controller belongs to. 
+ */ + virtual void initialize(const AnnotatorContext& anContext)=0; + virtual void destroy()=0; + virtual void reconfigure()=0; + virtual std::unique_ptr computeFlow(const CAS&)=0; + }; + + class UIMA_LINK_IMPORTSPEC FixedFlowObject : public Flow { + public: + FixedFlowObject(FixedFlowController *const flowController, int startStep, + bool internallyCreatedCAS = false) : flowController(flowController), currentStep(startStep), + wasPassedToCASMultiplier(false), + newCASProduced(false), + internallyCreatedCAS(internallyCreatedCAS) { + assert(EXISTS(flowController)); + } + + Step next() override; + + std::unique_ptr newCasProduced(const CAS& cas, const icu::UnicodeString& producedBy) override; + + private: + FixedFlowController* flowController; + int currentStep; + bool wasPassedToCASMultiplier; + bool newCASProduced; + bool internallyCreatedCAS; + }; + + + class UIMA_LINK_IMPORTSPEC FixedFlowController : public FlowController { + public: + enum class ActionAfterCasMultiplier { CONTINUE, STOP, DROP, DROP_IF_NEW_CAS_PRODUCED }; + + FixedFlowController() = default; + + void initialize(const AnnotatorContext &anContext) override; + + void destroy() override; + + void reconfigure() override; + + std::unique_ptr computeFlow(const CAS&) override; + + const std::vector& getDelegateKeys() const; + + const std::map& getDelegateSpecifierMap() const; + + ActionAfterCasMultiplier getAction() const; + + private: + // FIXME: Use std::map instead?? 
+ const std::map* delegateSpecifierMap{}; + const FixedFlow* flowContraints{}; + const AnnotatorContext* annotatorContext{}; + ActionAfterCasMultiplier action{ActionAfterCasMultiplier::DROP_IF_NEW_CAS_PRODUCED}; + }; + +} + +#endif //UIMA_FLOW_CONTROLLER_HPP From 50ddfb006e36df05da5a432dd263d22e108dc169 Mon Sep 17 00:00:00 2001 From: mac-op Date: Wed, 24 Jul 2024 23:54:18 -0600 Subject: [PATCH 02/22] framework: allow CAS to keep track of owner and release itself --- src/cas/cas.cpp | 7 ++++ src/cas/uima/cas.hpp | 3 ++ src/framework/annotator_context.cpp | 8 +++-- src/framework/caspool.cpp | 44 +++++++++++++++++++++--- src/framework/uima/annotator_context.hpp | 2 ++ src/framework/uima/caspool.hpp | 3 +- src/test/src/test_engine.cpp | 6 ++-- src/utils/runAECpp.cpp | 4 +-- 8 files changed, 65 insertions(+), 12 deletions(-) diff --git a/src/cas/cas.cpp b/src/cas/cas.cpp index 521cc8f..d0a3df8 100644 --- a/src/cas/cas.cpp +++ b/src/cas/cas.cpp @@ -809,6 +809,13 @@ namespace uima { } } + void CAS::release() { + if (iv_componentInfo) { + iv_componentInfo->releaseCAS(*this); + } else + std::cerr << "No AnnotatorContext" << "\n\n"; + } + ANIndex CAS::getAnnotationIndex(Type const & crType) { if (isbaseCas) { assertWithMsg(false, "Annotation Index does not exist in Base CAS!"); diff --git a/src/cas/uima/cas.hpp b/src/cas/uima/cas.hpp index bd88b59..51ccd3d 100644 --- a/src/cas/uima/cas.hpp +++ b/src/cas/uima/cas.hpp @@ -772,6 +772,9 @@ namespace uima { icu::UnicodeString getAnnotationIndexID() const { return CAS::INDEXID_ANNOTATION; } + + void release(); + /** @} */ /** @defgroup PreDefTypes Predefined Types diff --git a/src/framework/annotator_context.cpp b/src/framework/annotator_context.cpp index 8e3dcc6..9a0b38e 100644 --- a/src/framework/annotator_context.cpp +++ b/src/framework/annotator_context.cpp @@ -165,7 +165,11 @@ namespace uima { } TyErrorId AnnotatorContext::defineCASPool(size_t numInstances) { - iv_pCasPool = new CASPool(getTaeSpecifier(),numInstances); + if 
(iv_pParentAnC) { // If this is a delegate + iv_pCasPool = new CASPool(this, iv_pParentAnC->getTaeSpecifier(),numInstances); + } else { + iv_pCasPool = new CASPool(this, getTaeSpecifier(), numInstances); + } if (iv_pCasPool == NULL) { return UIMA_ERR_USER_ANNOTATOR_OUT_OF_MEMORY; } @@ -500,7 +504,7 @@ namespace uima { NameValuePair const * AnnotatorContext::findNameValuePair(const icu::UnicodeString & paramName, const icu::UnicodeString & ancKey) const { /* return findNameValuePair(getGroupNameWhenNotSpec(), paramName, iv_pTaeSpecifier->getSearchStrategy()); */ - NameValuePair const * pValueLocal = pValueLocal = iv_pTaeSpecifier->getNameValuePair(paramName, ancKey); + NameValuePair const * pValueLocal = iv_pTaeSpecifier->getNameValuePair(paramName, ancKey); // the request was invalid we got an exception we leave to others to catch diff --git a/src/framework/caspool.cpp b/src/framework/caspool.cpp index 78e966e..f230bfe 100644 --- a/src/framework/caspool.cpp +++ b/src/framework/caspool.cpp @@ -31,6 +31,8 @@ #include "uima/caspool.hpp" + +#include "uima/annotator_context.hpp" #include "uima/err_ids.h" #include "uima/msg.h" @@ -45,13 +47,13 @@ namespace uima { // //------------------------------------------------------------ - CASPool::CASPool(const AnalysisEngineDescription & taeSpec, + CASPool::CASPool(const AnalysisEngineDescription &taeSpec, size_t numInstances) - :iv_vecAllInstances(), + : iv_vecAllInstances(), iv_vecFreeInstances(), iv_pCasDef(NULL), - iv_numInstances(numInstances) { - + iv_numInstances(numInstances), + iv_pComponentInfo(nullptr) { iv_pCasDef = uima::internal::CASDefinition::createCASDefinition(taeSpec); if (iv_pCasDef == NULL) { @@ -77,6 +79,38 @@ namespace uima { } + CASPool::CASPool(AnnotatorContext *anContext, const AnalysisEngineDescription &taeSpec, + size_t numInstances) : iv_vecAllInstances(), + iv_vecFreeInstances(), + iv_numInstances(numInstances), + iv_pCasDef(nullptr), + iv_pComponentInfo(anContext) { + iv_pCasDef = 
internal::CASDefinition::createCASDefinition(taeSpec); + if (iv_pCasDef == nullptr) { + UIMA_EXC_THROW_NEW(CASPoolException, + UIMA_ERR_CASPOOL_CREATE_CASDEFINITION, + UIMA_MSG_ID_EXC_CREATE_CASPOOL, + UIMA_MSG_ID_EXC_CREATE_CASPOOL, + ErrorInfo::unrecoverable); + } + + for (size_t i = 0; i < numInstances; i++) { + CAS *pCas = uima::internal::CASImpl::createCASImpl(*iv_pCasDef, false); + if (pCas == nullptr) { + UIMA_EXC_THROW_NEW(CASPoolException, + UIMA_ERR_CASPOOL_CREATE_CAS, + UIMA_MSG_ID_EXC_CREATE_CASPOOL, + UIMA_MSG_ID_EXC_CREATE_CASPOOL, + ErrorInfo::unrecoverable); + } + if (iv_pComponentInfo) + pCas->setCurrentComponentInfo(iv_pComponentInfo); + iv_vecAllInstances.push_back((CAS *) pCas->getInitialView()); + iv_vecFreeInstances.push_back((CAS *) pCas->getInitialView()); + } + } + + CASPool::~CASPool() { for (size_t i=0; i < iv_vecAllInstances.size(); i++) { delete iv_vecAllInstances[i]; @@ -114,6 +148,8 @@ namespace uima { void CASPool::releaseCAS(CAS & aCas) { aCas.reset(); + if (std::find(iv_vecAllInstances.begin(), iv_vecAllInstances.end(), &aCas) == iv_vecAllInstances.end()) + std::cerr << "False: " << iv_pComponentInfo->getTaeSpecifier().getAnnotatorImpName() << std::endl; iv_vecFreeInstances.push_back(&aCas); return; } diff --git a/src/framework/uima/annotator_context.hpp b/src/framework/uima/annotator_context.hpp index 5b63687..c1ed4a1 100644 --- a/src/framework/uima/annotator_context.hpp +++ b/src/framework/uima/annotator_context.hpp @@ -383,6 +383,8 @@ namespace uima { //AnnotatorContext(void); private: friend class uima::internal::EngineBase; + friend class CAS; + friend class internal::AnnotatorManager; AnalysisEngineDescription * getTAESpec() { return iv_pTaeSpecifier; diff --git a/src/framework/uima/caspool.hpp b/src/framework/uima/caspool.hpp index 208b1ca..eb6e63b 100644 --- a/src/framework/uima/caspool.hpp +++ b/src/framework/uima/caspool.hpp @@ -67,7 +67,7 @@ namespace uima { std::vector iv_vecFreeInstances; size_t iv_numInstances; 
uima::internal::CASDefinition * iv_pCasDef; - + AnnotatorContext* iv_pComponentInfo; public: /** Constructor @@ -76,6 +76,7 @@ namespace uima { */ CASPool(const AnalysisEngineDescription & taeSpec, size_t numInstances); + CASPool(AnnotatorContext* anContext, const AnalysisEngineDescription & taeSpec, size_t numInstances); /** Destructor */ ~CASPool(void); diff --git a/src/test/src/test_engine.cpp b/src/test/src/test_engine.cpp index e7f5b1c..9976226 100644 --- a/src/test/src/test_engine.cpp +++ b/src/test/src/test_engine.cpp @@ -555,7 +555,7 @@ void testAggregateCASMultiplier(const util::ConsoleUI &rclConsole) // There should be one Dave in each segment failIfNotTrue(anIndex.getSize() == 1); - pEngine->getAnnotatorContext().releaseCAS(rcas); + rcas.release(); } failIfNotTrue(numSegments == 3); @@ -635,9 +635,9 @@ void mainTest(uima::util::ConsoleUI & rclConsole, testCallingSequence2(rclConsole, cpszConfigFilename); testCallingSequence3(rclConsole, cpszConfigFilename); } - testCasMultiplier(rclConsole); + testCasMultiplier(rclConsole); testAggregateCASMultiplier(rclConsole); + #if 0 - testAggregateCASMultiplier(rclConsole); testAggregateCASCombiner(rclConsole); #endif } diff --git a/src/utils/runAECpp.cpp b/src/utils/runAECpp.cpp index a65c6ca..cf73ee6 100644 --- a/src/utils/runAECpp.cpp +++ b/src/utils/runAECpp.cpp @@ -441,8 +441,8 @@ void process (AnalysisEngine * pEngine, CAS * cas, std::string in, std::string o } //release CAS - pEngine->getAnnotatorContext().releaseCAS(outCas); - + // pEngine->getAnnotatorContext().releaseCAS(outCas); + outCas.release(); cout << "runAECpp::processing new Cas " << i << endl; } From 5d4f657c474a7d3b4cf9b767799637dc5b90c75a Mon Sep 17 00:00:00 2001 From: mac-op Date: Wed, 24 Jul 2024 23:55:00 -0600 Subject: [PATCH 03/22] test: fixed implementation name in DaveDetector --- src/test/data/descriptors/DaveDetector.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/test/data/descriptors/DaveDetector.xml 
b/src/test/data/descriptors/DaveDetector.xml index 79a765f..3a50ec5 100644 --- a/src/test/data/descriptors/DaveDetector.xml +++ b/src/test/data/descriptors/DaveDetector.xml @@ -24,7 +24,7 @@ org.apache.uima.cpp true -DaveDetector +libDaveDetector From 280b444e9affcf521906856acf98b67ad7a294c6 Mon Sep 17 00:00:00 2001 From: mac-op Date: Thu, 25 Jul 2024 00:05:00 -0600 Subject: [PATCH 04/22] framework: allow aggregate CAS Multiplier TODO: Documentation and exception handling --- src/framework/annotator_mgr.cpp | 331 ++++++++++++------ src/framework/flow.cpp | 34 ++ src/framework/flow_controller.cpp | 10 +- src/framework/internal_aggregate_engine.cpp | 8 +- src/framework/uima/annotator_mgr.hpp | 36 +- src/framework/uima/flow.hpp | 12 +- .../data/descriptors/SimpleTextSegmenter.xml | 2 +- 7 files changed, 308 insertions(+), 125 deletions(-) diff --git a/src/framework/annotator_mgr.cpp b/src/framework/annotator_mgr.cpp index 19aa35c..795b532 100644 --- a/src/framework/annotator_mgr.cpp +++ b/src/framework/annotator_mgr.cpp @@ -86,12 +86,13 @@ namespace uima { /* Implementation */ /* ----------------------------------------------------------------------- */ - AnnotatorManager::AnnotatorManager(internal::AggregateEngine & rEngine) : - iv_pEngine(& rEngine), - iv_vecEntries(), - iv_bIsInitialized(false), - iv_uiNbrOfDocsProcessed(0) - /* ----------------------------------------------------------------------- */{ + AnnotatorManager::AnnotatorManager(internal::AggregateEngine & rEngine) : iv_pEngine(&rEngine), + iv_vecEntries(), + iv_uiNbrOfDocsProcessed(0), + iv_pFlowController(nullptr), + iv_bIsInitialized(false), + iv_bOutputNewCases(false) + /* ----------------------------------------------------------------------- */ { ; } @@ -152,12 +153,21 @@ namespace uima { assert(iv_vecEntries.empty()); AnnotatorContext & rANC = iv_pEngine->getAnnotatorContext(); - AnalysisEngineDescription const & crTAESpecifier = rANC.getTaeSpecifier(); // this method must be added - + const 
AnalysisEngineMetaData* pEngineMetadata = crTAESpecifier.getAnalysisEngineMetaData(); assert( ! crTAESpecifier.isPrimitive() ); - //BSIvector < icu::UnicodeString > const & crVecEngineNames = crTAESpecifier.getAnalysisEngineMetaData()->getFixedFlow()->getNodes(); - vector < icu::UnicodeString > const & crVecEngineNames = crTAESpecifier.getAnalysisEngineMetaData()->getFlowConstraints()->getNodes(); + + // FIXME: This shouldn't have been necessary since FlowContrainst::getFlowContraintsType + // should have been const in the first place + auto flowContraints = CONST_CAST(FlowConstraints *, pEngineMetadata->getFlowConstraints()); + if (flowContraints->getFlowConstraintsType() == FlowConstraints::FIXED) { + iv_pFlowController = new FixedFlowController; + iv_pFlowController->initialize(rANC); + } + + if (const OperationalProperties* operationalProps = pEngineMetadata->getOperationalProperties()) + iv_bOutputNewCases = operationalProps->getOutputsNewCASes(); + vector < icu::UnicodeString > const & crVecEngineNames = flowContraints->getNodes(); // for all engines in the flow size_t ui; @@ -379,10 +389,10 @@ namespace uima { bool AnnotatorManager::shouldEngineBeCalled(uima::internal::CapabilityContainer const & crCapContainer, - ResultSpecification const & rResultSpec, - Language const & crLanguage, - vector & rTOFSToBeRemoved) { - util::Trace clTrace(util::enTraceDetailHigh, UIMA_TRACE_ORIGIN, UIMA_TRACE_COMPID_ANNOTATOR_MGR); + ResultSpecification const &rResultSpec, + Language const &crLanguage, + vector &rTOFSToBeRemoved) { + util::Trace clTrace(util::enTraceDetailHigh, UIMA_TRACE_ORIGIN, UIMA_TRACE_COMPID_ANNOTATOR_MGR); #ifdef DEBUG_VERBOSE UIMA_TPRINT("CapContainer:"); @@ -403,39 +413,31 @@ namespace uima { #endif // treat dump-like annotators for this language specially - if (crCapContainer.hasEmptyOutputTypeOrFeatures( crLanguage )) { + if (crCapContainer.hasEmptyOutputTypeOrFeatures(crLanguage)) { return true; } - ResultSpecification::TyTypeOrFeatureSTLSet const 
& crTOFSet = rResultSpec.getTypeOrFeatureSTLSet(); + ResultSpecification::TyTypeOrFeatureSTLSet const &crTOFSet = rResultSpec.getTypeOrFeatureSTLSet(); bool bHasTOF = false; - ResultSpecification::TyTypeOrFeatureSTLSet::const_iterator cit; - for (cit = crTOFSet.begin(); cit != crTOFSet.end(); ++cit) { - TypeOrFeature const & crTOF = (*cit); - assert( (*cit).isValid() ); - assert( crTOF.isValid() ); - assert( rResultSpec.contains( crTOF ) ); - + for (const auto & crTOF : crTOFSet) { + assert(crTOF.isValid()); + assert(rResultSpec.contains( crTOF )); UIMA_TPRINT(" TOF Name: " << crTOF.getName()); if (crCapContainer.hasOutputTypeOrFeature(crTOF, crLanguage)) { - assert( containsTOF(crTOF, crLanguage, crCapContainer) ); - UIMA_TPRINT( " in capability" ); + assert(containsTOF(crTOF, crLanguage, crCapContainer)); + UIMA_TPRINT(" in capability"); bHasTOF = true; rTOFSToBeRemoved.push_back(crTOF); } else { - assert( ! containsTOF(crTOF, crLanguage, crCapContainer) ); + assert(! containsTOF(crTOF, crLanguage, crCapContainer)); UIMA_TPRINT(" not in capability"); } } return bHasTOF; } - - - - - TyErrorId AnnotatorManager::launchProcessDocument(CAS & cas, ResultSpecification const & crResultSpec) { + TyErrorId AnnotatorManager::processCapabilityLanguageFlow(CAS &cas, ResultSpecification const &crResultSpec) { /* This works as follows: The passes result spec is copied and for each delegate AE, it is determined @@ -449,14 +451,14 @@ namespace uima { it nonetheless must sepcify that it needs tokens, sentences, and paragraphs because this is what the summarizer needs as input. 
*/ - util::Trace clTrace(util::enTraceDetailLow, UIMA_TRACE_ORIGIN, UIMA_TRACE_COMPID_ANNOTATOR_MGR); + util::Trace clTrace(util::enTraceDetailLow, UIMA_TRACE_ORIGIN, UIMA_TRACE_COMPID_ANNOTATOR_MGR); UIMA_ANNOTATOR_TIMING(iv_clTimerLaunchProcess.start()); - TyAnnotatorEntries::iterator it; - TyErrorId utErrorId = UIMA_ERR_NONE; - TyErrorId utRetVal = UIMA_ERR_NONE; - assert( EXISTS(iv_pEngine) ); - size_t uiNbrOfSkippedAnnotators = 0; - CAS * tcas=NULL; + + TyErrorId utErrorId = UIMA_ERR_NONE; + TyErrorId utRetVal = UIMA_ERR_NONE; + assert(EXISTS(iv_pEngine)); + size_t uiNbrOfSkippedAnnotators = 0; + CAS *tcas = nullptr; // copy the result spec ResultSpecification resSpec = crResultSpec; @@ -465,10 +467,9 @@ namespace uima { assert(iv_bIsInitialized); assert(!iv_vecEntries.empty()); - for (it = iv_vecEntries.begin(); it != iv_vecEntries.end(); ++it) { - EngineEntry & rEntry = (*it); - AnalysisEngine * pEngine = rEntry.iv_pEngine; - uima::internal::CapabilityContainer * pCapContainer = rEntry.iv_pCapabilityContainer; + for (EngineEntry &engineEntry: iv_vecEntries) { + AnalysisEngine *pEngine = engineEntry.iv_pEngine; + CapabilityContainer *pCapContainer = engineEntry.iv_pCapabilityContainer; assert(EXISTS(pEngine)); assert(EXISTS(pCapContainer)); @@ -478,38 +479,23 @@ namespace uima { resSpec.print(cout); #endif - UIMA_TRACE_STREAM_ARG(clTrace, "ASB checks engine", pEngine->getAnalysisEngineMetaData().getName() ); + UIMA_TRACE_STREAM_ARG(clTrace, "ASB checks engine", pEngine->getAnalysisEngineMetaData().getName()); UIMA_TPRINT("--------- Checking annotator: " << pEngine->getAnalysisEngineMetaData().getName()); vector tofsToBeRemoved; - bool callEngine=true; - bool requiresTCas=true; + bool callEngine = true; + bool requiresTCas = true; if (cas.isBackwardCompatibleCas()) { - tcas = &cas; - } - //this populates the tofsToBeRemoved vector so always call it - callEngine = shouldEngineBeCalled(*pCapContainer, - resSpec, - cas.getDocumentAnnotation().getLanguage(), - 
tofsToBeRemoved); - //check the FlowConstraintType specified in the aggregate engine - //if CapabilityLanguageFlow whether engine is called is - //determined by shouldEngineBeCalled() - AnnotatorContext & rANC = iv_pEngine->getAnnotatorContext(); - AnalysisEngineDescription const & crTAESpecifier = rANC.getTaeSpecifier(); - FlowConstraints const * pFlow = crTAESpecifier.getAnalysisEngineMetaData()->getFlowConstraints(); - FlowConstraints * flow = CONST_CAST(FlowConstraints *, pFlow); - FlowConstraints::EnFlowType flowType = flow->getFlowConstraintsType(); - - //if FixedFlow specified all engines are always called so reset callEngine is true - if (flowType == FlowConstraints::FIXED) { - callEngine=true; - } - - - if ( callEngine ) { + tcas = &cas; + } + //this populates the tofsToBeRemoved vector so always call it + callEngine = shouldEngineBeCalled(*pCapContainer, + resSpec, + cas.getDocumentAnnotation().getLanguage(), + tofsToBeRemoved); + if (callEngine) { UIMA_TPRINT("----------- engine will be processed"); UIMA_TRACE_STREAM(clTrace, "Engine will be called"); @@ -517,73 +503,214 @@ namespace uima { // this must be done because an annotator should only be called with the result spec // that its XML file indicates. ResultSpecification annResSpec; - vector::const_iterator citTOF; - for (citTOF = tofsToBeRemoved.begin(); citTOF != tofsToBeRemoved.end(); ++citTOF) { - assert( (*citTOF).isValid() ); - annResSpec.add(*citTOF); - UIMA_TRACE_STREAM_ARG(clTrace, " engine is called with result spec", (*citTOF).getName() ); + for (const TypeOrFeature &tof: tofsToBeRemoved) { + assert(tof.isValid()); + annResSpec.add(tof); + UIMA_TRACE_STREAM_ARG(clTrace, " engine is called with result spec", tof.getName()); } /// does engine expect a TCas //AEs that declare at least one input or output SofA should be sent the base CAS. //Otherwise they must be sent a TCAS. 
- const AnalysisEngineMetaData::TyVecpCapabilities & vecCap = pEngine->getAnalysisEngineMetaData().getCapabilites(); - AnalysisEngineMetaData::TyVecpCapabilities::const_iterator itCap; - for (size_t i=0; i < vecCap.size(); i++) { - Capability * cap = vecCap.at(i); - Capability::TyVecCapabilitySofas inputSofa = cap->getCapabilitySofas(Capability::INPUTSOFA); - Capability::TyVecCapabilitySofas outputSofa = cap->getCapabilitySofas(Capability::OUTPUTSOFA); - if (inputSofa.size() > 0 || outputSofa.size() > 0) { + const AnalysisEngineMetaData::TyVecpCapabilities &vecCap = pEngine->getAnalysisEngineMetaData(). + getCapabilites(); + for (Capability *cap: vecCap) { + const auto &inputSofa = cap->getCapabilitySofas(Capability::INPUTSOFA); + const auto &outputSofa = cap->getCapabilitySofas(Capability::OUTPUTSOFA); + if (!inputSofa.empty() || !outputSofa.empty()) { requiresTCas = false; break; } } if (requiresTCas) { - SofaFS defSofa = cas.getSofa(pEngine->getAnnotatorContext().mapToSofaID(CAS::NAME_DEFAULT_TEXT_SOFA)); - if (!defSofa.isValid()) { - //TODO: throw exception - cerr << "could not get default text sofa " << endl; - return 99; - } - tcas = cas.getView(defSofa); - utErrorId = pEngine->process(*tcas, annResSpec); + SofaFS defSofa = cas.getSofa(pEngine->getAnnotatorContext().mapToSofaID(CAS::NAME_DEFAULT_TEXT_SOFA)); + if (!defSofa.isValid()) { + //TODO: throw exception + cerr << "could not get default text sofa " << endl; + return 99; + } + tcas = cas.getView(defSofa); + utErrorId = pEngine->process(*tcas, annResSpec); } else { - utErrorId = ((AnalysisEngine*) pEngine)->process(cas, annResSpec); + utErrorId = pEngine->process(cas, annResSpec); } if (utErrorId != UIMA_ERR_NONE) { clTrace.dump(_TEXT("Error"), (long) utErrorId); - utRetVal = utErrorId; /* I know, this overwrites a previous error */ + utRetVal = utErrorId; /* I know, this overwrites a previous error */ } else { // now remove TOFs from ResultSpec - vector::const_iterator citTOF; - for (citTOF = 
tofsToBeRemoved.begin(); citTOF != tofsToBeRemoved.end(); ++citTOF) { - assert( (*citTOF).isValid() ); - resSpec.remove(*citTOF); + for (const auto &crTof: tofsToBeRemoved) { + assert(crTof.isValid()); + resSpec.remove(crTof); } } } else { - assert( tofsToBeRemoved.empty() ); + assert(tofsToBeRemoved.empty()); UIMA_TPRINT("----------- engine will *not* be processed"); ++uiNbrOfSkippedAnnotators; } - } /* e-o-for */ + } /* e-o-for */ /* in case there was no error but not any annotator which generates a target type has been caled for process, we have an error */ UIMA_TPRINT("Annotators skipped due to unsupport lang: " << uiNbrOfSkippedAnnotators); - UIMA_TPRINT("Overall number of annotators: " << iv_vecEntries.size() ); + UIMA_TPRINT("Overall number of annotators: " << iv_vecEntries.size()); - if ( (utRetVal == UIMA_ERR_NONE) - && (uiNbrOfSkippedAnnotators > 0) - && (uiNbrOfSkippedAnnotators == iv_vecEntries.size()) - && (crResultSpec.getSize() > 0) ) { + if ((utRetVal == UIMA_ERR_NONE) + && (uiNbrOfSkippedAnnotators > 0) + && (uiNbrOfSkippedAnnotators == iv_vecEntries.size()) + && (crResultSpec.getSize() > 0)) { // utRetVal = UIMA_ERR_ANNOTATOR_MGR_LANG_NOT_SUPPORTED_FOR_ANNOTATOR; iv_pEngine->getAnnotatorContext().getLogger().logWarning("All annotators skipped (maybe unsupported language)"); } UIMA_ANNOTATOR_TIMING(iv_clTimerLaunchProcess.stop()); - return(utRetVal); + return utRetVal; + } + + CAS* AnnotatorManager::processUntilNextOutputCas() { + unique_ptr flow{}; + while (true) { + CAS* currentCas = nullptr; + AnnotatorContext* annContext = nullptr; // The AnnotatorContext that manages the produced CAS + Step nextStep; + flow = nullptr; + + while (!currentCas) { + if (casIterStack.empty()) return nullptr; + + StackFrame &frame = casIterStack.top(); + try { + if (frame.casMultiplier && frame.casMultiplier->hasNext()) { + currentCas = &frame.casMultiplier->next(); + annContext = &frame.casMultiplier->getAnnotatorContext(); + flow = 
frame.originalFlow->newCasProduced(*currentCas, frame.lastEngineKey); + } + } catch (Exception& exception) { + // TODO: + throw; + } + + if (!currentCas) { + currentCas = frame.originalCas; + annContext = frame.casMultiplier ? &frame.casMultiplier->getAnnotatorContext() : nullptr; + flow = std::move(frame.originalFlow); + currentCas->setCurrentComponentInfo(nullptr); + casIterStack.pop(); + } + + if (nextStep.getType() == Step::StepType::UNSPECIFIED) { + nextStep = flow->next(); + } + + while (nextStep.getType() != Step::StepType::FINALSTEP) { + if (nextStep.getType() == Step::StepType::SIMPLESTEP) { + const icu::UnicodeString& nextAEName = nextStep.getSimpleStep()->getEngineName(); + auto z = DYNAMIC_CAST(FixedFlowController*, iv_pFlowController)->getDelegateSpecifierMap().at(nextAEName); + // auto it = std::find_if(iv_vecEntries.begin(), iv_vecEntries.end(), + // [nextAEName](const EngineEntry &entry) { + // return entry.iv_pEngine->getAnalysisEngineMetaData().getName() == nextAEName; + // }); + // std::cout << "Engine name: " << nextAEName << std::endl; + auto it = iv_vecEntries.begin(); + for (; it != iv_vecEntries.end(); ++it) { + auto &x = it->iv_pEngine->getAnalysisEngineMetaData().getName(); + auto &y = it->iv_pEngine->getAnnotatorContext(); + // std::cout << x << std::endl; + if ( &y == z) + break; + } + + if (it != iv_vecEntries.end()) { + AnalysisEngine* nextAE = it->iv_pEngine; + CAS* outputCas = nullptr; + + try { + CASIterator casIter = nextAE->processAndOutputNewCASes(*currentCas); + if (casIter.hasNext()) + outputCas = &casIter.next(); + } catch (Exception& e) { + // TODO: Handle exception by checking if we're allowed to continue on failure + } + + if (outputCas) { + std::unique_ptr nextFlow = flow->newCasProduced(*outputCas, nextAEName); + casIterStack.push({nextAE, currentCas, std::move(flow), nextAEName}); + flow = std::move(nextFlow); + currentCas = outputCas; + annContext = &nextAE->getAnnotatorContext(); + } else { + 
currentCas->setCurrentComponentInfo(nullptr); + } + } else { + // TODO: Throw invalid key exception + } + } else if (nextStep.getType() == Step::StepType::PARALLELSTEP) { + // TODO: ParallelStep not supported yet + } else { + // TODO: Throw unsupported step type + } + + nextStep = flow->next(); + } + + const FinalStep* finalStep = nextStep.getFinalStep(); + if (currentCas == inputCas) { + if (finalStep->getForceDropCAS()) { + // TODO: Throw excetion (not allowed to drop input CAS) + } + return nullptr; + } + + if (iv_bOutputNewCases && !finalStep->getForceDropCAS()) + return currentCas; + // annContext->releaseCAS(*currentCas); + currentCas->release(); + } + } + } + + bool AnnotatorManager::hasNext() { + if (!nextCas) + nextCas = processUntilNextOutputCas(); + return nextCas != nullptr; + } + + CAS & AnnotatorManager::next() { + CAS* result = nextCas; + if (!result) + result = processUntilNextOutputCas(); + if (!result) { + UIMA_EXC_THROW_NEW(Exception, + UIMA_ERR_USER_ANNOTATOR_COULD_NOT_PROCESS, + UIMA_MSG_ID_EXCON_PROCESSING_CAS, + ErrorMessage(UIMA_MSG_ID_LITERAL_STRING, "There is not next() available."), + ErrorInfo::unrecoverable); + } + nextCas = nullptr; + return *result; + } + + + TyErrorId AnnotatorManager::launchProcessDocument(CAS &cas, ResultSpecification const &crResultSpec) { + //if engine uses Capability Language Flow + // TODO: Turn this logic and processCapabilityLanguageFlow into a separate CapabilityLanguageFlowController class that inherits from FlowController + AnnotatorContext &rANC = iv_pEngine->getAnnotatorContext(); + AnalysisEngineDescription const &crTAESpecifier = rANC.getTaeSpecifier(); + FlowConstraints const *pFlow = crTAESpecifier.getAnalysisEngineMetaData()->getFlowConstraints(); + FlowConstraints *flow = CONST_CAST(FlowConstraints *, pFlow); + + // Process according to capability language specifications + if (flow->getFlowConstraintsType() == FlowConstraints::CAPABILITYLANGUAGE) + return processCapabilityLanguageFlow(cas, 
crResultSpec); + + inputCas = &cas; + + casIterStack.push({nullptr, inputCas, iv_pFlowController->computeFlow(*inputCas), {}}); + nextCas = processUntilNextOutputCas(); + + return UIMA_ERR_NONE; } #ifdef UIMA_DEBUG_ANNOTATOR_TIMING diff --git a/src/framework/flow.cpp b/src/framework/flow.cpp index 3b6dfd2..1928602 100644 --- a/src/framework/flow.cpp +++ b/src/framework/flow.cpp @@ -42,6 +42,38 @@ namespace uima { type(StepType::FINALSTEP) { } + Step::Step(const Step& other) :type(other.type){ + switch (other.type) { + case StepType::SIMPLESTEP: + step.simpleStep = other.step.simpleStep; + break; + case StepType::FINALSTEP: + step.finalStep = other.step.finalStep; + break; + default: + break; + } + + } + + Step & Step::operator=(const Step &other) { + if (this == &other) { + return *this; + } + type = other.type; + switch (other.type) { + case StepType::SIMPLESTEP: + step.simpleStep = other.step.simpleStep; + break; + case StepType::FINALSTEP: + step.finalStep = other.step.finalStep; + break; + default: + break; + } + return *this; + } + Step::~Step() { switch (type) { case StepType::SIMPLESTEP: @@ -53,6 +85,8 @@ namespace uima { case StepType::PARALLELSTEP: step.parallelStep.~ParallelStep(); break; + default: + break; } } diff --git a/src/framework/flow_controller.cpp b/src/framework/flow_controller.cpp index ae9c543..2d11322 100644 --- a/src/framework/flow_controller.cpp +++ b/src/framework/flow_controller.cpp @@ -52,8 +52,8 @@ namespace uima { const icu::UnicodeString &engineName = delegateKeys[currentStep]; const AnnotatorContext* engineContext = flowController->getDelegateSpecifierMap().at(engineName); const AnalysisEngineMetaData* engineMetadata = engineContext->getTaeSpecifier().getAnalysisEngineMetaData(); - - if (engineMetadata->getOperationalProperties()->getOutputsNewCASes()) + const OperationalProperties* operationalProps = engineMetadata->getOperationalProperties(); + if ( operationalProps && operationalProps->getOutputsNewCASes()) 
wasPassedToCASMultiplier = true; return Step(internal::SimpleStep(delegateKeys[currentStep++])); @@ -64,10 +64,8 @@ namespace uima { const std::vector& delegateKeys = flowController->getDelegateKeys(); int i = 0; - for (; i < delegateKeys.size(); ++i) { - if (producedBy == delegateKeys[i]) break; - } - + while (producedBy != delegateKeys.at(i)) + ++i; return std::make_unique(flowController, i+1, true); } diff --git a/src/framework/internal_aggregate_engine.cpp b/src/framework/internal_aggregate_engine.cpp index c0ca218..746c57a 100644 --- a/src/framework/internal_aggregate_engine.cpp +++ b/src/framework/internal_aggregate_engine.cpp @@ -220,15 +220,11 @@ namespace uima { } bool AggregateEngine::hasNextImpl() { - return false; + return iv_annotatorMgr.hasNext(); } CAS & AggregateEngine::nextImpl() { - UIMA_EXC_THROW_NEW(ExcInvalidRequest, - UIMA_ERR_NOT_YET_IMPLEMENTED, - UIMA_MSG_ID_EXC_INVALID_CALL_TO_NEXT, - UIMA_MSG_ID_EXC_INVALID_CALL_TO_NEXT, - ErrorInfo::unrecoverable); + return iv_annotatorMgr.next(); } int AggregateEngine::getCasInstancesRequiredImpl() { diff --git a/src/framework/uima/annotator_mgr.hpp b/src/framework/uima/annotator_mgr.hpp index 9519f58..d6b9ad0 100644 --- a/src/framework/uima/annotator_mgr.hpp +++ b/src/framework/uima/annotator_mgr.hpp @@ -43,12 +43,14 @@ #include "uima/pragmas.hpp" //must be included first to disable warnings #include +#include #include "uima/annotator_timing.hpp" #include "uima/exceptions.hpp" #include "uima/timedatetools.hpp" #include "uima/result_specification.hpp" +#include "uima/flow_controller.hpp" //#include "uima/internal_capability_container.hpp" /* ----------------------------------------------------------------------- */ @@ -204,6 +206,8 @@ namespace uima { protected: /* --- functions --- */ private: + friend class PrimitiveEngine; + friend class AggregateEngine; #ifdef UIMA_COMP_REQ_PUBLIC_TYPES public: #endif @@ -212,17 +216,27 @@ namespace uima { internal::CapabilityContainer * iv_pCapabilityContainer; } 
EngineEntry; + + struct StackFrame { + AnalysisEngine* casMultiplier; + CAS* originalCas; + std::unique_ptr originalFlow; + icu::UnicodeString lastEngineKey; + }; /* --- types --- */ typedef std::vector < EngineEntry > TyAnnotatorEntries; - private: - friend class uima::internal::PrimitiveEngine; - // the engine whic howns this annotator manager - internal::AggregateEngine * iv_pEngine; + // the engine which owns this annotator manager + AggregateEngine * iv_pEngine; /* --- variables --- */ - TyAnnotatorEntries iv_vecEntries; - bool iv_bIsInitialized; - size_t iv_uiNbrOfDocsProcessed; // for timing statistics - + TyAnnotatorEntries iv_vecEntries; + std::stack casIterStack; + + size_t iv_uiNbrOfDocsProcessed; // for timing statistics + FlowController* iv_pFlowController; + CAS* inputCas{}; + CAS* nextCas{}; + bool iv_bIsInitialized; + bool iv_bOutputNewCases; /* --- functions --- */ #ifdef UIMA_DEBUG_ANNOTATOR_TIMING Timer iv_clTimerLaunchInit; @@ -243,6 +257,12 @@ namespace uima { Language const &, std::vector&) ; + TyErrorId processCapabilityLanguageFlow(CAS &cas, ResultSpecification const &crResultSpec); + + CAS* processUntilNextOutputCas(); + + bool hasNext(); + CAS& next(); /* COPY CONSTRUCTOR NOT SUPPORTED */ AnnotatorManager(const AnnotatorManager & ); //lint !e1704 /* ASSIGNMENT OPERATOR NOT SUPPORTED */ diff --git a/src/framework/uima/flow.hpp b/src/framework/uima/flow.hpp index cb943e5..357009e 100644 --- a/src/framework/uima/flow.hpp +++ b/src/framework/uima/flow.hpp @@ -97,7 +97,10 @@ namespace uima { */ class UIMA_LINK_IMPORTSPEC Step { public: - enum class StepType { SIMPLESTEP, FINALSTEP, PARALLELSTEP }; + enum class StepType { SIMPLESTEP, FINALSTEP, PARALLELSTEP, UNSPECIFIED }; + + Step() : type(StepType::UNSPECIFIED) { + } explicit Step(const internal::SimpleStep &simpleStep); @@ -105,6 +108,10 @@ namespace uima { explicit Step(const internal::FinalStep &finalStep); + Step(const Step &other); + + Step& operator=(const Step& other); + ~Step(); const 
internal::SimpleStep *getSimpleStep() const; @@ -128,10 +135,11 @@ namespace uima { step_type(const internal::FinalStep &finalStep) : finalStep(finalStep) { } + step_type() { } ~step_type() { } } step; - const StepType type; + StepType type; }; /** diff --git a/src/test/data/descriptors/SimpleTextSegmenter.xml b/src/test/data/descriptors/SimpleTextSegmenter.xml index db5d7fe..84067c8 100644 --- a/src/test/data/descriptors/SimpleTextSegmenter.xml +++ b/src/test/data/descriptors/SimpleTextSegmenter.xml @@ -32,7 +32,7 @@ Simple Text Segmenter Splits a text document into pieces. The point at which the text is split is determined by - SegmentDelimiter configuration parameter which defaults to new line ('\n'). + SegmentDelimiter configuration parameter which defaults to '.' The last segment in the document will have lastSegment set to true. 1.0 From 8aa5919798be45abc8e9b3b914c5a6ec12e97cb8 Mon Sep 17 00:00:00 2001 From: mac-op Date: Thu, 25 Jul 2024 01:48:07 -0600 Subject: [PATCH 05/22] framework: fixed small hack in AnnotatorManager No longer have to cast FlowController to FixedFlowController when we want to find an engine --- src/framework/annotator_mgr.cpp | 25 +++++-------------------- 1 file changed, 5 insertions(+), 20 deletions(-) diff --git a/src/framework/annotator_mgr.cpp b/src/framework/annotator_mgr.cpp index 795b532..514ca99 100644 --- a/src/framework/annotator_mgr.cpp +++ b/src/framework/annotator_mgr.cpp @@ -573,7 +573,6 @@ namespace uima { unique_ptr flow{}; while (true) { CAS* currentCas = nullptr; - AnnotatorContext* annContext = nullptr; // The AnnotatorContext that manages the produced CAS Step nextStep; flow = nullptr; @@ -584,7 +583,6 @@ namespace uima { try { if (frame.casMultiplier && frame.casMultiplier->hasNext()) { currentCas = &frame.casMultiplier->next(); - annContext = &frame.casMultiplier->getAnnotatorContext(); flow = frame.originalFlow->newCasProduced(*currentCas, frame.lastEngineKey); } } catch (Exception& exception) { @@ -594,7 +592,6 
@@ namespace uima { if (!currentCas) { currentCas = frame.originalCas; - annContext = frame.casMultiplier ? &frame.casMultiplier->getAnnotatorContext() : nullptr; flow = std::move(frame.originalFlow); currentCas->setCurrentComponentInfo(nullptr); casIterStack.pop(); @@ -607,20 +604,10 @@ namespace uima { while (nextStep.getType() != Step::StepType::FINALSTEP) { if (nextStep.getType() == Step::StepType::SIMPLESTEP) { const icu::UnicodeString& nextAEName = nextStep.getSimpleStep()->getEngineName(); - auto z = DYNAMIC_CAST(FixedFlowController*, iv_pFlowController)->getDelegateSpecifierMap().at(nextAEName); - // auto it = std::find_if(iv_vecEntries.begin(), iv_vecEntries.end(), - // [nextAEName](const EngineEntry &entry) { - // return entry.iv_pEngine->getAnalysisEngineMetaData().getName() == nextAEName; - // }); - // std::cout << "Engine name: " << nextAEName << std::endl; - auto it = iv_vecEntries.begin(); - for (; it != iv_vecEntries.end(); ++it) { - auto &x = it->iv_pEngine->getAnalysisEngineMetaData().getName(); - auto &y = it->iv_pEngine->getAnnotatorContext(); - // std::cout << x << std::endl; - if ( &y == z) - break; - } + auto it = std::find_if(iv_vecEntries.begin(), iv_vecEntries.end(), + [&, nextAEName](const EngineEntry &entry) { + return entry.iv_pEngine->getAnnotatorContext().iv_AnCKey == nextAEName; + }); if (it != iv_vecEntries.end()) { AnalysisEngine* nextAE = it->iv_pEngine; @@ -639,7 +626,6 @@ namespace uima { casIterStack.push({nextAE, currentCas, std::move(flow), nextAEName}); flow = std::move(nextFlow); currentCas = outputCas; - annContext = &nextAE->getAnnotatorContext(); } else { currentCas->setCurrentComponentInfo(nullptr); } @@ -665,8 +651,7 @@ namespace uima { if (iv_bOutputNewCases && !finalStep->getForceDropCAS()) return currentCas; - // annContext->releaseCAS(*currentCas); - currentCas->release(); + currentCas->release(); } } } From d579898126d0e01c21d15697192d02b6e4d2629c Mon Sep 17 00:00:00 2001 From: mac-op Date: Thu, 25 Jul 2024 
17:56:08 -0600 Subject: [PATCH 06/22] framework: added function and class documentation --- src/cas/uima/cas.hpp | 3 + src/framework/flow.cpp | 26 ++++----- src/framework/flow_controller.cpp | 2 +- src/framework/uima/annotator_mgr.hpp | 6 ++ src/framework/uima/flow.hpp | 78 ++++++++++++++++++++------ src/framework/uima/flow_controller.hpp | 63 ++++++++++++++++++--- 6 files changed, 140 insertions(+), 38 deletions(-) diff --git a/src/cas/uima/cas.hpp b/src/cas/uima/cas.hpp index 51ccd3d..3834b57 100644 --- a/src/cas/uima/cas.hpp +++ b/src/cas/uima/cas.hpp @@ -773,6 +773,9 @@ namespace uima { return CAS::INDEXID_ANNOTATION; } + /** + * When called this CAS will release itself by calling releaseCas on the AnnotatorContext that owns it. + */ void release(); /** @} */ diff --git a/src/framework/flow.cpp b/src/framework/flow.cpp index 1928602..fcb4dc1 100644 --- a/src/framework/flow.cpp +++ b/src/framework/flow.cpp @@ -30,25 +30,25 @@ #include "uima/flow.hpp" namespace uima { - Step::Step(const internal::SimpleStep &simpleStep): step(simpleStep), + Step::Step(const internal::SimpleStep &simpleStep): uStep(simpleStep), type(StepType::SIMPLESTEP) { } - Step::Step(const internal::ParallelStep &parallelStep): step(parallelStep), + Step::Step(const internal::ParallelStep &parallelStep): uStep(parallelStep), type(StepType::PARALLELSTEP) { } - Step::Step(const internal::FinalStep &finalStep): step(finalStep), + Step::Step(const internal::FinalStep &finalStep): uStep(finalStep), type(StepType::FINALSTEP) { } Step::Step(const Step& other) :type(other.type){ switch (other.type) { case StepType::SIMPLESTEP: - step.simpleStep = other.step.simpleStep; + uStep.simpleStep = other.uStep.simpleStep; break; case StepType::FINALSTEP: - step.finalStep = other.step.finalStep; + uStep.finalStep = other.uStep.finalStep; break; default: break; @@ -63,10 +63,10 @@ namespace uima { type = other.type; switch (other.type) { case StepType::SIMPLESTEP: - step.simpleStep = other.step.simpleStep; + 
uStep.simpleStep = other.uStep.simpleStep; break; case StepType::FINALSTEP: - step.finalStep = other.step.finalStep; + uStep.finalStep = other.uStep.finalStep; break; default: break; @@ -77,13 +77,13 @@ namespace uima { Step::~Step() { switch (type) { case StepType::SIMPLESTEP: - step.simpleStep.~SimpleStep(); + uStep.simpleStep.~SimpleStep(); break; case StepType::FINALSTEP: - step.finalStep.~FinalStep(); + uStep.finalStep.~FinalStep(); break; case StepType::PARALLELSTEP: - step.parallelStep.~ParallelStep(); + uStep.parallelStep.~ParallelStep(); break; default: break; @@ -92,17 +92,17 @@ namespace uima { const internal::SimpleStep * Step::getSimpleStep() const { if (type != StepType::SIMPLESTEP) return nullptr; - return &step.simpleStep; + return &uStep.simpleStep; } const internal::ParallelStep * Step::getParallelStep() const { if (type != StepType::PARALLELSTEP) return nullptr; - return &step.parallelStep; + return &uStep.parallelStep; } const internal::FinalStep * Step::getFinalStep() const { if (type != StepType::FINALSTEP) return nullptr; - return &step.finalStep; + return &uStep.finalStep; } Step::StepType Step::getType() const { diff --git a/src/framework/flow_controller.cpp b/src/framework/flow_controller.cpp index 2d11322..cacea56 100644 --- a/src/framework/flow_controller.cpp +++ b/src/framework/flow_controller.cpp @@ -86,7 +86,7 @@ namespace uima { initialize(*annotatorContext); } - std::unique_ptr FixedFlowController::computeFlow(const CAS&) { + std::unique_ptr FixedFlowController::computeFlow(CAS&) { return std::make_unique(this, 0); } diff --git a/src/framework/uima/annotator_mgr.hpp b/src/framework/uima/annotator_mgr.hpp index d6b9ad0..41915ee 100644 --- a/src/framework/uima/annotator_mgr.hpp +++ b/src/framework/uima/annotator_mgr.hpp @@ -257,12 +257,18 @@ namespace uima { Language const &, std::vector&) ; + /** Helper method that handles the input CAS for Capability Language Flow */ TyErrorId processCapabilityLanguageFlow(CAS &cas, 
ResultSpecification const &crResultSpec); + /** This runs the aggregate engine from the current state until a new CAS is output */ CAS* processUntilNextOutputCas(); + /** Called by Aggregate Engine's hasNext */ bool hasNext(); + + /** Called by Aggregate Engine's next */ CAS& next(); + /* COPY CONSTRUCTOR NOT SUPPORTED */ AnnotatorManager(const AnnotatorManager & ); //lint !e1704 /* ASSIGNMENT OPERATOR NOT SUPPORTED */ diff --git a/src/framework/uima/flow.hpp b/src/framework/uima/flow.hpp index 357009e..4e857c2 100644 --- a/src/framework/uima/flow.hpp +++ b/src/framework/uima/flow.hpp @@ -39,18 +39,28 @@ namespace uima { namespace internal { + /** + * Indicates that a CAS should be routed to a single AnalysisEngine. + */ class UIMA_LINK_IMPORTSPEC SimpleStep { - icu::UnicodeString engineName; + /* The key of the engine the CAS will be input to. + Not to be confused with the engine name, which is specified by the its descriptor*/ + icu::UnicodeString engineKey; /* ResultSpecification *resultSpec; */ public: - SimpleStep(const icu::UnicodeString &name, ResultSpecification *resultSpec = nullptr) : engineName(name) { + /* If SimpleStepWithResultSpec is required when CapabilityLanguageFlowController is implemented, + * this class could have an extra member ResultSpecification + SimpleStep(const icu::UnicodeString &name, ResultSpecification *resultSpec) : engineKey(name) { + } */ + + SimpleStep(const icu::UnicodeString &name) : engineKey(name) { } const icu::UnicodeString &getEngineName() const { - return engineName; + return engineKey; } - /** Capability Language Flow Controller requires ResultSpecification, when it is eventually implemented + /** bool hasResultSpec() const { return resultSpec != nullptr; } @@ -76,7 +86,11 @@ namespace uima { } }; + /* Indicates that a CAS has finished being processed by the aggregate. + */ class UIMA_LINK_IMPORTSPEC FinalStep { + /* Whether the CAS should be dropped. Should only be true for CASes produced internally by the aggregate. 
+ */ bool forceDropCAS; public: @@ -92,8 +106,11 @@ namespace uima { }; } - /* - * Class Step is a union type of possible step types: Simple Step, Parallel Step and Final Step. + + /** + * Class Step indicates where to route the current CAS to next. + * It is a union type of possible step types: Simple Step, Parallel Step and Final Step. + * It is returned using internal::Flow::next */ class UIMA_LINK_IMPORTSPEC Step { public: @@ -122,38 +139,67 @@ namespace uima { StepType getType() const; private: - union step_type { + union step_union { internal::SimpleStep simpleStep; internal::ParallelStep parallelStep; internal::FinalStep finalStep; - step_type(const internal::SimpleStep &simpleStep) : simpleStep(simpleStep) { + step_union(const internal::SimpleStep &simpleStep) : simpleStep(simpleStep) { } - step_type(const internal::ParallelStep &parallelStep) : parallelStep(parallelStep) { + step_union(const internal::ParallelStep &parallelStep) : parallelStep(parallelStep) { } - step_type(const internal::FinalStep &finalStep) : finalStep(finalStep) { + step_union(const internal::FinalStep &finalStep) : finalStep(finalStep) { } - step_type() { } - ~step_type() { } - } step; + step_union() { } + ~step_union() { } + } uStep; StepType type; }; + /** - * Virtual base class for the Flow objects computed by the FlowController + * Base class for the Flow objects computed by the FlowController. + * Flow objects are responsible for routing a CAS through an Aggregate Engine by returning a Step * @see FlowController::computeFlow */ class UIMA_LINK_IMPORTSPEC Flow { + /** The CAS that this Flow object is handling.
The Flow object can choose to use this method or not */ + CAS* inputCAS{nullptr}; public: virtual ~Flow(){}; + /** + * Specify the next destination for the CAS via a Step object + */ virtual Step next()=0; - virtual std::unique_ptr newCasProduced(const CAS&, const icu::UnicodeString&)=0; - virtual bool continueOnFailure(const icu::UnicodeString&) { return false; } + + /** + * This method is called by the framework if this Flow's CAS has been sent to a CAS Multiplier that has created + * a new output CAS. It may throw an exception if the Engine does not support CAS Multipliers. + * @param cas the new CAS that has been produced + * @param producedBy the key of the delegate engine that has produced this CAS + * @return a new Flow object that will route the output CAS + */ + virtual std::unique_ptr newCasProduced(const CAS& cas, const icu::UnicodeString& producedBy)=0; + + /** + * Called by the framework after a failure to see if the CAS should continue or not. + * @param failedEngine the key of the engine whose failure led to this call + * TODO: include the offending exception as a parameter? + * @return whether processing should continue or be aborted + */ + virtual bool continueOnFailure(const icu::UnicodeString& failedEngine) { return false; } + + /** + * Called by the framework to alert this Flow object that processing has been stopped on this CAS so it can perform any cleanup + */ virtual void aborted() { } + + void setCas(CAS* cas) { inputCAS = cas; } + CAS* getCas() const { return inputCAS; } }; diff --git a/src/framework/uima/flow_controller.hpp b/src/framework/uima/flow_controller.hpp index 1a54f8f..a76354e 100644 --- a/src/framework/uima/flow_controller.hpp +++ b/src/framework/uima/flow_controller.hpp @@ -43,6 +43,11 @@ namespace uima { /** * A FlowController dictates how CASes are routed within Aggregate Analysis Engines. + *

+ * For each new CAS that is passed to the Aggregate Analysis Engine containing the FlowController, + * FlowController::computeFlow will be called. This method must return a Flow object that is responsible for + * routing that CAS through the components of the Aggregate Analysis Engine. + *

*/ class UIMA_LINK_IMPORTSPEC FlowController { public: @@ -54,11 +59,29 @@ namespace uima { * Description of the AnalysisEngine that this Flow Controller belongs to. */ virtual void initialize(const AnnotatorContext& anContext)=0; + + /** Deinitialize this Flow Controller */ virtual void destroy()=0; + + /** Reconfigure this Flow Controller */ virtual void reconfigure()=0; - virtual std::unique_ptr computeFlow(const CAS&)=0; + + /** + * Computes and returns a Flow object will the input CAS through the Aggregate. The + * Flow object should be given a handle to the CAS, so that it can use information in + * the CAS to make routing decisions. + * FlowController implementations can define their own class that implements Flow. + * @param cas A CAS that this FlowController should process. + * @return a Flow object that has responsibility for routing cas through the + * Aggregate Analysis Engine. + */ + virtual std::unique_ptr computeFlow(CAS &cas)=0; }; + + /** + * This class represents the Flow object used in a Fixed Flow Controller + */ class UIMA_LINK_IMPORTSPEC FixedFlowObject : public Flow { public: FixedFlowObject(FixedFlowController *const flowController, int startStep, @@ -74,10 +97,19 @@ namespace uima { std::unique_ptr newCasProduced(const CAS& cas, const icu::UnicodeString& producedBy) override; private: + /** The Flow Controller that defines this Flow */ FixedFlowController* flowController; + + /** Index of the delegate sequence this Flow is at*/ int currentStep; + + /** Whether this flow's CAS was passed to a CAS Multiplier*/ bool wasPassedToCASMultiplier; + + /** Whether this flow's CAS has produced a new CAS */ bool newCASProduced; + + /** Whether this flow's CAS was produced internally by a CAS Multiplier */ bool internallyCreatedCAS; }; @@ -86,7 +118,9 @@ namespace uima { public: enum class ActionAfterCasMultiplier { CONTINUE, STOP, DROP, DROP_IF_NEW_CAS_PRODUCED }; - FixedFlowController() = default; + FixedFlowController() : delegateSpecifierMap(), 
flowContraints(), annotatorContext(), + action(ActionAfterCasMultiplier::DROP_IF_NEW_CAS_PRODUCED) { + } void initialize(const AnnotatorContext &anContext) override; @@ -94,7 +128,7 @@ namespace uima { void reconfigure() override; - std::unique_ptr computeFlow(const CAS&) override; + std::unique_ptr computeFlow(CAS &) override; const std::vector& getDelegateKeys() const; @@ -103,11 +137,24 @@ namespace uima { ActionAfterCasMultiplier getAction() const; private: - // FIXME: Use std::map instead?? - const std::map* delegateSpecifierMap{}; - const FixedFlow* flowContraints{}; - const AnnotatorContext* annotatorContext{}; - ActionAfterCasMultiplier action{ActionAfterCasMultiplier::DROP_IF_NEW_CAS_PRODUCED}; + /** Maps from delegate engine keys (not names) to their corresponding AnnotatorContexts */ + const std::map* delegateSpecifierMap; + + /** The FlowContraints object that defines this FlowController*/ + const FixedFlow* flowContraints; + + /** The AnnotatorContext of the aggregate engine that owns this FlowController */ + const AnnotatorContext* annotatorContext; + + /** The action to be taken after a CAS has been input to a CAS Multiplier. For now this cannot be overridden yet.\n + * Values include:\n + * - CONTINUE: the CAS will continue with the flow\n + * - STOP: the CAS will not continue with the flow and be returned\n + * - DROP: the CAS will not continue and be dropped\n + * - DROP_IF_NEW_CAS_PRODUCED (default): If the CAS Multiplier produced a new CAS from this input CAS then this CAS will + * be dropped, otherwise it will continue. 
+ */ + ActionAfterCasMultiplier action; }; } From 0d635ca8c9e089535c3478b2b5563085a481795a Mon Sep 17 00:00:00 2001 From: mac-op Date: Thu, 25 Jul 2024 22:57:15 -0600 Subject: [PATCH 07/22] framework: added documentation and comments Also introduced new Exception: EngineProcessingException --- src/framework/annotator_mgr.cpp | 181 ++++++++++++-------- src/framework/flow_controller.cpp | 11 +- src/framework/internal_engine_base.cpp | 2 + src/framework/uima/annotator_mgr.hpp | 7 + src/framework/uima/internal_engine_base.hpp | 4 + 5 files changed, 133 insertions(+), 72 deletions(-) diff --git a/src/framework/annotator_mgr.cpp b/src/framework/annotator_mgr.cpp index 514ca99..9df2f1c 100644 --- a/src/framework/annotator_mgr.cpp +++ b/src/framework/annotator_mgr.cpp @@ -569,90 +569,117 @@ namespace uima { return utRetVal; } - CAS* AnnotatorManager::processUntilNextOutputCas() { + CAS *AnnotatorManager::processUntilNextOutputCas() { unique_ptr flow{}; while (true) { - CAS* currentCas = nullptr; + CAS *currentCas = nullptr; Step nextStep; flow = nullptr; - while (!currentCas) { - if (casIterStack.empty()) return nullptr; + // get a cas from the stack + if (casIterStack.empty()) + return nullptr; - StackFrame &frame = casIterStack.top(); - try { - if (frame.casMultiplier && frame.casMultiplier->hasNext()) { - currentCas = &frame.casMultiplier->next(); - flow = frame.originalFlow->newCasProduced(*currentCas, frame.lastEngineKey); - } - } catch (Exception& exception) { - // TODO: - throw; + StackFrame &frame = casIterStack.top(); + try { + if (frame.casMultiplier && frame.casMultiplier->hasNext()) { + currentCas = &frame.casMultiplier->next(); + // compute flow for newly produced CAS + flow = frame.originalFlow->newCasProduced(*currentCas, frame.lastEngineKey); } + } catch (Exception &exception) { + if (!frame.originalFlow->continueOnFailure(frame.lastEngineKey /* ,exception */)) + throw; + } - if (!currentCas) { - currentCas = frame.originalCas; - flow = 
std::move(frame.originalFlow); - currentCas->setCurrentComponentInfo(nullptr); - casIterStack.pop(); - } + if (!currentCas) { + // if there is no more output CASes from the stack, take the original CAS that was processed by + // the CAS Multiplier and continue with its flow + currentCas = frame.originalCas; + flow = std::move(frame.originalFlow); + currentCas->setCurrentComponentInfo(nullptr); // is this necessary? + casIterStack.pop(); + } - if (nextStep.getType() == Step::StepType::UNSPECIFIED) { - nextStep = flow->next(); - } + if (nextStep.getType() == Step::StepType::UNSPECIFIED) { + nextStep = flow->next(); // get the next step for the current flow + } + + while (nextStep.getType() != Step::StepType::FINALSTEP) { + if (nextStep.getType() == Step::StepType::SIMPLESTEP) { + // find the AE specified by the step + const icu::UnicodeString &nextAEKey = nextStep.getSimpleStep()->getEngineName(); + auto it = std::find_if(iv_vecEntries.begin(), iv_vecEntries.end(), + [&, nextAEKey](const EngineEntry &entry) { + return entry.iv_pEngine->getAnnotatorContext().iv_AnCKey == nextAEKey; + }); + + if (it != iv_vecEntries.end()) { + AnalysisEngine *nextAE = it->iv_pEngine; + CAS *outputCas = nullptr; + + // call process one the AE and see if it has produced a new CAS + try { + CASIterator casIter = nextAE->processAndOutputNewCASes(*currentCas); + if (casIter.hasNext()) + outputCas = &casIter.next(); + } catch (Exception &e) { + if (!flow->continueOnFailure(nextAEKey)) + throw; + } - while (nextStep.getType() != Step::StepType::FINALSTEP) { - if (nextStep.getType() == Step::StepType::SIMPLESTEP) { - const icu::UnicodeString& nextAEName = nextStep.getSimpleStep()->getEngineName(); - auto it = std::find_if(iv_vecEntries.begin(), iv_vecEntries.end(), - [&, nextAEName](const EngineEntry &entry) { - return entry.iv_pEngine->getAnnotatorContext().iv_AnCKey == nextAEName; - }); - - if (it != iv_vecEntries.end()) { - AnalysisEngine* nextAE = it->iv_pEngine; - CAS* outputCas = 
nullptr; - - try { - CASIterator casIter = nextAE->processAndOutputNewCASes(*currentCas); - if (casIter.hasNext()) - outputCas = &casIter.next(); - } catch (Exception& e) { - // TODO: Handle exception by checking if we're allowed to continue on failure - } - - if (outputCas) { - std::unique_ptr nextFlow = flow->newCasProduced(*outputCas, nextAEName); - casIterStack.push({nextAE, currentCas, std::move(flow), nextAEName}); - flow = std::move(nextFlow); - currentCas = outputCas; - } else { - currentCas->setCurrentComponentInfo(nullptr); - } + if (outputCas) { + // new CAS is output so put the current components on the stack so we can process + // the other output CASes and original CASes later + std::unique_ptr nextFlow = flow->newCasProduced(*outputCas, nextAEKey); + casIterStack.push({nextAE, currentCas, std::move(flow), nextAEKey}); + flow = std::move(nextFlow); + currentCas = outputCas; } else { - // TODO: Throw invalid key exception + currentCas->setCurrentComponentInfo(nullptr); // is this necessary? 
} - } else if (nextStep.getType() == Step::StepType::PARALLELSTEP) { - // TODO: ParallelStep not supported yet } else { - // TODO: Throw unsupported step type + UIMA_EXC_THROW_NEW(EngineProcessingException, + UIMA_ERR_USER_ANNOTATOR_COULD_NOT_PROCESS, + UIMA_MSG_ID_EXCON_PROCESSING_CAS, + ErrorMessage(UIMA_MSG_ID_LITERAL_STRING, "Unknown Delegate Key"), + ErrorInfo::unrecoverable); } - - nextStep = flow->next(); + } else if (nextStep.getType() == Step::StepType::PARALLELSTEP) { + // TODO: ParallelStep not supported yet + UIMA_EXC_THROW_NEW(NotYetImplementedException, + UIMA_ERR_NOT_YET_IMPLEMENTED, + UIMA_MSG_ID_EXC_NOT_YET_IMPLEMENTED, + ErrorMessage(UIMA_MSG_ID_LITERAL_STRING, "Parallel Step not supported yet"), + ErrorInfo::unrecoverable + ); + } else { + UIMA_EXC_THROW_NEW(EngineProcessingException, + UIMA_ERR_USER_ANNOTATOR_COULD_NOT_PROCESS, + UIMA_MSG_ID_EXCON_PROCESSING_CAS, + ErrorMessage(UIMA_MSG_ID_LITERAL_STRING, "Unknown Step Type"), + ErrorInfo::unrecoverable); } - const FinalStep* finalStep = nextStep.getFinalStep(); - if (currentCas == inputCas) { - if (finalStep->getForceDropCAS()) { - // TODO: Throw excetion (not allowed to drop input CAS) - } - return nullptr; - } + nextStep = flow->next(); + } - if (iv_bOutputNewCases && !finalStep->getForceDropCAS()) - return currentCas; - currentCas->release(); + const FinalStep *finalStep = nextStep.getFinalStep(); + if (currentCas == inputCas) { + if (finalStep->getForceDropCAS()) { + // Not allowed to drop the input CAS so something must have gone wrong + UIMA_EXC_THROW_NEW(EngineProcessingException, + UIMA_ERR_USER_ANNOTATOR_COULD_NOT_PROCESS, + UIMA_MSG_ID_EXCON_PROCESSING_CAS, + ErrorMessage(UIMA_MSG_ID_LITERAL_STRING, "Illegal CAS drop"), + ErrorInfo::unrecoverable); + } + return nullptr; } + + if (iv_bOutputNewCases && !finalStep->getForceDropCAS()) + return currentCas; + currentCas->release(); } } @@ -677,6 +704,20 @@ namespace uima { return *result; } + void AnnotatorManager::release() { + while 
(!casIterStack.empty()) { + StackFrame& frame = casIterStack.top(); + frame.originalFlow->aborted(); + casIterStack.pop(); + } + for (CAS *cas : activeCASes) { + if (cas != inputCas) + cas->release(); + } + + activeCASes.clear(); + } + TyErrorId AnnotatorManager::launchProcessDocument(CAS &cas, ResultSpecification const &crResultSpec) { //if engine uses Capability Language Flow @@ -693,8 +734,12 @@ namespace uima { inputCas = &cas; casIterStack.push({nullptr, inputCas, iv_pFlowController->computeFlow(*inputCas), {}}); - nextCas = processUntilNextOutputCas(); - + try { + nextCas = processUntilNextOutputCas(); + } catch (...) { + release(); + throw; + } return UIMA_ERR_NONE; } diff --git a/src/framework/flow_controller.cpp b/src/framework/flow_controller.cpp index cacea56..3f800c0 100644 --- a/src/framework/flow_controller.cpp +++ b/src/framework/flow_controller.cpp @@ -29,6 +29,7 @@ namespace uima { Step FixedFlowObject::next() { + // if this CAS had been passed to a CAS Multiplier in the previous step if (wasPassedToCASMultiplier) { switch (flowController->getAction()) { case FixedFlowController::ActionAfterCasMultiplier::CONTINUE: @@ -46,23 +47,25 @@ namespace uima { } const std::vector& delegateKeys = flowController->getDelegateKeys(); - if (currentStep >= delegateKeys.size()) + if (currentStep >= delegateKeys.size()) // this CAS has finished the sequence return Step(internal::FinalStep()); + // if the engine is a CAS Multiplier, set flag const icu::UnicodeString &engineName = delegateKeys[currentStep]; const AnnotatorContext* engineContext = flowController->getDelegateSpecifierMap().at(engineName); const AnalysisEngineMetaData* engineMetadata = engineContext->getTaeSpecifier().getAnalysisEngineMetaData(); const OperationalProperties* operationalProps = engineMetadata->getOperationalProperties(); - if ( operationalProps && operationalProps->getOutputsNewCASes()) + if ( operationalProps && operationalProps->getOutputsNewCASes() ) wasPassedToCASMultiplier = true; 
return Step(internal::SimpleStep(delegateKeys[currentStep++])); } std::unique_ptr FixedFlowObject::newCasProduced(const CAS &cas, const icu::UnicodeString &producedBy) { - newCASProduced = true; - const std::vector& delegateKeys = flowController->getDelegateKeys(); + newCASProduced = true; //input CAS has been processed by a CAS Multiplier + // start the new output CAS from the next node after the CAS Multiplier that produced it + const std::vector& delegateKeys = flowController->getDelegateKeys(); int i = 0; while (producedBy != delegateKeys.at(i)) ++i; diff --git a/src/framework/internal_engine_base.cpp b/src/framework/internal_engine_base.cpp index 42f9f58..4327cc9 100644 --- a/src/framework/internal_engine_base.cpp +++ b/src/framework/internal_engine_base.cpp @@ -63,6 +63,8 @@ /* Implementation */ /* ----------------------------------------------------------------------- */ namespace uima { + UIMA_EXC_CLASSIMPLEMENT(EngineProcessingException, uima::Exception); + namespace internal { uima::internal::EngineBase & EngineBase::promoteEngine( uima::AnalysisEngine & engine) { diff --git a/src/framework/uima/annotator_mgr.hpp b/src/framework/uima/annotator_mgr.hpp index 41915ee..fcaecaf 100644 --- a/src/framework/uima/annotator_mgr.hpp +++ b/src/framework/uima/annotator_mgr.hpp @@ -44,6 +44,7 @@ #include "uima/pragmas.hpp" //must be included first to disable warnings #include #include +#include #include "uima/annotator_timing.hpp" #include "uima/exceptions.hpp" @@ -231,6 +232,9 @@ namespace uima { TyAnnotatorEntries iv_vecEntries; std::stack casIterStack; + /** Active CASes during processing, released during exception handling*/ + std::unordered_set activeCASes; + size_t iv_uiNbrOfDocsProcessed; // for timing statistics FlowController* iv_pFlowController; CAS* inputCas{}; @@ -269,6 +273,9 @@ namespace uima { /** Called by Aggregate Engine's next */ CAS& next(); + /** Release all CASes currently in use by this */ + void release(); + /* COPY CONSTRUCTOR NOT SUPPORTED */ 
AnnotatorManager(const AnnotatorManager & ); //lint !e1704 /* ASSIGNMENT OPERATOR NOT SUPPORTED */ diff --git a/src/framework/uima/internal_engine_base.hpp b/src/framework/uima/internal_engine_base.hpp index 6e6df59..41f3d7d 100644 --- a/src/framework/uima/internal_engine_base.hpp +++ b/src/framework/uima/internal_engine_base.hpp @@ -64,6 +64,10 @@ namespace uima { /* Types / Classes */ /* ----------------------------------------------------------------------- */ namespace uima { + + /** Represents an exception relating to an engine's processing */ + UIMA_EXC_CLASSDECLARE(EngineProcessingException, uima::Exception); + namespace internal { /** From 0adb05c7a1f5bd54d257dc36b9a8952138632af0 Mon Sep 17 00:00:00 2001 From: mac-op Date: Thu, 25 Jul 2024 23:33:55 -0600 Subject: [PATCH 08/22] framework: fixed memory leak in AnnotatorManager FlowController was not deleted on destruction --- src/framework/annotator_mgr.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/framework/annotator_mgr.cpp b/src/framework/annotator_mgr.cpp index 9df2f1c..54085c3 100644 --- a/src/framework/annotator_mgr.cpp +++ b/src/framework/annotator_mgr.cpp @@ -104,6 +104,8 @@ namespace uima { launchDeInit(); } assert( iv_vecEntries.empty() ); + if (iv_pFlowController) + delete iv_pFlowController; } From 3d4086f5c388cff75400d1f8cd81e93c0a772739 Mon Sep 17 00:00:00 2001 From: mac-op Date: Tue, 6 Aug 2024 04:47:23 -0600 Subject: [PATCH 09/22] bug: fixed undefined behavior in Step union The constructor for the UNSPECIFIED case didn't initialize the union properly, so when SimpleStep's assignment operator is called later on it can segfault when assigning the string.
--- src/framework/flow.cpp | 2 +- src/test/src/test_engine.cpp | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/framework/flow.cpp b/src/framework/flow.cpp index fcb4dc1..6c91b40 100644 --- a/src/framework/flow.cpp +++ b/src/framework/flow.cpp @@ -63,7 +63,7 @@ namespace uima { type = other.type; switch (other.type) { case StepType::SIMPLESTEP: - uStep.simpleStep = other.uStep.simpleStep; + new (&uStep.simpleStep) auto( other.uStep.simpleStep); break; case StepType::FINALSTEP: uStep.finalStep = other.uStep.finalStep; diff --git a/src/test/src/test_engine.cpp b/src/test/src/test_engine.cpp index 9976226..19e7eef 100644 --- a/src/test/src/test_engine.cpp +++ b/src/test/src/test_engine.cpp @@ -510,7 +510,8 @@ void testCasMultiplier(uima::util::ConsoleUI & rclConsole) num++; CAS & seg = iter.next(); failIfNotTrue(seg.getDocumentText().length() > 0); - pEngine->getAnnotatorContext().releaseCAS(seg); + // pEngine->getAnnotatorContext().releaseCAS(seg); + seg.release(); } failIfNotTrue(num==3); delete pEngine; From 5f7ae16932421c66b36075d314afd916575ffe96 Mon Sep 17 00:00:00 2001 From: mac-op Date: Wed, 7 Aug 2024 01:39:35 -0600 Subject: [PATCH 10/22] bug: fixed another undefined behavior in Step union The union needed to be properly destroyed in assignment operator, and initialized in copy constructor and assignment operator --- src/framework/flow.cpp | 35 ++++++++++++++++++++++++++++------- 1 file changed, 28 insertions(+), 7 deletions(-) diff --git a/src/framework/flow.cpp b/src/framework/flow.cpp index 6c91b40..8cd9bce 100644 --- a/src/framework/flow.cpp +++ b/src/framework/flow.cpp @@ -43,13 +43,16 @@ namespace uima { } Step::Step(const Step& other) :type(other.type){ + // properly initialize union value that was not constructed (invalid) switch (other.type) { case StepType::SIMPLESTEP: - uStep.simpleStep = other.uStep.simpleStep; + new (&uStep.simpleStep) auto(other.uStep.simpleStep); break; case StepType::FINALSTEP: - uStep.finalStep =
other.uStep.finalStep; + new (&uStep.finalStep) auto(other.uStep.finalStep); break; + case StepType::PARALLELSTEP: + new (&uStep.parallelStep) auto(other.uStep.parallelStep); default: break; } @@ -60,13 +63,31 @@ namespace uima { if (this == &other) { return *this; } + // destroy the current type in the union and reinitialize it with other + switch (this->type) { + case StepType::SIMPLESTEP: + uStep.simpleStep.~SimpleStep(); + break; + case StepType::FINALSTEP: + uStep.finalStep.~FinalStep(); + break; + case StepType::PARALLELSTEP: + uStep.parallelStep.~ParallelStep(); + break; + default: + break; + } + type = other.type; switch (other.type) { case StepType::SIMPLESTEP: - new (&uStep.simpleStep) auto( other.uStep.simpleStep); + new (&uStep.simpleStep) auto(other.uStep.simpleStep); break; case StepType::FINALSTEP: - uStep.finalStep = other.uStep.finalStep; + new (&uStep.finalStep) auto(other.uStep.finalStep); + break; + case StepType::PARALLELSTEP: + new (&uStep.parallelStep) auto(other.uStep.parallelStep); break; default: break; @@ -78,13 +99,13 @@ namespace uima { switch (type) { case StepType::SIMPLESTEP: uStep.simpleStep.~SimpleStep(); - break; + break; case StepType::FINALSTEP: uStep.finalStep.~FinalStep(); - break; + break; case StepType::PARALLELSTEP: uStep.parallelStep.~ParallelStep(); - break; + break; default: break; } From f5d045b3fde554bffa02f37d461b9cf87f5398d8 Mon Sep 17 00:00:00 2001 From: mac-op Date: Tue, 13 Aug 2024 05:22:44 -0600 Subject: [PATCH 11/22] framework: introduce owner variable for CASes in Pool --- src/cas/cas.cpp | 15 +++++++++------ src/cas/uima/cas.hpp | 2 ++ src/framework/caspool.cpp | 15 +++++++-------- src/framework/uima/caspool.hpp | 2 +- src/test/src/test_engine.cpp | 2 +- src/utils/runAECpp.cpp | 2 +- 6 files changed, 21 insertions(+), 17 deletions(-) diff --git a/src/cas/cas.cpp b/src/cas/cas.cpp index d0a3df8..8300acf 100644 --- a/src/cas/cas.cpp +++ b/src/cas/cas.cpp @@ -179,6 +179,7 @@ namespace uima { 
iv_indexRepository(NULL), iv_filterBuilder(NULL), iv_componentInfo(NULL), + iv_owner(NULL), iv_utDocumentType(uima::lowlevel::TypeSystem::INVALID_TYPE), iv_utDocumentLangAsIntFeat(uima::lowlevel::TypeSystem::INVALID_FEATURE), iv_utDocumentLangAsStrFeat(uima::lowlevel::TypeSystem::INVALID_FEATURE), @@ -216,6 +217,7 @@ namespace uima { iv_cpDocument(NULL), iv_uiDocumentLength(0), iv_copyOfDocument(NULL), + iv_owner(NULL), iv_tyDocumentAnnotation(uima::lowlevel::FSHeap::INVALID_FS) { iv_casDefinition = inCas->iv_casDefinition; iv_typeSystem = inCas->iv_typeSystem; @@ -262,6 +264,7 @@ namespace uima { iv_sofaCount(0), initialSofaCreated(false), iv_initialView(NULL), + iv_owner(NULL), iv_indexRepository(NULL), iv_filterBuilder(NULL), iv_componentInfo(NULL), @@ -296,7 +299,7 @@ namespace uima { } CAS::~CAS() { - + //always delete index repository if (this->iv_indexRepository != NULL) { delete iv_indexRepository; @@ -309,7 +312,7 @@ namespace uima { //initial call to delete object if (this->isbaseCas) { this->iv_baseCas->isDeletingViews = true; - + if (this->iv_baseCas->iv_heap != NULL) { delete this->iv_baseCas->iv_heap; this->iv_baseCas->iv_heap = NULL; @@ -319,7 +322,7 @@ namespace uima { iv_baseCas->iv_filterBuilder = NULL; } if (this->iv_baseCas->bOwnsCASDefinition ) { - if (this->iv_baseCas->iv_casDefinition != NULL) { + if (this->iv_baseCas->iv_casDefinition != NULL) { delete this->iv_baseCas->iv_casDefinition; this->iv_baseCas->iv_casDefinition = NULL; } @@ -334,7 +337,7 @@ namespace uima { } //this->iv_baseCas->iv_sofa2tcasMap.clear( ); //this->iv_baseCas->iv_sofa2indexMap.clear(); - } + } } else { if (!this->iv_baseCas->isDeletingViews) { dropView(this->getSofaNum()); @@ -810,8 +813,8 @@ namespace uima { } void CAS::release() { - if (iv_componentInfo) { - iv_componentInfo->releaseCAS(*this); + if (iv_owner) { + iv_owner->releaseCAS(*this); } else std::cerr << "No AnnotatorContext" << "\n\n"; } diff --git a/src/cas/uima/cas.hpp b/src/cas/uima/cas.hpp index 
3834b57..dae55b1 100644 --- a/src/cas/uima/cas.hpp +++ b/src/cas/uima/cas.hpp @@ -231,6 +231,7 @@ namespace uima { bool initialSofaCreated; bool isDeletingViews; //set this flag to true when destroying CAS AnnotatorContext *iv_componentInfo; + AnnotatorContext *iv_owner; uima::lowlevel::TyFSType iv_utDocumentType; uima::lowlevel::TyFSFeature iv_utDocumentLangAsIntFeat; @@ -775,6 +776,7 @@ namespace uima { /** * When called this CAS will release itself by calling releaseCas on the AnnotatorContext that owns it. + * NOTE: This only works for CASes that have an owner, ie. belong to a CASPool. */ void release(); diff --git a/src/framework/caspool.cpp b/src/framework/caspool.cpp index f230bfe..6fb414b 100644 --- a/src/framework/caspool.cpp +++ b/src/framework/caspool.cpp @@ -53,7 +53,7 @@ namespace uima { iv_vecFreeInstances(), iv_pCasDef(NULL), iv_numInstances(numInstances), - iv_pComponentInfo(nullptr) { + iv_pOwner(nullptr) { iv_pCasDef = uima::internal::CASDefinition::createCASDefinition(taeSpec); if (iv_pCasDef == NULL) { @@ -84,7 +84,7 @@ namespace uima { iv_vecFreeInstances(), iv_numInstances(numInstances), iv_pCasDef(nullptr), - iv_pComponentInfo(anContext) { + iv_pOwner(anContext) { iv_pCasDef = internal::CASDefinition::createCASDefinition(taeSpec); if (iv_pCasDef == nullptr) { UIMA_EXC_THROW_NEW(CASPoolException, @@ -103,10 +103,10 @@ namespace uima { UIMA_MSG_ID_EXC_CREATE_CASPOOL, ErrorInfo::unrecoverable); } - if (iv_pComponentInfo) - pCas->setCurrentComponentInfo(iv_pComponentInfo); - iv_vecAllInstances.push_back((CAS *) pCas->getInitialView()); - iv_vecFreeInstances.push_back((CAS *) pCas->getInitialView()); + CAS *initialView = pCas->getInitialView(); + initialView->iv_owner = iv_pOwner; + iv_vecAllInstances.push_back(initialView); + iv_vecFreeInstances.push_back(initialView); } } @@ -149,9 +149,8 @@ namespace uima { aCas.reset(); if (std::find(iv_vecAllInstances.begin(), iv_vecAllInstances.end(), &aCas) == iv_vecAllInstances.end()) - std::cerr << 
"False: " << iv_pComponentInfo->getTaeSpecifier().getAnnotatorImpName() << std::endl; + std::cerr << "False: " << iv_pOwner->getTaeSpecifier().getAnnotatorImpName() << std::endl; iv_vecFreeInstances.push_back(&aCas); - return; } } //namespace diff --git a/src/framework/uima/caspool.hpp b/src/framework/uima/caspool.hpp index eb6e63b..79f7d0b 100644 --- a/src/framework/uima/caspool.hpp +++ b/src/framework/uima/caspool.hpp @@ -67,7 +67,7 @@ namespace uima { std::vector iv_vecFreeInstances; size_t iv_numInstances; uima::internal::CASDefinition * iv_pCasDef; - AnnotatorContext* iv_pComponentInfo; + AnnotatorContext* iv_pOwner; public: /** Constructor diff --git a/src/test/src/test_engine.cpp b/src/test/src/test_engine.cpp index 19e7eef..f17dba0 100644 --- a/src/test/src/test_engine.cpp +++ b/src/test/src/test_engine.cpp @@ -612,7 +612,7 @@ void testAggregateCASCombiner(const util::ConsoleUI &rclConsole) srcDocIt.moveToNext(); failIfNotTrue(srcDocIt.isValid()); - pEngine->getAnnotatorContext().releaseCAS(rcas); + rcas.release(); } failIfNotTrue(numOutputs == 2); diff --git a/src/utils/runAECpp.cpp b/src/utils/runAECpp.cpp index cf73ee6..a205d8d 100644 --- a/src/utils/runAECpp.cpp +++ b/src/utils/runAECpp.cpp @@ -422,7 +422,7 @@ void process (AnalysisEngine * pEngine, CAS * cas, std::string in, std::string o } //release the CAS - pEngine->getAnnotatorContext().releaseCAS(outCas); + outCas.release(); cout << "runAECpp::processing new Cas " << i << endl; } From e3f1b33d0abb1c183fe16b31b42c32d88cc375ae Mon Sep 17 00:00:00 2001 From: mac-op Date: Sun, 18 Aug 2024 17:12:55 -0600 Subject: [PATCH 12/22] clean up for CAS and CASPool --- src/cas/cas.cpp | 3 +-- src/framework/annotator_mgr.cpp | 8 ++++++-- src/framework/caspool.cpp | 2 -- src/framework/uima/annotator_mgr.hpp | 4 ++++ 4 files changed, 11 insertions(+), 6 deletions(-) diff --git a/src/cas/cas.cpp b/src/cas/cas.cpp index 8300acf..16f8375 100644 --- a/src/cas/cas.cpp +++ b/src/cas/cas.cpp @@ -815,8 +815,7 @@ namespace 
uima { void CAS::release() { if (iv_owner) { iv_owner->releaseCAS(*this); - } else - std::cerr << "No AnnotatorContext" << "\n\n"; + } } ANIndex CAS::getAnnotationIndex(Type const & crType) { diff --git a/src/framework/annotator_mgr.cpp b/src/framework/annotator_mgr.cpp index 54085c3..555f06a 100644 --- a/src/framework/annotator_mgr.cpp +++ b/src/framework/annotator_mgr.cpp @@ -599,10 +599,12 @@ namespace uima { // the CAS Multiplier and continue with its flow currentCas = frame.originalCas; flow = std::move(frame.originalFlow); - currentCas->setCurrentComponentInfo(nullptr); // is this necessary? + currentCas->setCurrentComponentInfo(nullptr); casIterStack.pop(); } + activeCASes.insert(currentCas); + if (nextStep.getType() == Step::StepType::UNSPECIFIED) { nextStep = flow->next(); // get the next step for the current flow } @@ -637,8 +639,10 @@ namespace uima { casIterStack.push({nextAE, currentCas, std::move(flow), nextAEKey}); flow = std::move(nextFlow); currentCas = outputCas; + activeCASes.insert(currentCas); } else { - currentCas->setCurrentComponentInfo(nullptr); // is this necessary? + // No new CASes are output, this CAS is done being processed by the current engine. 
+ currentCas->setCurrentComponentInfo(nullptr); } } else { UIMA_EXC_THROW_NEW(EngineProcessingException, diff --git a/src/framework/caspool.cpp b/src/framework/caspool.cpp index 6fb414b..171c63f 100644 --- a/src/framework/caspool.cpp +++ b/src/framework/caspool.cpp @@ -148,8 +148,6 @@ namespace uima { void CASPool::releaseCAS(CAS & aCas) { aCas.reset(); - if (std::find(iv_vecAllInstances.begin(), iv_vecAllInstances.end(), &aCas) == iv_vecAllInstances.end()) - std::cerr << "False: " << iv_pOwner->getTaeSpecifier().getAnnotatorImpName() << std::endl; iv_vecFreeInstances.push_back(&aCas); } diff --git a/src/framework/uima/annotator_mgr.hpp b/src/framework/uima/annotator_mgr.hpp index fcaecaf..c7040c1 100644 --- a/src/framework/uima/annotator_mgr.hpp +++ b/src/framework/uima/annotator_mgr.hpp @@ -219,9 +219,13 @@ namespace uima { EngineEntry; struct StackFrame { + /* The delegate engine that produced new CASes */ AnalysisEngine* casMultiplier; + /* The CAS that was input to the CAS Multiplier */ CAS* originalCas; + /* The Flow object for this CAS */ std::unique_ptr originalFlow; + /* The delegate key of the engine that produced new CASes */ icu::UnicodeString lastEngineKey; }; /* --- types --- */ From 20342170f95dd226aeec63a24315741f3aefb887 Mon Sep 17 00:00:00 2001 From: mac-op Date: Mon, 19 Aug 2024 19:38:02 -0600 Subject: [PATCH 13/22] framework: Added check for invalid call to CAS::release This introduces a new error ID and error message UIMA_MSG_ID_EXC_INVALID_CAS_RELEASE --- src/cas/cas.cpp | 9 +++++++++ src/framework/uima/err_ids.h | 5 +++++ src/framework/uima/msg.h | 1 + src/framework/uima/msgstrtab.h | 2 ++ 4 files changed, 17 insertions(+) diff --git a/src/cas/cas.cpp b/src/cas/cas.cpp index 16f8375..a2deecf 100644 --- a/src/cas/cas.cpp +++ b/src/cas/cas.cpp @@ -815,6 +815,15 @@ namespace uima { void CAS::release() { if (iv_owner) { iv_owner->releaseCAS(*this); + } else { + ErrorMessage msg(UIMA_MSG_ID_EXC_INVALID_CAS_RELEASE); + msg.addParam("This CAS does 
not have any owner"); + UIMA_EXC_THROW_NEW(CASException, + UIMA_ERR_CAS_RELEASE, + msg, + ErrorMessage(UIMA_MSG_ID_EXCON_UNKNOWN_CONTEXT), + ErrorInfo::recoverable + ); } } diff --git a/src/framework/uima/err_ids.h b/src/framework/uima/err_ids.h index 421e00a..99aaf00 100644 --- a/src/framework/uima/err_ids.h +++ b/src/framework/uima/err_ids.h @@ -993,6 +993,11 @@ namespace uima { { UIMA_ERR_CODEPAGE , _TEXT("UIMA_ERR_CASPOOL_GET_CAS") }, #endif + /** CAS release errors */ +#define UIMA_ERR_CAS_RELEASE ((uima::TyErrorId)( 71 + UIMA_ERR_ENGINE_OFFSET )) +#ifdef UIMA_ENGINE_MAIN_CPP + { UIMA_ERR_CODEPAGE , _TEXT("UIMA_ERR_CAS_RELEASE") }, +#endif /*@}*/ diff --git a/src/framework/uima/msg.h b/src/framework/uima/msg.h index 785154d..1a1ecca 100644 --- a/src/framework/uima/msg.h +++ b/src/framework/uima/msg.h @@ -366,4 +366,5 @@ #define UIMA_MSG_ID_EXC_NO_FREE_CAS 329 #define UIMA_MSG_ID_EXC_INVALID_CALL_TO_NEXT 330 #define UIMA_MSG_ID_SIGNATURE_END 331 +#define UIMA_MSG_ID_EXC_INVALID_CAS_RELEASE 332 #endif diff --git a/src/framework/uima/msgstrtab.h b/src/framework/uima/msgstrtab.h index b5854fd..db6e152 100644 --- a/src/framework/uima/msgstrtab.h +++ b/src/framework/uima/msgstrtab.h @@ -702,6 +702,8 @@ static const TCHAR * gs_aszMessageStringTable[] = { "Invalid call to next(). ", /* 331 - UIMA_MSG_ID_SIGNATURE_END: */ "[UIMA-LIBRARY]", + /* 332 - UIMA_MSG_ID_EXC_INVALID_CAS_RELEASE */ + "Invalid release of CAS." 
} ; #endif /* UIMA_MSGSTRTAB_H */ From d49a58ef152032ddc5f0afb74889e3caba12255b Mon Sep 17 00:00:00 2001 From: mac-op Date: Tue, 20 Aug 2024 13:09:15 -0600 Subject: [PATCH 14/22] framework: formatting and documentation --- src/cas/cas.cpp | 8 +- src/framework/annotator_mgr.cpp | 134 ++++++++++-------- src/framework/flow.cpp | 2 +- src/framework/uima/annotator_mgr.hpp | 2 +- src/framework/uima/flow_controller.hpp | 5 +- .../data/descriptors/SimpleTextSegmenter.xml | 2 +- 6 files changed, 81 insertions(+), 72 deletions(-) diff --git a/src/cas/cas.cpp b/src/cas/cas.cpp index a2deecf..bd47e2e 100644 --- a/src/cas/cas.cpp +++ b/src/cas/cas.cpp @@ -299,7 +299,7 @@ namespace uima { } CAS::~CAS() { - + //always delete index repository if (this->iv_indexRepository != NULL) { delete iv_indexRepository; @@ -312,7 +312,7 @@ namespace uima { //initial call to delete object if (this->isbaseCas) { this->iv_baseCas->isDeletingViews = true; - + if (this->iv_baseCas->iv_heap != NULL) { delete this->iv_baseCas->iv_heap; this->iv_baseCas->iv_heap = NULL; @@ -322,7 +322,7 @@ namespace uima { iv_baseCas->iv_filterBuilder = NULL; } if (this->iv_baseCas->bOwnsCASDefinition ) { - if (this->iv_baseCas->iv_casDefinition != NULL) { + if (this->iv_baseCas->iv_casDefinition != NULL) { delete this->iv_baseCas->iv_casDefinition; this->iv_baseCas->iv_casDefinition = NULL; } @@ -337,7 +337,7 @@ namespace uima { } //this->iv_baseCas->iv_sofa2tcasMap.clear( ); //this->iv_baseCas->iv_sofa2indexMap.clear(); - } + } } else { if (!this->iv_baseCas->isDeletingViews) { dropView(this->getSofaNum()); diff --git a/src/framework/annotator_mgr.cpp b/src/framework/annotator_mgr.cpp index 555f06a..6371f2e 100644 --- a/src/framework/annotator_mgr.cpp +++ b/src/framework/annotator_mgr.cpp @@ -86,13 +86,12 @@ namespace uima { /* Implementation */ /* ----------------------------------------------------------------------- */ - AnnotatorManager::AnnotatorManager(internal::AggregateEngine & rEngine) : 
iv_pEngine(&rEngine), - iv_vecEntries(), - iv_uiNbrOfDocsProcessed(0), - iv_pFlowController(nullptr), - iv_bIsInitialized(false), - iv_bOutputNewCases(false) - /* ----------------------------------------------------------------------- */ { + AnnotatorManager::AnnotatorManager(internal::AggregateEngine & rEngine) : + iv_pEngine(& rEngine), + iv_vecEntries(), + iv_bIsInitialized(false), + iv_uiNbrOfDocsProcessed(0) + /* ----------------------------------------------------------------------- */{ ; } @@ -391,10 +390,10 @@ namespace uima { bool AnnotatorManager::shouldEngineBeCalled(uima::internal::CapabilityContainer const & crCapContainer, - ResultSpecification const &rResultSpec, - Language const &crLanguage, - vector &rTOFSToBeRemoved) { - util::Trace clTrace(util::enTraceDetailHigh, UIMA_TRACE_ORIGIN, UIMA_TRACE_COMPID_ANNOTATOR_MGR); + ResultSpecification const & rResultSpec, + Language const & crLanguage, + vector & rTOFSToBeRemoved) { + util::Trace clTrace(util::enTraceDetailHigh, UIMA_TRACE_ORIGIN, UIMA_TRACE_COMPID_ANNOTATOR_MGR); #ifdef DEBUG_VERBOSE UIMA_TPRINT("CapContainer:"); @@ -415,24 +414,28 @@ namespace uima { #endif // treat dump-like annotators for this language specially - if (crCapContainer.hasEmptyOutputTypeOrFeatures(crLanguage)) { + if (crCapContainer.hasEmptyOutputTypeOrFeatures( crLanguage )) { return true; } - ResultSpecification::TyTypeOrFeatureSTLSet const &crTOFSet = rResultSpec.getTypeOrFeatureSTLSet(); + ResultSpecification::TyTypeOrFeatureSTLSet const & crTOFSet = rResultSpec.getTypeOrFeatureSTLSet(); bool bHasTOF = false; - for (const auto & crTOF : crTOFSet) { - assert(crTOF.isValid()); - assert(rResultSpec.contains( crTOF )); + ResultSpecification::TyTypeOrFeatureSTLSet::const_iterator cit; + for (cit = crTOFSet.begin(); cit != crTOFSet.end(); ++cit) { + TypeOrFeature const & crTOF = (*cit); + assert( (*cit).isValid() ); + assert( crTOF.isValid() ); + assert( rResultSpec.contains( crTOF ) ); + UIMA_TPRINT(" TOF Name: " << 
crTOF.getName()); if (crCapContainer.hasOutputTypeOrFeature(crTOF, crLanguage)) { - assert(containsTOF(crTOF, crLanguage, crCapContainer)); - UIMA_TPRINT(" in capability"); + assert( containsTOF(crTOF, crLanguage, crCapContainer) ); + UIMA_TPRINT( " in capability" ); bHasTOF = true; rTOFSToBeRemoved.push_back(crTOF); } else { - assert(! containsTOF(crTOF, crLanguage, crCapContainer)); + assert( ! containsTOF(crTOF, crLanguage, crCapContainer) ); UIMA_TPRINT(" not in capability"); } } @@ -453,14 +456,14 @@ namespace uima { it nonetheless must sepcify that it needs tokens, sentences, and paragraphs because this is what the summarizer needs as input. */ - util::Trace clTrace(util::enTraceDetailLow, UIMA_TRACE_ORIGIN, UIMA_TRACE_COMPID_ANNOTATOR_MGR); + util::Trace clTrace(util::enTraceDetailLow, UIMA_TRACE_ORIGIN, UIMA_TRACE_COMPID_ANNOTATOR_MGR); UIMA_ANNOTATOR_TIMING(iv_clTimerLaunchProcess.start()); - - TyErrorId utErrorId = UIMA_ERR_NONE; - TyErrorId utRetVal = UIMA_ERR_NONE; - assert(EXISTS(iv_pEngine)); - size_t uiNbrOfSkippedAnnotators = 0; - CAS *tcas = nullptr; + TyAnnotatorEntries::iterator it; + TyErrorId utErrorId = UIMA_ERR_NONE; + TyErrorId utRetVal = UIMA_ERR_NONE; + assert( EXISTS(iv_pEngine) ); + size_t uiNbrOfSkippedAnnotators = 0; + CAS * tcas=NULL; // copy the result spec ResultSpecification resSpec = crResultSpec; @@ -469,9 +472,10 @@ namespace uima { assert(iv_bIsInitialized); assert(!iv_vecEntries.empty()); - for (EngineEntry &engineEntry: iv_vecEntries) { - AnalysisEngine *pEngine = engineEntry.iv_pEngine; - CapabilityContainer *pCapContainer = engineEntry.iv_pCapabilityContainer; + for (it = iv_vecEntries.begin(); it != iv_vecEntries.end(); ++it) { + EngineEntry & rEntry = (*it); + AnalysisEngine * pEngine = rEntry.iv_pEngine; + uima::internal::CapabilityContainer * pCapContainer = rEntry.iv_pCapabilityContainer; assert(EXISTS(pEngine)); assert(EXISTS(pCapContainer)); @@ -481,12 +485,12 @@ namespace uima { resSpec.print(cout); #endif - 
UIMA_TRACE_STREAM_ARG(clTrace, "ASB checks engine", pEngine->getAnalysisEngineMetaData().getName()); + UIMA_TRACE_STREAM_ARG(clTrace, "ASB checks engine", pEngine->getAnalysisEngineMetaData().getName() ); UIMA_TPRINT("--------- Checking annotator: " << pEngine->getAnalysisEngineMetaData().getName()); vector tofsToBeRemoved; - bool callEngine = true; - bool requiresTCas = true; + bool callEngine=true; + bool requiresTCas=true; if (cas.isBackwardCompatibleCas()) { tcas = &cas; @@ -497,7 +501,8 @@ namespace uima { cas.getDocumentAnnotation().getLanguage(), tofsToBeRemoved); - if (callEngine) { + if ( callEngine ) { + UIMA_TPRINT("----------- engine will be processed"); UIMA_TRACE_STREAM(clTrace, "Engine will be called"); @@ -505,64 +510,67 @@ namespace uima { // this must be done because an annotator should only be called with the result spec // that its XML file indicates. ResultSpecification annResSpec; - for (const TypeOrFeature &tof: tofsToBeRemoved) { - assert(tof.isValid()); - annResSpec.add(tof); - UIMA_TRACE_STREAM_ARG(clTrace, " engine is called with result spec", tof.getName()); + vector::const_iterator citTOF; + for (citTOF = tofsToBeRemoved.begin(); citTOF != tofsToBeRemoved.end(); ++citTOF) { + assert( (*citTOF).isValid() ); + annResSpec.add(*citTOF); + UIMA_TRACE_STREAM_ARG(clTrace, " engine is called with result spec", (*citTOF).getName() ); } /// does engine expect a TCas //AEs that declare at least one input or output SofA should be sent the base CAS. //Otherwise they must be sent a TCAS. - const AnalysisEngineMetaData::TyVecpCapabilities &vecCap = pEngine->getAnalysisEngineMetaData(). 
- getCapabilites(); - for (Capability *cap: vecCap) { - const auto &inputSofa = cap->getCapabilitySofas(Capability::INPUTSOFA); - const auto &outputSofa = cap->getCapabilitySofas(Capability::OUTPUTSOFA); - if (!inputSofa.empty() || !outputSofa.empty()) { + const AnalysisEngineMetaData::TyVecpCapabilities & vecCap = pEngine->getAnalysisEngineMetaData().getCapabilites(); + AnalysisEngineMetaData::TyVecpCapabilities::const_iterator itCap; + for (size_t i=0; i < vecCap.size(); i++) { + Capability * cap = vecCap.at(i); + Capability::TyVecCapabilitySofas inputSofa = cap->getCapabilitySofas(Capability::INPUTSOFA); + Capability::TyVecCapabilitySofas outputSofa = cap->getCapabilitySofas(Capability::OUTPUTSOFA); + if (inputSofa.size() > 0 || outputSofa.size() > 0) { requiresTCas = false; break; } } if (requiresTCas) { - SofaFS defSofa = cas.getSofa(pEngine->getAnnotatorContext().mapToSofaID(CAS::NAME_DEFAULT_TEXT_SOFA)); - if (!defSofa.isValid()) { - //TODO: throw exception - cerr << "could not get default text sofa " << endl; - return 99; - } - tcas = cas.getView(defSofa); - utErrorId = pEngine->process(*tcas, annResSpec); + SofaFS defSofa = cas.getSofa(pEngine->getAnnotatorContext().mapToSofaID(CAS::NAME_DEFAULT_TEXT_SOFA)); + if (!defSofa.isValid()) { + //TODO: throw exception + cerr << "could not get default text sofa " << endl; + return 99; + } + tcas = cas.getView(defSofa); + utErrorId = pEngine->process(*tcas, annResSpec); } else { - utErrorId = pEngine->process(cas, annResSpec); + utErrorId = ((AnalysisEngine*) pEngine)->process(cas, annResSpec); } if (utErrorId != UIMA_ERR_NONE) { clTrace.dump(_TEXT("Error"), (long) utErrorId); - utRetVal = utErrorId; /* I know, this overwrites a previous error */ + utRetVal = utErrorId; /* I know, this overwrites a previous error */ } else { // now remove TOFs from ResultSpec - for (const auto &crTof: tofsToBeRemoved) { - assert(crTof.isValid()); - resSpec.remove(crTof); + vector::const_iterator citTOF; + for (citTOF = 
tofsToBeRemoved.begin(); citTOF != tofsToBeRemoved.end(); ++citTOF) { + assert( (*citTOF).isValid() ); + resSpec.remove(*citTOF); } } } else { - assert(tofsToBeRemoved.empty()); + assert( tofsToBeRemoved.empty() ); UIMA_TPRINT("----------- engine will *not* be processed"); ++uiNbrOfSkippedAnnotators; } - } /* e-o-for */ + } /* e-o-for */ /* in case there was no error but not any annotator which generates a target type has been caled for process, we have an error */ UIMA_TPRINT("Annotators skipped due to unsupport lang: " << uiNbrOfSkippedAnnotators); - UIMA_TPRINT("Overall number of annotators: " << iv_vecEntries.size()); + UIMA_TPRINT("Overall number of annotators: " << iv_vecEntries.size() ); - if ((utRetVal == UIMA_ERR_NONE) - && (uiNbrOfSkippedAnnotators > 0) - && (uiNbrOfSkippedAnnotators == iv_vecEntries.size()) - && (crResultSpec.getSize() > 0)) { + if ( (utRetVal == UIMA_ERR_NONE) + && (uiNbrOfSkippedAnnotators > 0) + && (uiNbrOfSkippedAnnotators == iv_vecEntries.size()) + && (crResultSpec.getSize() > 0) ) { // utRetVal = UIMA_ERR_ANNOTATOR_MGR_LANG_NOT_SUPPORTED_FOR_ANNOTATOR; iv_pEngine->getAnnotatorContext().getLogger().logWarning("All annotators skipped (maybe unsupported language)"); } diff --git a/src/framework/flow.cpp b/src/framework/flow.cpp index 8cd9bce..b230aaa 100644 --- a/src/framework/flow.cpp +++ b/src/framework/flow.cpp @@ -42,7 +42,7 @@ namespace uima { type(StepType::FINALSTEP) { } - Step::Step(const Step& other) :type(other.type){ + Step::Step(const Step& other) :type(other.type) { // properly initialize union value that was not constructed (invalid) switch (other.type) { case StepType::SIMPLESTEP: diff --git a/src/framework/uima/annotator_mgr.hpp b/src/framework/uima/annotator_mgr.hpp index c7040c1..be9ed1b 100644 --- a/src/framework/uima/annotator_mgr.hpp +++ b/src/framework/uima/annotator_mgr.hpp @@ -31,7 +31,7 @@ 4/26/1999 Initial creation 1/17/2000 Autom. 
priorisation of annotators added - + 8/20/2024 CAS Multiplier capabilities added -------------------------------------------------------------------------- */ #ifndef UIMA_ANNOTATOR_MGR_HPP diff --git a/src/framework/uima/flow_controller.hpp b/src/framework/uima/flow_controller.hpp index a76354e..477d069 100644 --- a/src/framework/uima/flow_controller.hpp +++ b/src/framework/uima/flow_controller.hpp @@ -26,11 +26,12 @@ ----------------------------------------------------------------------------- - Description: + Description: This file contains the FlowController class and Flow interface + that control the flow of CASes inside an Aggregate Analysis Engine ----------------------------------------------------------------------------- - + 7/18/2024: created -------------------------------------------------------------------------- */ #include "uima/annotator_context.hpp" diff --git a/src/test/data/descriptors/SimpleTextSegmenter.xml b/src/test/data/descriptors/SimpleTextSegmenter.xml index 84067c8..db5d7fe 100644 --- a/src/test/data/descriptors/SimpleTextSegmenter.xml +++ b/src/test/data/descriptors/SimpleTextSegmenter.xml @@ -32,7 +32,7 @@ Simple Text Segmenter Splits a text document into pieces. The point at which the text is split is determined by - SegmentDelimiter configuration parameter which defaults to '.' + SegmentDelimiter configuration parameter which defaults to new line ('\n'). The last segment in the document will have lastSegment set to true. 
1.0 From 88ba14a0ffb2f23d802425e262b2d6eb96503127 Mon Sep 17 00:00:00 2001 From: mac-op Date: Tue, 20 Aug 2024 15:55:03 -0600 Subject: [PATCH 15/22] framework: correctly setting the CAS Owner in CAS Pool --- src/cas/cas.cpp | 10 +++++++--- src/cas/uima/cas.hpp | 2 ++ src/framework/caspool.cpp | 12 ++++++++++-- 3 files changed, 19 insertions(+), 5 deletions(-) diff --git a/src/cas/cas.cpp b/src/cas/cas.cpp index bd47e2e..6afc152 100644 --- a/src/cas/cas.cpp +++ b/src/cas/cas.cpp @@ -217,12 +217,12 @@ namespace uima { iv_cpDocument(NULL), iv_uiDocumentLength(0), iv_copyOfDocument(NULL), - iv_owner(NULL), iv_tyDocumentAnnotation(uima::lowlevel::FSHeap::INVALID_FS) { iv_casDefinition = inCas->iv_casDefinition; iv_typeSystem = inCas->iv_typeSystem; iv_heap = inCas->iv_heap; iv_componentInfo = inCas->iv_componentInfo; + iv_owner = inCas->iv_owner; iv_utDocumentLangAsIntFeat = uima::lowlevel::TypeSystem::INVALID_FEATURE; iv_utDocumentLangAsStrFeat = uima::lowlevel::TypeSystem::INVALID_FEATURE; refreshCachedTypes(); @@ -679,6 +679,10 @@ namespace uima { ); } + void CAS::setOwner(AnnotatorContext *owner) { + iv_baseCas->iv_owner = owner; + } + // deprecated version void CAS::setDocumentText(UChar const * cpDocument, size_t uiLength, bool bCopyToCAS ) { if (cpDocument == NULL) { @@ -813,8 +817,8 @@ namespace uima { } void CAS::release() { - if (iv_owner) { - iv_owner->releaseCAS(*this); + if (iv_baseCas->iv_owner) { + iv_baseCas->iv_owner->releaseCAS(*this); } else { ErrorMessage msg(UIMA_MSG_ID_EXC_INVALID_CAS_RELEASE); msg.addParam("This CAS does not have any owner"); diff --git a/src/cas/uima/cas.hpp b/src/cas/uima/cas.hpp index dae55b1..8de6fe7 100644 --- a/src/cas/uima/cas.hpp +++ b/src/cas/uima/cas.hpp @@ -191,6 +191,8 @@ namespace uima { void bumpSofaCount(); void invalidBaseCasMethod(); + /** Set the owner of the base CAS */ + void setOwner(AnnotatorContext* owner); void registerView(SofaFS); void updateDocumentAnnotation( ); void 
copyDocumentString(UnicodeStringRef); diff --git a/src/framework/caspool.cpp b/src/framework/caspool.cpp index 171c63f..395f649 100644 --- a/src/framework/caspool.cpp +++ b/src/framework/caspool.cpp @@ -103,8 +103,8 @@ namespace uima { UIMA_MSG_ID_EXC_CREATE_CASPOOL, ErrorInfo::unrecoverable); } + pCas->setOwner(iv_pOwner); CAS *initialView = pCas->getInitialView(); - initialView->iv_owner = iv_pOwner; iv_vecAllInstances.push_back(initialView); iv_vecFreeInstances.push_back(initialView); } @@ -146,7 +146,15 @@ namespace uima { } void CASPool::releaseCAS(CAS & aCas) { - + if (std::find(iv_vecAllInstances.begin(), iv_vecAllInstances.end(), &aCas) != iv_vecAllInstances.end()) { + ErrorMessage msg(UIMA_MSG_ID_EXC_INVALID_CAS_RELEASE); + msg.addParam("This CAS does not belong to this CAS Pool"); + UIMA_EXC_THROW_NEW(CASPoolException, + UIMA_ERR_CAS_RELEASE, + msg, + ErrorMessage(UIMA_MSG_ID_EXCON_UNKNOWN_CONTEXT), + ErrorInfo::recoverable); + } aCas.reset(); iv_vecFreeInstances.push_back(&aCas); } From f89eb88703bcd513124dd82e620181fdcba8777f Mon Sep 17 00:00:00 2001 From: mac-op Date: Tue, 20 Aug 2024 16:03:43 -0600 Subject: [PATCH 16/22] fix: small bug from last commit in releaseCAS --- src/framework/caspool.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/framework/caspool.cpp b/src/framework/caspool.cpp index 395f649..fee8308 100644 --- a/src/framework/caspool.cpp +++ b/src/framework/caspool.cpp @@ -146,7 +146,7 @@ namespace uima { } void CASPool::releaseCAS(CAS & aCas) { - if (std::find(iv_vecAllInstances.begin(), iv_vecAllInstances.end(), &aCas) != iv_vecAllInstances.end()) { + if (std::find(iv_vecAllInstances.begin(), iv_vecAllInstances.end(), &aCas) == iv_vecAllInstances.end()) { ErrorMessage msg(UIMA_MSG_ID_EXC_INVALID_CAS_RELEASE); msg.addParam("This CAS does not belong to this CAS Pool"); UIMA_EXC_THROW_NEW(CASPoolException, From 5966b78aeb0baf905ab55b60e88264a69c818cb0 Mon Sep 17 00:00:00 2001 From: mac-op Date: Tue, 20 Aug 2024 
17:07:14 -0600 Subject: [PATCH 17/22] test: fix incorrect test case for Aggregate CAS combiner --- .../data/descriptors/SegmentAnnotateMerge.xml | 12 ++--- src/test/src/test_engine.cpp | 44 +++++++++++-------- 2 files changed, 31 insertions(+), 25 deletions(-) diff --git a/src/test/data/descriptors/SegmentAnnotateMerge.xml b/src/test/data/descriptors/SegmentAnnotateMerge.xml index 98ff94a..a64653e 100644 --- a/src/test/data/descriptors/SegmentAnnotateMerge.xml +++ b/src/test/data/descriptors/SegmentAnnotateMerge.xml @@ -77,12 +77,12 @@ - - CASOutputFreq - - 2 - - + + + + + + diff --git a/src/test/src/test_engine.cpp b/src/test/src/test_engine.cpp index f17dba0..da40663 100644 --- a/src/test/src/test_engine.cpp +++ b/src/test/src/test_engine.cpp @@ -520,9 +520,7 @@ void testCasMultiplier(uima::util::ConsoleUI & rclConsole) } -/* For now, aggregate engines do not handle CAS Multipliers correctly. - This test will fail if ran. - TODO: Implement CAS Multiplier for Aggregate +/* Test the ability to handle CAS Multipliers within an aggregate engine */ void testAggregateCASMultiplier(const util::ConsoleUI &rclConsole) { @@ -568,7 +566,9 @@ void testAggregateCASMultiplier(const util::ConsoleUI &rclConsole) /* - * This will also not work + * Test CAS Multiplier that combines input CASes. + * Note that the default action for input CASes is to drop if there is an output, which means + * if they will continue on with the flow. For now this default behavior cannot be overridden yet. */ void testAggregateCASCombiner(const util::ConsoleUI &rclConsole) { @@ -601,21 +601,29 @@ void testAggregateCASCombiner(const util::ConsoleUI &rclConsole) ++numOutputs; CAS &rcas = iter.next(); ANIndex tokenIdx = rcas.getAnnotationIndex(token); - // There should be three tokens in each segment, including the delimiter (.) 
- failIfNotTrue(tokenIdx.getSize() == 6); - - // CAS should have a single SourceDocumentInformation whose lastSegment is true - ANIterator srcDocIt = rcas.getAnnotationIndex(srcDocInfo).iterator(); - failIfNotTrue(srcDocIt.isValid()); - AnnotationFS info = srcDocIt.get(); - failIfNotTrue(info.getBooleanValue(lastSegment)); - srcDocIt.moveToNext(); - failIfNotTrue(srcDocIt.isValid()); + size_t numToken = tokenIdx.getSize(); + + // CAS should have a single SourceDocumentInformation. + // lastSegment should be false for intermediate CASes and true for the last CAS. + ANIterator srcDocIter = rcas.getAnnotationIndex(srcDocInfo).iterator(); + failIfNotTrue(srcDocIter.isValid()); + AnnotationFS info = srcDocIter.get(); + + // If we're at the final CAS + if (numOutputs == 4) { + failIfNotTrue(numToken == 12); + failIfNotTrue(info.getBooleanValue(lastSegment)); + } else { + failIfNotTrue(numToken == 3); + failIfNotTrue(!info.getBooleanValue(lastSegment)); + } + srcDocIter.moveToNext(); + failIfNotTrue(!srcDocIter.isValid()); rcas.release(); } - failIfNotTrue(numOutputs == 2); + failIfNotTrue(numOutputs == 4); delete cas; delete pEngine; @@ -636,11 +644,9 @@ void mainTest(uima::util::ConsoleUI & rclConsole, testCallingSequence2(rclConsole, cpszConfigFilename); testCallingSequence3(rclConsole, cpszConfigFilename); } - testCasMultiplier(rclConsole); testAggregateCASMultiplier(rclConsole); - -#if 0 + testCasMultiplier(rclConsole); + testAggregateCASMultiplier(rclConsole); testAggregateCASCombiner(rclConsole); -#endif } int main(int argc, char * argv[]) /* From df7bf0c4ea8f713b07fee2da723bd5c55a6da5fe Mon Sep 17 00:00:00 2001 From: mac-op Date: Thu, 22 Aug 2024 14:58:51 -0600 Subject: [PATCH 18/22] test: added test case for Step class --- src/test/src/test_engine.cpp | 39 +++++++++++++++++++++++++++++++++++- 1 file changed, 38 insertions(+), 1 deletion(-) diff --git a/src/test/src/test_engine.cpp b/src/test/src/test_engine.cpp index da40663..de1cc75 100644 --- 
a/src/test/src/test_engine.cpp +++ b/src/test/src/test_engine.cpp @@ -510,7 +510,6 @@ void testCasMultiplier(uima::util::ConsoleUI & rclConsole) num++; CAS & seg = iter.next(); failIfNotTrue(seg.getDocumentText().length() > 0); - // pEngine->getAnnotatorContext().releaseCAS(seg); seg.release(); } failIfNotTrue(num==3); @@ -631,6 +630,44 @@ void testAggregateCASCombiner(const util::ConsoleUI &rclConsole) } +/** Test for correctness in the Step type, which contains a tagged union */ +void testStep(const util::ConsoleUI &rclConsole) { + rclConsole.info("Test Step class starts"); + const icu::UnicodeString dummyName("This is a test string."); + + Step emptyStep; + Step stepWithName{internal::SimpleStep(dummyName)}; + Step stepWithFinal{internal::FinalStep(false)}; + + failIfNotTrue(stepWithName.getType() == Step::StepType::SIMPLESTEP); + failIfNotTrue(stepWithName.getSimpleStep()->getEngineName() == dummyName); + + failIfNotTrue(stepWithFinal.getType() == Step::StepType::FINALSTEP); + failIfNotTrue(stepWithFinal.getFinalStep()->getForceDropCAS() == false); + + + failIfNotTrue(emptyStep.getType() == Step::StepType::UNSPECIFIED); + // Getting concrete types on empty Step will return nullptr + failIfNotTrue((emptyStep.getFinalStep() || emptyStep.getSimpleStep() || emptyStep.getParallelStep()) == false); + + // Test assignment operator on empty step + emptyStep = stepWithName; + failIfNotTrue(emptyStep.getSimpleStep()->getEngineName() == dummyName); + + // Test copy constructor + Step copiedStep(stepWithName); + failIfNotTrue(copiedStep.getType() == Step::StepType::SIMPLESTEP); + failIfNotTrue(copiedStep.getSimpleStep()->getEngineName() == dummyName); + + // Test assignment operator on Step containing SimpleStep + copiedStep = stepWithFinal; + failIfNotTrue(copiedStep.getType() == Step::StepType::FINALSTEP); + failIfNotTrue(copiedStep.getFinalStep()->getForceDropCAS() == false); + + // ParallelStep is not supported yet. 
+} + + void mainTest(uima::util::ConsoleUI & rclConsole, const char * cpszCCSID, const TCHAR * cpszConfigFilename, From ce9cd6cbaa6ecacee905a11ee5fcd2bac039ffc0 Mon Sep 17 00:00:00 2001 From: mac-op Date: Thu, 22 Aug 2024 15:06:19 -0600 Subject: [PATCH 19/22] fix: better exception message in annotator_mgr.cpp --- src/framework/annotator_mgr.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/framework/annotator_mgr.cpp b/src/framework/annotator_mgr.cpp index 6371f2e..75cafd9 100644 --- a/src/framework/annotator_mgr.cpp +++ b/src/framework/annotator_mgr.cpp @@ -656,7 +656,7 @@ namespace uima { UIMA_EXC_THROW_NEW(EngineProcessingException, UIMA_ERR_USER_ANNOTATOR_COULD_NOT_PROCESS, UIMA_MSG_ID_EXCON_PROCESSING_CAS, - ErrorMessage(UIMA_MSG_ID_LITERAL_STRING, "Unknown Delegate Key"), + ErrorMessage(UIMA_MSG_ID_LITERAL_STRING, "Unknown Delegate Key " + nextAEKey), ErrorInfo::unrecoverable); } } else if (nextStep.getType() == Step::StepType::PARALLELSTEP) { From 7cd9ab95eaa6b8c13d3a3b80d62d0daafdd382a6 Mon Sep 17 00:00:00 2001 From: mac-op Date: Thu, 22 Aug 2024 16:30:54 -0600 Subject: [PATCH 20/22] fix: added test case in test_engine --- src/test/src/test_engine.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/test/src/test_engine.cpp b/src/test/src/test_engine.cpp index de1cc75..e93a11f 100644 --- a/src/test/src/test_engine.cpp +++ b/src/test/src/test_engine.cpp @@ -718,7 +718,7 @@ int main(int argc, char * argv[]) /* /* before we init the res mgr, we test for the correct error */ testMissingResMgr(clConsole); - + testStep(clConsole); try { /* create a UIMA resource */ (void) uima::ResourceManager::createInstance(MAIN_TITLE); From e07c09f562c6947b34a966103692540a2aff1466 Mon Sep 17 00:00:00 2001 From: mac-op Date: Thu, 22 Aug 2024 16:31:53 -0600 Subject: [PATCH 21/22] fix: remove CAS from activeCASes in AnnotatorManager --- src/framework/annotator_mgr.cpp | 1 + 1 file changed, 1 insertion(+) diff --git 
a/src/framework/annotator_mgr.cpp b/src/framework/annotator_mgr.cpp index 75cafd9..c970850 100644 --- a/src/framework/annotator_mgr.cpp +++ b/src/framework/annotator_mgr.cpp @@ -679,6 +679,7 @@ namespace uima { } const FinalStep *finalStep = nextStep.getFinalStep(); + activeCASes.erase(currentCas); if (currentCas == inputCas) { if (finalStep->getForceDropCAS()) { // Not allowed to drop the input CAS so something must have gone wrong From d4588ba318729d3586dfa98464e1bc5955c0d858 Mon Sep 17 00:00:00 2001 From: mac-op Date: Thu, 22 Aug 2024 21:12:15 -0600 Subject: [PATCH 22/22] framework: removed old constructor for CASPool --- src/framework/caspool.cpp | 32 -------------------------------- src/framework/uima/caspool.hpp | 7 +++---- 2 files changed, 3 insertions(+), 36 deletions(-) diff --git a/src/framework/caspool.cpp b/src/framework/caspool.cpp index fee8308..0bddab9 100644 --- a/src/framework/caspool.cpp +++ b/src/framework/caspool.cpp @@ -47,38 +47,6 @@ namespace uima { // //------------------------------------------------------------ - CASPool::CASPool(const AnalysisEngineDescription &taeSpec, - size_t numInstances) - : iv_vecAllInstances(), - iv_vecFreeInstances(), - iv_pCasDef(NULL), - iv_numInstances(numInstances), - iv_pOwner(nullptr) { - iv_pCasDef = uima::internal::CASDefinition::createCASDefinition(taeSpec); - - if (iv_pCasDef == NULL) { - UIMA_EXC_THROW_NEW(CASPoolException, - UIMA_ERR_CASPOOL_CREATE_CASDEFINITION, - UIMA_MSG_ID_EXC_CREATE_CASPOOL, - UIMA_MSG_ID_EXC_CREATE_CASPOOL, - ErrorInfo::unrecoverable); - } - - for (size_t i=0; i < numInstances; i++) { - CAS * pCas = uima::internal::CASImpl::createCASImpl(*iv_pCasDef,false); - if (pCas == NULL) { - UIMA_EXC_THROW_NEW(CASPoolException, - UIMA_ERR_CASPOOL_CREATE_CAS, - UIMA_MSG_ID_EXC_CREATE_CASPOOL, - UIMA_MSG_ID_EXC_CREATE_CASPOOL, - ErrorInfo::unrecoverable); - } - iv_vecAllInstances.push_back((CAS *)pCas->getInitialView()); - iv_vecFreeInstances.push_back((CAS *)pCas->getInitialView()); - } - } 
- - CASPool::CASPool(AnnotatorContext *anContext, const AnalysisEngineDescription &taeSpec, size_t numInstances) : iv_vecAllInstances(), iv_vecFreeInstances(), diff --git a/src/framework/uima/caspool.hpp b/src/framework/uima/caspool.hpp index 79f7d0b..cdb19d2 100644 --- a/src/framework/uima/caspool.hpp +++ b/src/framework/uima/caspool.hpp @@ -71,11 +71,10 @@ namespace uima { public: /** Constructor - * Creates the specified number of CAS instances based on CAS definition - * as specified in the TAE specifier. + * @param anContext The AnnotatorContext that owns this CASPool + * @param taeSpec The AnalysisEngineDescription that specifies CAS Definition for this Pool + * @param numInstances Number of CASes in this Pool */ - CASPool(const AnalysisEngineDescription & taeSpec, size_t numInstances); - CASPool(AnnotatorContext* anContext, const AnalysisEngineDescription & taeSpec, size_t numInstances); /** Destructor */ ~CASPool(void);