-
Notifications
You must be signed in to change notification settings - Fork 53
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Experimental .blf.yaml version 2. Improvements to processing steps.
- multipleValues is no longer necessary, works automatically now. - allowDuplicateValues should be replaced by new 'unique' processing step - mapValues should be moved into a 'map' processing step with parameter 'table' - inline tag attributes can be defined in a single 'attributes' block now, which replaces include/exclude/extraAttributes - v2 removes baseFormat (inheritance), above obsolete attributes. - v2 'append' processing step has prefix parameter (default empty) instead of using separator, so it doesn't always append a space first. - removed exception for lemma and word annotations; all annotations are insensitive only now unless specified otherwise. - 'split' and 'replace' now support regex flags i and u (unicode char. classes) - multipleValuesSeparator (tabular format) deprecated; use 'split' processing step instead. Squashed commit of the following: commit dda9aca Author: Jan Niestadt <[email protected]> Date: Fri Mar 7 12:58:08 2025 +0100 Remove multipleValues. commit bca9881 Author: Jan Niestadt <[email protected]> Date: Fri Mar 7 11:35:04 2025 +0100 Single attributes key replaces include/exclude/extraAttributes. For v1, the old keys still work. In v2, they have been removed. commit c02cdae Author: Jan Niestadt <[email protected]> Date: Fri Mar 7 10:55:33 2025 +0100 Various v2 changes. commit 522d3bb Author: Jan Niestadt <[email protected]> Date: Fri Mar 7 09:56:52 2025 +0100 Remove format inheritance (baseFormat), word/lemma default sensitivity in v2. commit b20862b Author: Jan Niestadt <[email protected]> Date: Thu Mar 6 15:43:05 2025 +0100 Document differences between v1 and v2. commit e08a4da Author: Jan Niestadt <[email protected]> Date: Thu Mar 6 15:38:04 2025 +0100 In v2, append works more like StringBuilder. commit 27c203f Author: Jan Niestadt <[email protected]> Date: Thu Mar 6 15:18:47 2025 +0100 Deprecate allowDuplicateValues. Experimental v2 .blf.yaml. 
allowDuplicateValues still works, and is automatically added as the last processing step. Version 2 of .blf.yaml defaults to Saxon processor. commit fb69aa9 Author: Jan Niestadt <[email protected]> Date: Thu Mar 6 14:18:34 2025 +0100 Warning. replace keep 'both'. Docs. commit 3d0e704 Author: Jan Niestadt <[email protected]> Date: Thu Mar 6 13:41:24 2025 +0100 Fix test failure. commit 0722290 Author: Jan Niestadt <[email protected]> Date: Thu Mar 6 13:26:44 2025 +0100 map, unique operations. deprecated multipleValues. Also deprecated multipleValuesSeparator (tabular formats). commit 5d79000 Author: Jan Niestadt <[email protected]> Date: Thu Mar 6 11:57:02 2025 +0100 WIP mapValues. commit 2fa67b1 Author: Jan Niestadt <[email protected]> Date: Wed Mar 5 18:20:27 2025 +0100 Failing unit tests. commit fd745db Author: Jan Niestadt <[email protected]> Date: Wed Mar 5 13:53:40 2025 +0100 Implement ProcessingStep classes. commit 10cb608 Author: Jan Niestadt <[email protected]> Date: Wed Mar 5 12:41:07 2025 +0100 Regex flags for replace/split.
- Loading branch information
1 parent
660dd96
commit 0acf9b7
Showing
39 changed files
with
1,592 additions
and
618 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
88 changes: 88 additions & 0 deletions
88
engine/src/main/java/nl/inl/blacklab/indexers/config/ConfigAttribute.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,88 @@ | ||
package nl.inl.blacklab.indexers.config; | ||
|
||
import java.util.ArrayList; | ||
import java.util.List; | ||
|
||
import nl.inl.blacklab.indexers.config.process.ProcessingStep; | ||
|
||
/** | ||
* Configuration for attributes to index using XPath | ||
*/ | ||
public class ConfigAttribute { | ||
/** | ||
* Attribute name | ||
*/ | ||
private String name; | ||
|
||
/** | ||
* Exclude this attribute? | ||
*/ | ||
private boolean exclude = false; | ||
|
||
/** | ||
* XPath to get attribute's value, or null if this attribute is present on the tag. | ||
*/ | ||
private String valuePath; | ||
|
||
/** | ||
* How to process annotation values (if at all) | ||
*/ | ||
private final List<ConfigProcessStep> process = new ArrayList<>(); | ||
|
||
public ConfigAttribute() { | ||
|
||
} | ||
|
||
public String getName() { | ||
return name; | ||
} | ||
|
||
public void setName(String name) { | ||
this.name = name; | ||
} | ||
|
||
public String getValuePath() { | ||
return valuePath; | ||
} | ||
|
||
public void setValuePath(String valuePath) { | ||
this.valuePath = valuePath; | ||
} | ||
|
||
public void setExclude(boolean exclude) { | ||
this.exclude = exclude; | ||
} | ||
|
||
public boolean isExclude() { | ||
return exclude; | ||
} | ||
|
||
ProcessingStep processSteps; | ||
|
||
public synchronized ProcessingStep getProcess() { | ||
if (processSteps == null) { | ||
processSteps = ProcessingStep.fromConfig(process); | ||
} | ||
return processSteps; | ||
} | ||
|
||
public void setProcess(List<ConfigProcessStep> process) { | ||
this.process.clear(); | ||
this.process.addAll(process); | ||
} | ||
|
||
public void validate() { | ||
ConfigInputFormat.req(name, "extra attribute", "name"); | ||
ConfigInputFormat.req(valuePath, "extra attribute", "valuePath"); | ||
for (ConfigProcessStep step: process) { | ||
step.validate(); | ||
} | ||
} | ||
|
||
/** | ||
* Is this a nameless rule that simply says "exclude any attribute that isn't explicitly included?" | ||
*/ | ||
public boolean isDefaultExclude() { | ||
return exclude && name == null; | ||
} | ||
} |
Oops, something went wrong.