Skip to content

Commit

Permalink
chore: Bring back code
Browse files Browse the repository at this point in the history
  • Loading branch information
Romuald Rousseau committed Aug 27, 2024
1 parent eb3688b commit 66349be
Show file tree
Hide file tree
Showing 19 changed files with 823 additions and 326 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

import org.apache.commons.collections4.map.LRUMap;

import com.github.romualdrousseau.any2json.base.ModelData;
import com.github.romualdrousseau.any2json.modeldata.EmptyModelData;
import com.github.romualdrousseau.shuju.preprocessing.Text;
import com.github.romualdrousseau.shuju.preprocessing.comparer.RegexComparer;
import com.github.romualdrousseau.shuju.types.Tensor;
Expand All @@ -17,7 +17,7 @@ public class Model {
public static final ThreadLocal<Model> Default = new ThreadLocal<>() {
@Override
protected Model initialValue() {
return new ModelBuilder().build();
return new Model(EmptyModelData.empty());
}
};

Expand All @@ -27,46 +27,46 @@ public Model(final ModelData modelData) {

public Model(final ModelData modelData, final Map<String, String> modelAttributes) {
this.modelData = modelData;
this.attributes = modelAttributes;
this.entities = modelData.getList("entities");
this.patterns = modelData.getMap("patterns");
this.filters = modelData.getList("filters");
this.pivotEntities = modelData.getList("pivotEntityList");
this.tags = modelData.getList("tags");
this.requiredTags = modelData.getList("requiredTags");
this.comparer = new RegexComparer(this.patterns);
this.modelAttributes = modelAttributes;
this.entityList = modelData.getList("entities");
this.patternMap = modelData.getMap("patterns");
this.filterList = modelData.getList("filters");
this.pivotEntityList = modelData.getList("pivotEntityList");
this.tagList = modelData.getList("tags");
this.requiredTagList = modelData.getList("requiredTags");
this.comparer = new RegexComparer(this.patternMap);
}

/**
 * Exposes the raw model data held by this model.
 *
 * @return the {@link ModelData} instance stored in this model's {@code modelData} field
 */
public ModelData getData() {
    return this.modelData;
}

public Map<String, String> getAttributes() {
return this.attributes;
public Map<String, String> getModelAttributes() {
return this.modelAttributes;
}

public List<String> getEntityList() {
return this.entities;
return this.entityList;
}

public Map<String, String> getPatternMap() {
return this.patterns;
return this.patternMap;
}

public List<String> getFilters() {
return this.filters;
public List<String> getFilterList() {
return this.filterList;
}

public List<String> getPivotEntityList() {
return this.pivotEntities;
return this.pivotEntityList;
}

public List<String> getTagList() {
return this.tags;
return this.tagList;
}

public List<String> getRequiredTagList() {
return this.requiredTags;
return this.requiredTagList;
}

public String toEntityName(final String value) {
Expand All @@ -87,17 +87,17 @@ public Optional<String> toEntityValue(final String value, final String entityNam

public Tensor toEntityVector(final String value) {
return this.toEntityVectorCache.computeIfAbsent(value, v -> Tensor
.of(Text.to_categorical(v, this.entities, this.comparer).stream().mapToDouble(x -> x).toArray()));
.of(Text.to_categorical(v, this.entityList, this.comparer).stream().mapToDouble(x -> x).toArray()));
}

private final ModelData modelData;
private final Map<String, String> attributes;
private final List<String> entities;
private final Map<String, String> patterns;
private final List<String> filters;
private final List<String> pivotEntities;
private final List<String> tags;
private final List<String> requiredTags;
private final Map<String, String> modelAttributes;
private final List<String> entityList;
private final Map<String, String> patternMap;
private final List<String> filterList;
private final List<String> pivotEntityList;
private final List<String> tagList;
private final List<String> requiredTagList;
private final RegexComparer comparer;

private final LRUMap<String, Optional<String>> toEntityValueCache = new LRUMap<>();
Expand Down

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
package com.github.romualdrousseau.any2json;

import java.nio.file.Path;
import java.util.List;
import java.util.Map;
import java.util.Optional;

/**
 * Keyed storage for model content, with accessors for single values, lists of
 * strings and string-to-string maps, plus an operation to save to a path.
 *
 * NOTE(review): every setter returns a {@code ModelData}; presumably this is the
 * receiver itself so calls can be chained, but that is not visible here - confirm
 * against the implementations.
 */
public interface ModelData {

/**
 * Looks up the value stored under {@code key}.
 *
 * @param <T> type the caller expects the stored value to have
 * @param key name of the entry to read
 * @return the value wrapped in an {@link Optional}; presumably empty when the
 *         key is not present (TODO confirm with implementations)
 */
<T> Optional<T> get(String key);

/**
 * Stores {@code value} under {@code key}.
 *
 * @param <T> type of the value being stored
 * @param key name of the entry to write
 * @param value value to associate with {@code key}
 * @return a {@code ModelData} (see the note on the interface about chaining)
 */
<T> ModelData set(String key, T value);

/**
 * Reads the list of strings stored under {@code key}.
 *
 * @param key name of the entry to read
 * @return the list associated with {@code key}; behavior for a missing key is
 *         implementation-defined as far as this interface shows
 */
List<String> getList(String key);

/**
 * Stores a list of strings under {@code key}.
 *
 * @param key name of the entry to write
 * @param values list to associate with {@code key}
 * @return a {@code ModelData} (see the note on the interface about chaining)
 */
ModelData setList(String key, List<String> values);

/**
 * Reads the string-to-string map stored under {@code key}.
 *
 * @param key name of the entry to read
 * @return the map associated with {@code key}; behavior for a missing key is
 *         implementation-defined as far as this interface shows
 */
Map<String, String> getMap(String key);

/**
 * Stores a string-to-string map under {@code key}.
 *
 * @param key name of the entry to write
 * @param values map to associate with {@code key}
 * @return a {@code ModelData} (see the note on the interface about chaining)
 */
ModelData setMap(String key, Map<String, String> values);

/**
 * Saves this model data to the given location.
 *
 * @param path destination to save to; the on-disk format is decided by the
 *        implementation and is not specified by this interface
 */
void save(Path path);
}
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ public interface Sheet {

int getLastColumnNum();

void applyTransformations();

Optional<TableGraph> getTableGraph();

Optional<Table> getTable();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,15 @@ enum TagStyle {

TagClassifier setModel(final Model model);

String predict(final String name, final List<String> entities, final List<String> context);
String predict(final Table table, final Header header);

TagClassifier.TagStyle getTagStyle();

TagClassifier setTagStyle(final TagClassifier.TagStyle mode);

List<String> getLexicon();

/**
 * Replaces the lexicon used by this classifier.
 *
 * @param lexicon the list of lexicon entries to use from now on
 * @return a {@code TagClassifier}; presumably this instance so calls can be
 *         chained (confirm against implementations)
 */
TagClassifier setLexicon(final List<String> lexicon);

String ensureTagStyle(final String text);
}
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
import com.github.romualdrousseau.any2json.parser.table.SimpleTableParser;
import com.github.romualdrousseau.any2json.readdir.GutenbergDiagonal;
import com.github.romualdrousseau.any2json.transform.op.StitchRows;
import com.github.romualdrousseau.shuju.strings.StringUtils;

public abstract class BaseDocument implements Document {

Expand Down Expand Up @@ -87,6 +88,9 @@ public Document setRawHints(final EnumSet<Hint> hints) {

/**
 * Returns the recipe text of this document.
 *
 * When the current recipe is blank, it is first filled in by joining the
 * entries of the model data's {@code "recipe"} list with newline characters,
 * and that joined text is kept for subsequent calls.
 *
 * @return the recipe text, either the one already set or the one derived from the model
 */
@Override
public String getRecipe() {
    // A non-blank recipe is already set: use it as is.
    if (!StringUtils.isBlank(this.recipe)) {
        return this.recipe;
    }
    // Otherwise fall back on the recipe lines carried by the model data.
    final var recipeLines = this.model.getData().getList("recipe");
    this.recipe = String.join("\n", recipeLines);
    return this.recipe;
}

Expand Down Expand Up @@ -143,19 +147,25 @@ public void updateParsersAndClassifiers() {
final var capa = this.getIntelliCapabilities();

if (capa.contains(Document.Hint.INTELLI_EXTRACT) && this.hints.contains(Document.Hint.INTELLI_EXTRACT)) {
this.sheetParser = new SheetBitmapParser();
if (this.sheetParser instanceof SimpleSheetParser) {
this.sheetParser = new SheetBitmapParser();
}
}

if (capa.contains(Document.Hint.INTELLI_LAYOUT) && this.hints.contains(Document.Hint.INTELLI_LAYOUT)) {
this.tableParser = DynamicPackages.GetElementParserFactory()
.map(x -> x.newInstance(this.model, this.tableParser.getParserOptions()))
.orElseGet(() -> new SimpleTableParser(this.model, null));
if (this.tableParser instanceof SimpleTableParser) {
this.tableParser = DynamicPackages.GetElementParserFactory()
.map(x -> x.newInstance(this.model, this.tableParser.getParserOptions()))
.orElseGet(() -> new SimpleTableParser(this.model, null));
}
}

if (capa.contains(Document.Hint.INTELLI_TAG) && this.hints.contains(Document.Hint.INTELLI_TAG)) {
this.tagClassifier = DynamicPackages.GetTagClassifierFactory()
.map(x -> x.newInstance(this.model, this.tagClassifier.getTagStyle()))
.orElseGet(() -> new SimpleTagClassifier(this.model, this.tagClassifier.getTagStyle()));
if (this.tagClassifier instanceof SimpleTagClassifier) {
this.tagClassifier = DynamicPackages.GetTagClassifierFactory()
.map(x -> x.newInstance(this.model, this.tagClassifier.getTagStyle()))
.orElseGet(() -> new SimpleTagClassifier(this.model, this.tagClassifier.getTagStyle()));
}
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,18 @@ public void addSheetListener(final SheetListener listener) {
this.listeners.add(listener);
}

/**
 * Applies all transformations to this sheet, at most once.
 *
 * Nothing happens when the transformations were already applied, or when the
 * last row number or last column number is not strictly positive. After a
 * successful run the sheet is flagged so that later calls return immediately.
 */
@Override
public void applyTransformations() {
    // Conditions are checked in this order on purpose: the applied flag first,
    // then the row bound, then the column bound (short-circuit evaluation).
    if (!this.transfoApplied && this.getLastRowNum() > 0 && this.getLastColumnNum() > 0) {
        TransformableSheet.of(this).applyAll();
        this.transfoApplied = true;
    }
}

@Override
public Optional<TableGraph> getTableGraph() {

Expand All @@ -81,9 +93,9 @@ public Optional<TableGraph> getTableGraph() {
return Optional.empty();
}

// Apply recipes
// Apply transformations

TransformableSheet.of(this).applyAll();
this.applyTransformations();
if (!this.notifyStepCompleted(new SheetPreparedEvent(this))) {
return Optional.empty();
}
Expand Down Expand Up @@ -418,6 +430,7 @@ private int computeLastColumnNum() {
private final List<Integer> columnMask;
private final int storeLastColumnNum;

private boolean transfoApplied = false;
private boolean unmergedAll = false;
private float capillarityThreshold = Settings.DEFAULT_CAPILLARITY_THRESHOLD;
private boolean pivotEnabled;
Expand Down
Loading

0 comments on commit 66349be

Please sign in to comment.