Skip to content

Commit

Permalink
fix #2821 Exclude X-FESS metadata from indexing and add transformation process for metadata inclusion.
Browse files Browse the repository at this point in the history
  • Loading branch information
marevol committed Jun 20, 2024
1 parent 4afbb34 commit ec33a25
Show file tree
Hide file tree
Showing 4 changed files with 21 additions and 9 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -136,15 +136,15 @@ protected Map<String, Object> generateData(final ResponseData responseData) {
dataMap.put(mapping.getValue1(), Double.parseDouble(values[0]));
} else if (Constants.MAPPING_TYPE_DATE.equalsIgnoreCase(mapping.getValue2())
|| Constants.MAPPING_TYPE_PDF_DATE.equalsIgnoreCase(mapping.getValue2())) {
final String dateFormate;
final String dateFormat;
if (StringUtil.isNotBlank(mapping.getValue3())) {
dateFormate = mapping.getValue3();
dateFormat = mapping.getValue3();
} else if (Constants.MAPPING_TYPE_PDF_DATE.equalsIgnoreCase(mapping.getValue2())) {
dateFormate = mapping.getValue2();
dateFormat = Constants.MAPPING_TYPE_PDF_DATE;
} else {
dateFormate = Constants.DATE_OPTIONAL_TIME;
dateFormat = Constants.DATE_OPTIONAL_TIME;
}
final Date dt = FessFunctions.parseDate(values[0], dateFormate);
final Date dt = FessFunctions.parseDate(values[0], dateFormat);
if (dt != null) {
dataMap.put(mapping.getValue1(), FessFunctions.formatDate(dt));
} else {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -391,7 +391,7 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction
/** The key of the configuration. e.g. cpu */
String CRAWLER_HOTTHREAD_TYPE = "crawler.hotthread.type";

/** The key of the configuration. e.g. resourceName,X-Parsed-By,Content-Encoding.*,Content-Type.*,X-TIKA.* */
/** The key of the configuration. e.g. resourceName,X-Parsed-By,Content-Encoding.*,Content-Type.*,X-TIKA.*,X-FESS.* */
String CRAWLER_METADATA_CONTENT_EXCLUDES = "crawler.metadata.content.excludes";

/** The key of the configuration. e.g. title=title:string<br>
Expand Down Expand Up @@ -2926,7 +2926,7 @@ public interface FessConfig extends FessEnv, org.codelibs.fess.mylasta.direction

/**
* Get the value for the key 'crawler.metadata.content.excludes'. <br>
* The value is, e.g. resourceName,X-Parsed-By,Content-Encoding.*,Content-Type.*,X-TIKA.* <br>
* The value is, e.g. resourceName,X-Parsed-By,Content-Encoding.*,Content-Type.*,X-TIKA.*,X-FESS.* <br>
* @return The value of found property. (NotNull: if not found, exception but basically no way)
*/
String getCrawlerMetadataContentExcludes();
Expand Down Expand Up @@ -10899,7 +10899,7 @@ protected java.util.Map<String, String> prepareGeneratedDefaultMap() {
defaultMap.put(FessConfig.CRAWLER_HOTTHREAD_TIMEOUT, "30s");
defaultMap.put(FessConfig.CRAWLER_HOTTHREAD_TYPE, "cpu");
defaultMap.put(FessConfig.CRAWLER_METADATA_CONTENT_EXCLUDES,
"resourceName,X-Parsed-By,Content-Encoding.*,Content-Type.*,X-TIKA.*");
"resourceName,X-Parsed-By,Content-Encoding.*,Content-Type.*,X-TIKA.*,X-FESS.*");
defaultMap.put(FessConfig.CRAWLER_METADATA_NAME_MAPPING, "title=title:string\nTitle=title:string\ndc:title=title:string\n");
defaultMap.put(FessConfig.CRAWLER_DOCUMENT_HTML_CONTENT_XPATH, "//BODY");
defaultMap.put(FessConfig.CRAWLER_DOCUMENT_HTML_LANG_XPATH, "//HTML/@lang");
Expand Down
12 changes: 12 additions & 0 deletions src/main/java/org/codelibs/fess/mylasta/direction/FessProp.java
Original file line number Diff line number Diff line change
Expand Up @@ -975,6 +975,18 @@ default Tuple3<String, String, String> getCrawlerMetadataNameMapping(final Strin
return params.get(name);
}

/**
 * Registers a crawler metadata name mapping, unless one already exists for the given name.
 *
 * If {@code getCrawlerMetadataNameMapping(name)} returns a non-null value, this method
 * does nothing, so an existing mapping is never overwritten. Otherwise a new
 * {@code Tuple3(fieldName, mappingType, dateFormat)} is put, keyed by {@code name}, into
 * the map stored in {@code propMap} under {@code CRAWLER_METADATA_NAME_MAPPING}.
 *
 * @param name the metadata name used as the key of the mapping
 * @param fieldName the first value of the stored tuple
 * @param mappingType the second value of the stored tuple
 * @param dateFormat the third value of the stored tuple
 */
default void addCrawlerMetadataNameMapping(final String name, final String fieldName, final String mappingType,
        final String dateFormat) {
    // NOTE(review): the lookup below presumably also makes sure the mapping table is present
    // in propMap before it is read here - confirm against getCrawlerMetadataNameMapping(String).
    if (getCrawlerMetadataNameMapping(name) == null) {
        @SuppressWarnings("unchecked")
        final Map<String, Tuple3<String, String, String>> mappings =
                (Map<String, Tuple3<String, String, String>>) propMap.get(CRAWLER_METADATA_NAME_MAPPING);
        final Tuple3<String, String, String> mapping = new Tuple3<>(fieldName, mappingType, dateFormat);
        mappings.put(name, mapping);
    }
}

String getSuggestPopularWordFields();

default String[] getSuggestPopularWordFieldsAsArray() {
Expand Down
2 changes: 1 addition & 1 deletion src/main/resources/fess_config.properties
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,7 @@ crawler.hotthread.snapshots=10
crawler.hotthread.threads=3
crawler.hotthread.timeout=30s
crawler.hotthread.type=cpu
crawler.metadata.content.excludes=resourceName,X-Parsed-By,Content-Encoding.*,Content-Type.*,X-TIKA.*
crawler.metadata.content.excludes=resourceName,X-Parsed-By,Content-Encoding.*,Content-Type.*,X-TIKA.*,X-FESS.*
crawler.metadata.name.mapping=\
title=title:string\n\
Title=title:string\n\
Expand Down

0 comments on commit ec33a25

Please sign in to comment.