Skip to content

Commit

Permalink
Merge pull request #136 from jamebal/develop
Browse files (browse the repository at this point in the history)
perf: 优化部分文本内容不会被索引的问题
  • Loading branch information
jamebal authored Jul 16, 2024
2 parents 19046e5 + 1e9048c commit bba2edb
Showing 1 changed file with 20 additions and 10 deletions.
30 changes: 20 additions & 10 deletions src/main/java/com/jmal/clouddisk/lucene/LuceneService.java
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,6 @@
package com.jmal.clouddisk.lucene;

import cn.hutool.core.io.CharsetDetector;
import cn.hutool.core.io.FileTypeUtil;
import cn.hutool.core.io.FileUtil;
import cn.hutool.core.thread.ThreadUtil;
Expand Down Expand Up @@ -35,6 +36,7 @@
import java.io.File;
import java.io.IOException;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.time.LocalDateTime;
Expand Down Expand Up @@ -349,11 +351,12 @@ private String readFileContent(File file) {
if ("doc".equals(type) || "docx".equals(type)) {
return FileContentUtil.readWordContent(file);
}
String charset = UniversalDetector.detectCharset(file);
if (StrUtil.isNotBlank(charset)) {
if (fileProperties.getSimText().contains(type)) {
return FileUtil.readString(file, Charset.forName(charset));
if (fileProperties.getSimText().contains(type)) {
String charset = UniversalDetector.detectCharset(file);
if (StrUtil.isBlank(charset)) {
charset = String.valueOf(CharsetDetector.detect(file, StandardCharsets.UTF_8));
}
return FileUtil.readString(file, Charset.forName(charset));
}
} catch (Exception e) {
log.error("读取文件内容失败, file: {}, {}", file.getAbsolutePath(), e.getMessage(), e);
Expand Down Expand Up @@ -417,7 +420,9 @@ public void updateIndexDocument(IndexWriter indexWriter, FileIndex fileIndex, St
newDocument.add(new StringField("type", fileIndex.getType(), Field.Store.NO));
}
if (StrUtil.isNotBlank(fileName)) {
newDocument.add(new StringField("name", fileName.toLowerCase(), Field.Store.NO));
fileName = fileName.toLowerCase();
newDocument.add(new StringField("name", fileName, Field.Store.NO));
newDocument.add(new TextField("content", fileName, Field.Store.NO));
}
if (isFolder != null) {
newDocument.add(new IntPoint("isFolder", isFolder ? 1 : 0));
Expand All @@ -429,7 +434,9 @@ public void updateIndexDocument(IndexWriter indexWriter, FileIndex fileIndex, St
newDocument.add(new StringField("path", path, Field.Store.NO));
}
if (StrUtil.isNotBlank(tagName)) {
newDocument.add(new StringField("tag", tagName.toLowerCase(), Field.Store.NO));
tagName = tagName.toLowerCase();
newDocument.add(new StringField("tag", tagName, Field.Store.NO));
newDocument.add(new TextField("content", tagName, Field.Store.NO));
}
if (StrUtil.isNotBlank(content)) {
newDocument.add(new TextField("content", content, Field.Store.NO));
Expand Down Expand Up @@ -495,10 +502,10 @@ public ResponseResult<List<FileIntroVO>> searchFile(SearchDTO searchDTO) {
result.setData(fileIntroVOList);
result.setCount(count);
return result;
} catch (IOException | ParseException e) {
} catch (IOException | ParseException | java.lang.IllegalArgumentException e) {
log.error("搜索失败", e);
return result.setData(Collections.emptyList()).setCount(0);
}
return result;
}

/**
Expand Down Expand Up @@ -537,8 +544,11 @@ private Query getQuery(SearchDTO searchDTO) throws ParseException {
String[] fields = {"name", "tag", "content"};
Map<String, Float> boosts = Map.of("name", 3.0f, "tag", 2.0f, "content", 1.0f);

// 将关键字转为小写并去掉空格和特殊字符
String keyword = searchDTO.getKeyword().toLowerCase().trim().replaceAll("[\\s\\p{Punct}]+", " ");
// 将关键字转为小写并去掉空格
String keyword = searchDTO.getKeyword().toLowerCase().trim();

// 将关键字中的特殊字符转义
keyword = QueryParser.escape(keyword);

// 创建正则表达式查询
BooleanQuery.Builder regexpQueryBuilder = new BooleanQuery.Builder();
Expand Down

0 comments on commit bba2edb

Please sign in to comment.