Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

移除anything&全文搜索优化 #2446

Merged
merged 6 commits into from
Dec 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions assets/dbus/org.deepin.Filemanager.TextIndex.xml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,10 @@
<arg type="b" direction="out"/>
<arg name="path" type="s" direction="in"/>
</method>
<method name="RemoveIndexTask">
<arg type="b" direction="out"/>
<arg name="paths" type="as" direction="in"/>
</method>
<method name="StopCurrentTask">
<arg type="b" direction="out"/>
</method>
Expand All @@ -28,5 +32,8 @@
<method name="IndexDatabaseExists">
<arg type="b" direction="out"/>
</method>
<method name="GetLastUpdateTime">
<arg type="s" direction="out"/>
</method>
</interface>
</node>
Original file line number Diff line number Diff line change
Expand Up @@ -42,13 +42,15 @@ TextIndexController::TextIndexController(QObject *parent)
fmInfo() << "[TextIndex] Checking index database existence";
auto pendingExists = interface->IndexDatabaseExists();
pendingExists.waitForFinished();
auto lastUpdateTime = interface->GetLastUpdateTime();
lastUpdateTime.waitForFinished();

if (pendingExists.isError()) {
if (pendingExists.isError() || lastUpdateTime.isError()) {
fmWarning() << "[TextIndex] Failed to check index existence:" << pendingExists.error().message();
return;
}

bool needCreate = !pendingExists.value();
bool needCreate = (!pendingExists.value() || lastUpdateTime.value().isEmpty());
fmInfo() << "[TextIndex] Index check result - Need create:" << needCreate;
startIndexTask(needCreate);
};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -562,7 +562,9 @@ void SearchEditWidget::doComplete()
completerView->setFixedHeight(h < kCompleterMaxHeight ? h : kCompleterMaxHeight);
}
completerView->show();
completerView->activateWindow();

// maybe lost focus?
// completerView->activateWindow();

return;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,6 @@
#include <exception>
#include <docparser.h>

static constexpr char kFilterFolders[] = "^/(boot|dev|proc|sys|run|lib|usr).*$";
static constexpr char kSupportFiles[] = "(rtf)|(odt)|(ods)|(odp)|(odg)|(docx)|(xlsx)|(pptx)|(ppsx)|(md)|"
"(xls)|(xlsb)|(doc)|(dot)|(wps)|(ppt)|(pps)|(txt)|(pdf)|(dps)|"
"(sh)|(html)|(htm)|(xml)|(xhtml)|(dhtml)|(shtm)|(shtml)|"
"(json)|(css)|(yaml)|(ini)|(bat)|(js)|(sql)|(uof)|(ofd)";
static int kMaxResultNum = 100000; // 最大搜索结果数
static int kEmitInterval = 50; // 推送时间间隔

Expand Down Expand Up @@ -68,86 +63,6 @@ IndexReaderPtr FullTextSearcherPrivate::newIndexReader()
return IndexReader::open(FSDirectory::open(indexStorePath().toStdWString()), true);
}

void FullTextSearcherPrivate::indexDocs(const IndexWriterPtr &writer, const QString &file, IndexType type)
{
Q_ASSERT(writer);

try {
switch (type) {
case kAddIndex: {
fmDebug() << "Adding [" << file << "]";
// 添加
writer->addDocument(fileDocument(file));
break;
}
case kUpdateIndex: {
fmDebug() << "Update file: [" << file << "]";
// 定义一个更新条件
TermPtr term = newLucene<Term>(L"path", file.toStdWString());
// 更新
writer->updateDocument(term, fileDocument(file));
break;
}
case kDeleteIndex: {
fmDebug() << "Delete file: [" << file << "]";
// 定义一个删除条件
TermPtr term = newLucene<Term>(L"path", file.toStdWString());
// 删除
writer->deleteDocuments(term);
break;
}
}
} catch (const LuceneException &e) {
QMetaEnum enumType = QMetaEnum::fromType<FullTextSearcherPrivate::IndexType>();
fmWarning() << QString::fromStdWString(e.getError()) << " type: " << enumType.valueToKey(type);
} catch (const std::exception &e) {
QMetaEnum enumType = QMetaEnum::fromType<FullTextSearcherPrivate::IndexType>();
fmWarning() << QString(e.what()) << " type: " << enumType.valueToKey(type);
} catch (...) {
fmWarning() << "Index document failed! " << file;
}
}

bool FullTextSearcherPrivate::checkUpdate(const IndexReaderPtr &reader, const QString &file, IndexType &type)
{
Q_ASSERT(reader);

try {
SearcherPtr searcher = newLucene<IndexSearcher>(reader);
TermQueryPtr query = newLucene<TermQuery>(newLucene<Term>(L"path", file.toStdWString()));

// 文件路径为唯一值,所以搜索一个结果就行了
TopDocsPtr topDocs = searcher->search(query, 1);
int32_t numTotalHits = topDocs->totalHits;
if (numTotalHits == 0) {
type = kAddIndex;
return true;
} else {
DocumentPtr doc = searcher->doc(topDocs->scoreDocs[0]->doc);
auto info = InfoFactory::create<FileInfo>(QUrl::fromLocalFile(file),
Global::CreateFileInfoType::kCreateFileInfoSync);
if (!info)
return false;

const QDateTime &modifyTime { info->timeOf(TimeInfoType::kLastModified).toDateTime() };
const QString &modifyEpoch { QString::number(modifyTime.toSecsSinceEpoch()) };
const String &storeTime { doc->get(L"modified") };
if (modifyEpoch.toStdWString() != storeTime) {
type = kUpdateIndex;
return true;
}
}
} catch (const LuceneException &e) {
fmWarning() << QString::fromStdWString(e.getError()) << " file: " << file;
} catch (const std::exception &e) {
fmWarning() << QString(e.what()) << " file: " << file;
} catch (...) {
fmWarning() << "The file checked failed!" << file;
}

return false;
}

void FullTextSearcherPrivate::tryNotify()
{
int cur = notifyTimer.elapsed();
Expand All @@ -158,26 +73,6 @@ void FullTextSearcherPrivate::tryNotify()
}
}

/**
 * @brief Build the Lucene document that represents @p file in the index.
 *
 * The document carries three stored fields:
 *  - "path":     the file path, not analyzed (used as the lookup key);
 *  - "modified": last-modified time in seconds since epoch, not analyzed;
 *  - "contents": text extracted by DocParser, analyzed for full-text search.
 *
 * @param file Path of the file to convert.
 * @return The newly created document. The "modified" field is omitted when
 *         no file info object can be created for @p file.
 */
DocumentPtr FullTextSearcherPrivate::fileDocument(const QString &file)
{
    DocumentPtr doc = newLucene<Document>();
    // file path
    doc->add(newLucene<Field>(L"path", file.toStdWString(), Field::STORE_YES, Field::INDEX_NOT_ANALYZED));

    // file last modified time
    auto info = InfoFactory::create<FileInfo>(QUrl::fromLocalFile(file),
                                              Global::CreateFileInfoType::kCreateFileInfoSync);
    if (info) {
        const QDateTime &modifyTime { info->timeOf(TimeInfoType::kLastModified).toDateTime() };
        const QString &modifyEpoch { QString::number(modifyTime.toSecsSinceEpoch()) };
        doc->add(newLucene<Field>(L"modified", modifyEpoch.toStdWString(), Field::STORE_YES, Field::INDEX_NOT_ANALYZED));
    } else {
        // InfoFactory::create can yield a null pointer (checkUpdate guards the
        // same call), so dereferencing it unconditionally would crash here.
        // NOTE(review): a document without "modified" presumably reads back as
        // an empty string and is re-indexed on the next checkUpdate - confirm.
        fmWarning() << "Failed to create file info, modified time not indexed: " << file;
    }

    // file contents
    QString contents = DocParser::convertFile(file.toStdString()).c_str();
    doc->add(newLucene<Field>(L"contents", contents.toStdWString(), Field::STORE_YES, Field::INDEX_ANALYZED));

    return doc;
}

bool FullTextSearcherPrivate::doSearch(const QString &path, const QString &keyword)
{
fmInfo() << "search path: " << path << " keyword: " << keyword;
Expand All @@ -189,12 +84,11 @@ bool FullTextSearcherPrivate::doSearch(const QString &path, const QString &keywo
hasTransform = true;

try {
IndexWriterPtr writer = newIndexWriter();
IndexReaderPtr reader = newIndexReader();
SearcherPtr searcher = newLucene<IndexSearcher>(reader);
AnalyzerPtr analyzer = newLucene<ChineseAnalyzer>();
QueryParserPtr parser = newLucene<QueryParser>(LuceneVersion::LUCENE_CURRENT, L"contents", analyzer);
//设定第一个* 可以匹配
// 设定第一个* 可以匹配
parser->setAllowLeadingWildcard(true);
QueryPtr query = parser->parse(keyword.toStdWString());

Expand All @@ -207,8 +101,9 @@ bool FullTextSearcherPrivate::doSearch(const QString &path, const QString &keywo
Collection<ScoreDocPtr> scoreDocs = topDocs->scoreDocs;

QHash<QString, QSet<QString>> hiddenFileHash;
QSet<QString> invalidIndexPaths; // 存储无效的索引路径
for (auto scoreDoc : scoreDocs) {
//中断
// 中断
if (status.loadAcquire() != AbstractSearcher::kRuning)
return false;

Expand All @@ -219,10 +114,9 @@ bool FullTextSearcherPrivate::doSearch(const QString &path, const QString &keywo
const QUrl &url = QUrl::fromLocalFile(StringUtils::toUTF8(resultPath).c_str());
auto info = InfoFactory::create<FileInfo>(url,
Global::CreateFileInfoType::kCreateFileInfoSync);
// delete invalid index
// 收集无效的索引路径
if (!info || !info->exists()) {
// TODO(zhangs):
// indexDocs(writer, url.path(), kDeleteIndex);
invalidIndexPaths.insert(url.path());
continue;
}

Expand All @@ -239,14 +133,22 @@ bool FullTextSearcherPrivate::doSearch(const QString &path, const QString &keywo
allResults.append(QUrl::fromLocalFile(StringUtils::toUTF8(resultPath).c_str()));
}

//推送
// 推送
tryNotify();
}
}
}

reader->close();
writer->close();

// 如果有无效的索引路径,一次性启动移除任务
if (!invalidIndexPaths.isEmpty()) {
auto client = TextIndexClient::instance();
client->startTask(TextIndexClient::TaskType::Remove,
QStringList(invalidIndexPaths.begin(), invalidIndexPaths.end()));
invalidIndexPaths.clear();
}

} catch (const LuceneException &e) {
fmWarning() << QString::fromStdWString(e.getError());
} catch (const std::exception &e) {
Expand Down Expand Up @@ -353,6 +255,15 @@ bool FullTextSearcher::search()
auto serviceStatus = client->checkService();
if (serviceStatus != TextIndexClient::ServiceStatus::Available) {
// 如果服务不可用,直接执行搜索
fmWarning() << "Service is not available, search directly";
d->doSearchAndEmit(path, key);
return true;
}

// 检查到有服务正在运行,也直接执行搜索
auto hasRunningTask = client->hasRunningTask();
if (hasRunningTask.has_value() && hasRunningTask.value()) {
fmWarning() << "Service is running, search directly";
d->doSearchAndEmit(path, key);
return true;
}
Expand All @@ -361,22 +272,37 @@ bool FullTextSearcher::search()
auto indexExistsResult = client->indexExists();
if (!indexExistsResult.has_value()) {
// 如果无法确定索引状态,直接执行搜索
fmWarning() << "Failed to check index status, search directly";
d->doSearchAndEmit(path, key);
return true;
}

// 判断是否是根目录或家目录
const QString homePath = QDir::homePath();
const bool isRootOrHome = (path == "/" || path == homePath);

// 根据索引状态决定是创建还是更新
if (!indexExistsResult.value()) {
// 如果索引不存在,需要等待创建完成
QString bindPath = FileUtils::bindPathTransform(path, false);
client->startTask(TextIndexClient::TaskType::Create, bindPath);

// 等待任务完成
if (!waitForTask()) {
d->status.storeRelease(kCompleted);
return false;
}
} else {
// 索引存在的情况
// 启动更新任务
client->startTask(TextIndexClient::TaskType::Update, path);
}

// 等待任务完成
if (!waitForTask()) {
d->status.storeRelease(kCompleted);
return false;
// 如果不是根目录或家目录,等待更新完成
// 如果是根目录或家目录,直接继续搜索
if (!isRootOrHome && !waitForTask()) {
d->status.storeRelease(kCompleted);
return false;
}
}

// 执行搜索并发送结果
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,10 +59,7 @@ class FullTextSearcherPrivate : public QObject
return path;
}

Lucene::DocumentPtr fileDocument(const QString &file);
QString dealKeyword(const QString &keyword);
void indexDocs(const Lucene::IndexWriterPtr &writer, const QString &file, IndexType type);
bool checkUpdate(const Lucene::IndexReaderPtr &reader, const QString &file, IndexType &type);
void tryNotify();

bool isUpdated = false;
Expand All @@ -82,6 +79,8 @@ class FullTextSearcherPrivate : public QObject
FullTextSearcher *q = nullptr;

void doSearchAndEmit(const QString &path, const QString &key);

QSet<QString> invalidIndexPaths; // 存储无效的索引路径
};

DPSEARCH_END_NAMESPACE
Expand Down
Loading
Loading