Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/c interface #180

Open
wants to merge 10 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -101,11 +101,11 @@ configure_file(
IMMEDIATE @ONLY
)

add_custom_target(
uninstall
"${CMAKE_COMMAND}" -P "${CMAKE_CURRENT_BINARY_DIR}/cmake_uninstall.cmake"
VERBATIM
)
#add_custom_target(
# uninstall
# "${CMAKE_COMMAND}" -P "${CMAKE_CURRENT_BINARY_DIR}/cmake_uninstall.cmake"
# VERBATIM
#)

if(ENABLE_PACKAGING)
include(CreateLucene++Packages)
Expand Down
39 changes: 39 additions & 0 deletions include/lucene++/Lucene_c.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
#ifndef LUCENE_C_H_
#define LUCENE_C_H_

/*
 * Minimal C interface to a Lucene++ index.
 *
 * All handles are opaque: they are created and released only through the
 * functions declared in this header.
 */

#include <stdarg.h>
#include <stddef.h>
#include <stdint.h>

/*
 * Export marker for the public C entry points. On GNU-compatible compilers it
 * expands to the default-visibility attribute; elsewhere it expands to nothing
 * so the header still compiles.
 */
#if defined(__GNUC__)
#define LUCENE_C_API __attribute__((visibility("default")))
#else
#define LUCENE_C_API
#endif

#ifdef __cplusplus
extern "C" {
#endif

/* Opaque handle to an open index. */
typedef struct index_t index_t;
/* Opaque handle to a document that is being assembled. */
typedef struct index_document_t index_document_t;

/*
 * Opens an index writer on the directory `path`.
 * Returns a handle to release with index_close(), or NULL on failure.
 */
LUCENE_C_API index_t* index_open(const char *path);

/*
 * Adds the document `idoc` to `index`. Returns 1 on success.
 * The caller keeps ownership of `idoc` and must still destroy it.
 */
LUCENE_C_API int index_put(index_t *index, index_document_t *idoc);

/*
 * Runs a single-clause query for `key` in `field`.
 *   type    0 = exact term match, 1 = prefix match; other values are
 *           reserved and produce no results.
 *   nField, nKey  accepted but not read by the current implementation;
 *           `field` and `key` are used as NUL-terminated strings.
 *   result  address of a malloc-compatible int buffer that receives the
 *           integer ids of the matching documents (the value stored with
 *           index_document_add(..., index = 0)).
 *   nResult on entry the capacity of *result in ints; when there are more
 *           hits than that, *result is grown with realloc() and *nResult is
 *           set to the number of hits.
 * Returns 1 on success.
 */
LUCENE_C_API int index_search(index_t *index, const char *field, int32_t nField, const char *key, int32_t nKey, int type, int **result, int32_t *nResult);

/*
 * Runs a boolean query made of `nQuery` clauses.
 *   field, key  arrays of `nQuery` NUL-terminated strings.
 *   qSet    per-clause query kind: 0 = exact term, 1 = prefix; other values
 *           are reserved and the clause is skipped.
 *   opera   how every clause is combined: 0 = MUST, 1 = SHOULD, 2 = MUST_NOT.
 *   result, nResult  same contract as for index_search().
 * Returns 0 on success and -1 on failure.
 */
LUCENE_C_API int index_multi_search(index_t *index, const char **field, const char **key, int *qSet, int nQuery, int opera, int **result, int32_t *nResult);

/* Closes the underlying writer and frees the handle. */
LUCENE_C_API void index_close(index_t *index);

/* Optimizes the index. Returns 1 on success. */
LUCENE_C_API int index_optimize(index_t *index);

/*
 * Creates an empty document. Returns NULL on failure.
 * Release with index_document_destroy().
 */
LUCENE_C_API index_document_t* index_document_create(void);

/*
 * Adds one value to the document.
 *   index != 0  `val` is stored under `field` and indexed without analysis.
 *   index == 0  `val` is stored (not indexed) as the document id that the
 *               search functions return; `field` is ignored.
 *   nFields, nVals  accepted but not read by the current implementation.
 */
LUCENE_C_API void index_document_add(index_document_t *idoc, const char *field, int nFields, const char *val, int nVals, int32_t index);

/* Frees a document handle. Passing NULL is allowed. */
LUCENE_C_API void index_document_destroy(index_document_t *doc);

#ifdef __cplusplus
}
#endif

#endif /* LUCENE_C_H_ */
1 change: 1 addition & 0 deletions src/core/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ file(GLOB_RECURSE lucene_sources
"queryparser/*.cpp"
"store/*.cpp"
"util/*.c*"
"c/*.cc"
)

file(GLOB_RECURSE lucene_internal_headers
Expand Down
140 changes: 140 additions & 0 deletions src/core/c/Lucene_c.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
#include "Lucene_c.h"


#include "targetver.h"
#include <iostream>
#include "LuceneHeaders.h"
#include "FileUtils.h"
#include "MiscUtils.h"
#include "ConstantScoreQuery.h"

using namespace Lucene;
/// Name of the stored, non-indexed field that carries a document's integer id.
/// It is written by index_document_add() when its `index` flag is 0 and read
/// back by the search functions. Kept file-local and immutable so the library
/// does not export a writable global symbol named `UID`.
static const String UID = L"U$DID";

/// Upper bound on the number of hits requested from a single search call.
static const int MAX_NUM_OF_OUTPUT = 1000 * 10000;

extern "C" {

/// Concrete definition of the opaque C handle for an index: it only wraps
/// the Lucene++ writer the C functions operate on.
struct index_t {
    IndexWriterPtr rep;
};
/// Concrete definition of the opaque C handle for a document: it only wraps
/// the Lucene++ document that fields are added to.
struct index_document_t {
    DocumentPtr rep;
};



/// Opens an index writer on the directory `path` and wraps it in a C handle.
///
/// The writer is constructed with its `create` argument set to true, a
/// StandardAnalyzer and the LIMITED maximum field length.
///
/// @param path NUL-terminated directory path; NULL is rejected.
/// @return a handle to release with index_close(), or NULL on any failure.
///         No C++ exception is allowed to leave this function because the
///         caller may be plain C code.
index_t* index_open(const char *path) {
    if (path == NULL) {
        return NULL;
    }
    try {
        IndexWriterPtr writer = newLucene<IndexWriter>(
            FSDirectory::open(StringUtils::toString(path)),
            newLucene<StandardAnalyzer>(LuceneVersion::LUCENE_CURRENT),
            true,
            IndexWriter::MaxFieldLengthLIMITED);
        if (!writer) {
            return NULL;
        }
        // `new` reports failure by throwing, which the handler below converts to NULL.
        index_t *index = new index_t;
        index->rep = writer;
        return index;
    } catch (...) {
        return NULL;
    }
}

/// Adds the document wrapped by `idoc` to the index wrapped by `index`.
///
/// @return 1 on success; -1 if either handle is NULL or empty, or if the
///         underlying addDocument() call throws. Exceptions never leave this
///         function because the caller may be plain C code.
int index_put(index_t *index, index_document_t *idoc) {
    if (index == NULL || idoc == NULL || !index->rep || !idoc->rep) {
        return -1;
    }
    try {
        index->rep->addDocument(idoc->rep);
        return 1;
    } catch (...) {
        return -1;
    }
}

/// Runs a boolean query made of `nQuery` clauses and returns the integer ids
/// (the stored UID field) of the matching documents.
///
/// @param index   open index handle.
/// @param field   array of `nQuery` NUL-terminated field names.
/// @param key     array of `nQuery` NUL-terminated search keys.
/// @param qSet    per-clause query kind: 0 = TermQuery, 1 = PrefixQuery;
///                other values are reserved and the clause is skipped.
/// @param nQuery  number of clauses.
/// @param opera   occurrence applied to every clause: 0 = MUST, 1 = SHOULD,
///                2 = MUST_NOT. Any other value is rejected.
/// @param result  address of a malloc-compatible int buffer (may point to
///                NULL); it is grown with realloc() when too small.
/// @param nResult on entry the capacity of *result in ints; on success the
///                number of ids written.
/// @return 0 on success, -1 on invalid arguments, allocation failure or any
///         exception raised by Lucene++.
int index_multi_search(index_t *index, const char **field, const char **key, int *qSet, int nQuery, int opera, int **result, int32_t *nResult) {
    if (index == NULL || !index->rep || result == NULL || nResult == NULL) {
        return -1;
    }
    if (nQuery > 0 && (field == NULL || key == NULL || qSet == NULL)) {
        return -1;
    }

    // Reject unknown operators instead of using an uninitialized Occur value.
    BooleanClause::Occur occur;
    switch (opera) {
    case 0:
        occur = BooleanClause::MUST;
        break;
    case 1:
        occur = BooleanClause::SHOULD;
        break;
    case 2:
        occur = BooleanClause::MUST_NOT;
        break;
    default:
        return -1;
    }

    int status = -1;
    IndexReaderPtr reader;
    try {
        BooleanQueryPtr bQuery = newLucene<BooleanQuery>();
        for (int i = 0; i < nQuery; ++i) {
            if (qSet[i] != 0 && qSet[i] != 1) {
                continue; // reserved query kinds contribute no clause
            }
            if (field[i] == NULL || key[i] == NULL) {
                return -1;
            }
            if (qSet[i] == 0) {
                bQuery->add(newLucene<TermQuery>(newLucene<Term>(StringUtils::toString(field[i]), StringUtils::toString(key[i]))), occur);
            } else {
                bQuery->add(newLucene<PrefixQuery>(newLucene<Term>(StringUtils::toString(field[i]), StringUtils::toString(key[i]))), occur);
            }
        }

        reader = index->rep->getReader();
        IndexSearcherPtr searcher = newLucene<IndexSearcher>(reader);
        Collection<ScoreDocPtr> hits = searcher->search(bQuery, FilterPtr(), MAX_NUM_OF_OUTPUT)->scoreDocs;
        const int32_t nHits = hits.size();

        bool haveRoom = true;
        if (nHits > 0 && (*result == NULL || *nResult < nHits)) {
            // Keep the old buffer reachable if realloc fails so the caller can still free it.
            int *grown = static_cast<int *>(realloc(*result, static_cast<size_t>(nHits) * sizeof(int)));
            if (grown == NULL) {
                haveRoom = false;
            } else {
                *result = grown;
            }
        }
        if (haveRoom) {
            for (int32_t i = 0; i < nHits; ++i) {
                (*result)[i] = StringUtils::toInt(searcher->doc(hits[i]->doc)->get(UID));
            }
            // Always report the hit count, even when the buffer was already large enough.
            *nResult = nHits;
            status = 0;
        }
    } catch (...) {
        status = -1;
    }

    // The reader obtained from the writer must be released by whoever requested it.
    if (reader) {
        try {
            reader->close();
        } catch (...) {
            // Nothing useful can be reported for a failed close.
        }
    }
    return status;
}
/// Runs a single-clause query and returns the integer ids (the stored UID
/// field) of the matching documents.
///
/// @param index   open index handle.
/// @param field   NUL-terminated field name.
/// @param nField  not read; `field` is treated as NUL-terminated.
/// @param key     NUL-terminated search key.
/// @param nKey    not read; `key` is treated as NUL-terminated.
/// @param type    0 = exact term match, 1 = prefix match. Other values are
///                reserved: the call succeeds and reports zero results.
/// @param result  address of a malloc-compatible int buffer (may point to
///                NULL); it is grown with realloc() when too small.
/// @param nResult on entry the capacity of *result in ints; on success the
///                number of ids written.
/// @return 1 on success, -1 on invalid arguments, allocation failure or any
///         exception raised by Lucene++.
int index_search(index_t *index, const char *field, int32_t nField, const char *key, int32_t nKey, int type, int **result, int32_t *nResult) {
    (void)nField;
    (void)nKey;
    if (index == NULL || !index->rep || field == NULL || key == NULL || result == NULL || nResult == NULL) {
        return -1;
    }
    if (type != 0 && type != 1) {
        *nResult = 0;
        return 1;
    }

    int status = -1;
    IndexReaderPtr reader;
    try {
        // Pass the whole C strings: dereferencing them would convert only their first character.
        QueryPtr query;
        if (type == 0) {
            query = newLucene<TermQuery>(newLucene<Term>(StringUtils::toString(field), StringUtils::toString(key)));
        } else {
            query = newLucene<PrefixQuery>(newLucene<Term>(StringUtils::toString(field), StringUtils::toString(key)));
        }

        reader = index->rep->getReader();
        IndexSearcherPtr searcher = newLucene<IndexSearcher>(reader);
        Collection<ScoreDocPtr> hits = searcher->search(query, FilterPtr(), MAX_NUM_OF_OUTPUT)->scoreDocs;
        const int32_t nHits = hits.size();

        bool haveRoom = true;
        if (nHits > 0 && (*result == NULL || *nResult < nHits)) {
            // Keep the old buffer reachable if realloc fails so the caller can still free it.
            int *grown = static_cast<int *>(realloc(*result, static_cast<size_t>(nHits) * sizeof(int)));
            if (grown == NULL) {
                haveRoom = false;
            } else {
                *result = grown;
            }
        }
        if (haveRoom) {
            for (int32_t i = 0; i < nHits; ++i) {
                (*result)[i] = StringUtils::toInt(searcher->doc(hits[i]->doc)->get(UID));
            }
            // Always report the hit count, even when the buffer was already large enough.
            *nResult = nHits;
            status = 1;
        }
    } catch (...) {
        status = -1;
    }

    // The reader obtained from the writer must be released by whoever requested it.
    if (reader) {
        try {
            reader->close();
        } catch (...) {
            // Nothing useful can be reported for a failed close.
        }
    }
    return status;
}

/// Closes the wrapped writer (if any) and frees the handle.
///
/// Passing NULL is a no-op. A failure while closing the writer cannot be
/// reported through a void return, so it is swallowed; the handle is freed in
/// every case so it never leaks and no exception reaches a C caller.
void index_close(index_t *index) {
    if (index == NULL) {
        return;
    }
    if (index->rep) {
        try {
            index->rep->close();
        } catch (...) {
            // Intentionally ignored: see the function comment.
        }
    }
    // Destroying the handle also drops its reference to the writer.
    delete index;
}



/// Calls optimize() on the wrapped writer.
///
/// @return 1 on success; -1 if the handle is NULL or empty, or if optimize()
///         throws. Exceptions never leave this function because the caller
///         may be plain C code.
int index_optimize(index_t *index) {
    if (index == NULL || !index->rep) {
        return -1;
    }
    try {
        index->rep->optimize();
        return 1;
    } catch (...) {
        return -1;
    }
}
/// Creates an empty document wrapped in a C handle.
///
/// @return a handle to release with index_document_destroy(), or NULL if the
///         document or the handle could not be allocated. Allocation failures
///         are reported by exception in C++, so they are caught here rather
///         than allowed to reach a C caller.
index_document_t* index_document_create() {
    try {
        DocumentPtr doc = newLucene<Document>();
        if (!doc) {
            return NULL;
        }
        index_document_t *idoc = new index_document_t;
        idoc->rep = doc;
        return idoc;
    } catch (...) {
        return NULL;
    }
}


/// Frees a document handle. A NULL handle is accepted and ignored.
void index_document_destroy(index_document_t *idoc) {
    // Deleting a null pointer is a no-op, and destroying the handle destroys
    // its DocumentPtr member, which drops the reference to the document.
    delete idoc;
}
/// Adds one stored value to the document.
///
/// @param idoc    document handle.
/// @param field   NUL-terminated field name; only used when `index` is non-zero.
/// @param nFields not read; `field` is treated as NUL-terminated.
/// @param val     NUL-terminated value.
/// @param nVals   not read; `val` is treated as NUL-terminated.
/// @param index   non-zero: store `val` under `field`, indexed without analysis
///                and without norms. Zero: store `val`, not indexed, under the
///                UID field that the search functions read back as the
///                document's integer id.
///
/// The function returns void, so invalid arguments and Lucene++ failures
/// cannot be reported: in both cases nothing is added. Exceptions are caught
/// so they never reach a C caller.
void index_document_add(index_document_t *idoc, const char *field, int nFields, const char *val, int nVals, int32_t index) {
    (void)nFields;
    (void)nVals;
    if (idoc == NULL || !idoc->rep || val == NULL) {
        return;
    }
    if (index && field == NULL) {
        return;
    }
    try {
        if (index) {
            idoc->rep->add(newLucene<Field>(StringUtils::toString(field), StringUtils::toString(val), Field::STORE_YES, Field::INDEX_NOT_ANALYZED_NO_NORMS));
        } else {
            idoc->rep->add(newLucene<Field>(UID, StringUtils::toString(val), Field::STORE_YES, Field::INDEX_NO));
        }
    } catch (...) {
        // Intentionally ignored: see the function comment.
    }
}
}
92 changes: 69 additions & 23 deletions src/demo/indexfiles/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@
#include "LuceneHeaders.h"
#include "FileUtils.h"
#include "MiscUtils.h"

#include "ConstantScoreQuery.h"
#include "BooleanQuery.h"
using namespace Lucene;

int32_t docNumber = 0;
Expand All @@ -42,6 +43,15 @@ DocumentPtr fileDocument(const String& docFile) {
return doc;
}

int addDoc(IndexWriterPtr& writer) {
DocumentPtr doc = newLucene<Document>();
doc->add(newLucene<Field>(L"tag1", L"cpu1", Field::STORE_YES, Field::INDEX_NOT_ANALYZED_NO_NORMS));
doc->add(newLucene<Field>(L"tag2", L"cpu2", Field::STORE_YES, Field::INDEX_NOT_ANALYZED_NO_NORMS));
doc->add(newLucene<Field>(L"uid", StringUtils::toString(10), Field::STORE_YES, Field::INDEX_NO));
writer->addDocument(doc);
return 0;

}
void indexDocs(const IndexWriterPtr& writer, const String& sourceDir) {
HashSet<String> dirList(HashSet<String>::newInstance());
if (!FileUtils::listDirectory(sourceDir, false, dirList)) {
Expand All @@ -65,47 +75,83 @@ void indexDocs(const IndexWriterPtr& writer, const String& sourceDir) {

/// Index all text files under a directory.
int main(int argc, char* argv[]) {
if (argc != 3) {
if (argc != 2) {
std::wcout << L"Usage: indexfiles.exe <index source dir> <lucene index dir>\n";
return 1;
}

String sourceDir(StringUtils::toUnicode(argv[1]));
String indexDir(StringUtils::toUnicode(argv[2]));
//String sourceDir(StringUtils::toUnicode(argv[1]));
String indexDir(StringUtils::toUnicode(argv[1]));

if (!FileUtils::isDirectory(sourceDir)) {
std::wcout << L"Source directory doesn't exist: " << sourceDir << L"\n";
return 1;
}
//if (!FileUtils::isDirectory(sourceDir)) {
// std::wcout << L"Source directory doesn't exist: " << sourceDir << L"\n";
// return 1;
//}

if (!FileUtils::isDirectory(indexDir)) {
if (!FileUtils::createDirectory(indexDir)) {
std::wcout << L"Unable to create directory: " << indexDir << L"\n";
return 1;
}
}
//if (!FileUtils::isDirectory(indexDir)) {
// if (!FileUtils::createDirectory(indexDir)) {
// std::wcout << L"Unable to create directory: " << indexDir << L"\n";
// return 1;
// }
//}

uint64_t beginIndex = MiscUtils::currentTimeMillis();

try {
IndexWriterPtr writer = newLucene<IndexWriter>(FSDirectory::open(indexDir), newLucene<StandardAnalyzer>(LuceneVersion::LUCENE_CURRENT), true, IndexWriter::MaxFieldLengthLIMITED);
std::wcout << L"Indexing to directory: " << indexDir << L"...\n";
for (int i = 0; i < 10000; i++) {
addDoc(writer);
}
IndexReaderPtr reader = writer->getReader();
// PrefixFilter combined with ConstantScoreQuery
PrefixFilterPtr filter = newLucene<PrefixFilter>(newLucene<Term>(L"tag1", L"cp"));
QueryPtr query = newLucene<ConstantScoreQuery>(filter);
IndexSearcherPtr searcher = newLucene<IndexSearcher>(reader);
Collection<ScoreDocPtr> hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs;
std::wcout << "size: " << hits.size() << std::endl;


BooleanQueryPtr q = newLucene<BooleanQuery>();
q->add(newLucene<TermQuery>(newLucene<Term>(L"tag1", L"cpu1")), BooleanClause::SHOULD);
q->add(newLucene<TermQuery>(newLucene<Term>(L"tag2", L"cpu2")), BooleanClause::SHOULD);
hits = searcher->search(q, FilterPtr(), 100000000)->scoreDocs;
std::wcout << "size: " << hits.size() << std::endl;

q->add(newLucene<TermQuery>(newLucene<Term>(L"tag1", L"cpu1")), BooleanClause::SHOULD);
q->add(newLucene<TermQuery>(newLucene<Term>(L"tag1", L"cpu1")), BooleanClause::MUST);
hits = searcher->search(q, 100000000)->scoreDocs;
std::wcout << "size: " << hits.size() << std::endl;

q->add(newLucene<TermQuery>(newLucene<Term>(L"tag1", L"cpu1")), BooleanClause::MUST);
q->add(newLucene<TermQuery>(newLucene<Term>(L"tag2", L"cpu1")), BooleanClause::MUST);
hits = searcher->search(q, 10000000)->scoreDocs;



indexDocs(writer, sourceDir);
BooleanQueryPtr bquery = newLucene<BooleanQuery>();
bquery->add(newLucene<PrefixQuery>(newLucene<Term>(L"tag1", L"xxx")), BooleanClause::SHOULD);
bquery->add(newLucene<TermQuery>(newLucene<Term>(L"tag2", L"cpuxxx")), BooleanClause::SHOULD);
hits = searcher->search(bquery, FilterPtr(), 10000000)->scoreDocs;
std::wcout << "size: " << hits.size() << std::endl;

//EXPECT_EQ(4, hits.size());

uint64_t endIndex = MiscUtils::currentTimeMillis();
uint64_t indexDuration = endIndex - beginIndex;
std::wcout << L"Index time: " << indexDuration << L" milliseconds\n";
std::wcout << L"Optimizing...\n";
///indexDocs(writer, sourceDir);

writer->optimize();
//uint64_t endIndex = MiscUtils::currentTimeMillis();
//uint64_t indexDuration = endIndex - beginIndex;
//std::wcout << L"Index time: " << indexDuration << L" milliseconds\n";
//std::wcout << L"Optimizing...\n";

uint64_t optimizeDuration = MiscUtils::currentTimeMillis() - endIndex;
std::wcout << L"Optimize time: " << optimizeDuration << L" milliseconds\n";
//writer->optimize();

//uint64_t optimizeDuration = MiscUtils::currentTimeMillis() - endIndex;
//std::wcout << L"Optimize time: " << optimizeDuration << L" milliseconds\n";

writer->close();

std::wcout << L"Total time: " << indexDuration + optimizeDuration << L" milliseconds\n";
//std::wcout << L"Total time: " << indexDuration + optimizeDuration << L" milliseconds\n";
} catch (LuceneException& e) {
std::wcout << L"Exception: " << e.getError() << L"\n";
return 1;
Expand Down