Skip to content

Commit

Permalink
split indexopts
Browse files Browse the repository at this point in the history
  • Loading branch information
andrewyates committed Oct 9, 2020
1 parent daf2004 commit 0c03607
Showing 1 changed file with 6 additions and 9 deletions.
15 changes: 6 additions & 9 deletions capreolus/searcher/anserini.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,12 +41,6 @@ def _anserini_query_from_file(self, topicsfn, anserini_param_str, output_base_pa
os.makedirs(output_base_path, exist_ok=True)
output_path = os.path.join(output_base_path, "searcher")

-# add stemmer and stop options to match underlying index
-indexopts = "-stemmer "
-indexopts += "none" if self.index.config["stemmer"] is None else self.index.config["stemmer"]
-if self.index.config["indexstops"]:
-indexopts += " -keepstopwords"
-
index_path = self.index.get_index_path()
anserini_fat_jar = Anserini.get_fat_jar()
cmd = [
Expand All @@ -61,7 +55,6 @@ def _anserini_query_from_file(self, topicsfn, anserini_param_str, output_base_pa
"Trec",
"-index",
index_path,
-indexopts,
"-topics",
topicsfn,
"-output",
Expand All @@ -71,8 +64,12 @@ def _anserini_query_from_file(self, topicsfn, anserini_param_str, output_base_pa
"-inmem",
"-threads",
str(MAX_THREADS),
-anserini_param_str,
-]
+"-stemmer",
+"none" if self.index.config["stemmer"] is None else self.index.config["stemmer"],
+] + anserini_param_str.split()
+
+if self.index.config["indexstops"]:
+cmd += ["-keepStopwords"]

logger.info("Anserini writing runs to %s", output_path)
logger.debug(cmd)
Expand Down

0 comments on commit 0c03607

Please sign in to comment.