-
Notifications
You must be signed in to change notification settings - Fork 15
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
4 changed files
with
159 additions
and
53 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
class IREvaluator(object):
    """Empty placeholder class: it defines no attributes or methods yet.

    NOTE(review): the name suggests it is meant to evaluate the results of
    the information retrieval system -- confirm the intended responsibilities
    before building on it.
    """
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
#!/usr/bin/python | ||
import ir_system | ||
import re | ||
import sys | ||
|
||
################################################################################# | ||
## @brief preprocess_userinput | ||
# @details This method reads the user input and, when it is a file path, transforms the file contents into a list of texts | ||
# @param user_input The input given by the user | ||
################################################################################# | ||
def preprocess_userinput(user_input):
    """Turn raw user input into the text(s) the retrieval system works on.

    If ``user_input`` contains a path separator ("/" or "\\") it is treated
    as the path of a file holding several texts, each one introduced by a
    ".I <number>" line followed by a ".W" line. The file is read and split
    on that delimiter. Otherwise the input is taken to be the text or query
    itself and is returned unchanged.

    @param user_input The string typed by the user (a file path or a text).
    @return A list with the texts found in the file, or ``user_input``
            itself when it is not a path.

    If the file cannot be opened or read, a message is printed and the
    program terminates through ``sys.exit(0)``.
    """
    # The previous test `"/" or "\\" in user_input` was always true because
    # the non-empty literal "/" is truthy; each separator must be checked
    # against the input on its own.
    if "/" not in user_input and "\\" not in user_input:
        return user_input  # the user has provided a query or a text

    try:
        # The context manager guarantees the file handle is closed.
        with open(user_input) as texts_file:
            contents = texts_file.read()
    except IOError:
        print(user_input + " - No such file or directory")
        sys.exit(0)

    # Split on the ".I <number>\n.W\n" delimiter. The chunk before the first
    # delimiter is dropped, so the result holds only the texts themselves.
    return re.split(r"\.I \d*\n\.W\n", contents)[1:]
|
||
################################################################################# | ||
## @brief create_ir_system | ||
# @details This method creates an information retrieval system with the model | ||
# chosen by the user | ||
# @param irmodel_choice The id of the information retrieval model chosen by the user | ||
################################################################################# | ||
def create_ir_system(irmodel_choice, corpus, query):
    """Build the information retrieval system selected by the user.

    @param irmodel_choice The id of the model: 0 builds ``IRBoolean``,
                          1 builds ``IR_tf`` and 2 builds ``IR_tf_idf``.
    @param corpus The corpus handed to the model constructor.
    @param query The query (or queries) handed to the model constructor.
    @return The object built by the matching ``ir_system`` class, or
            ``None`` when ``irmodel_choice`` is not one of the ids above.
    """
    # A leftover debugging print ("dfd") was removed from the Boolean branch.
    if irmodel_choice == 0:
        return ir_system.IRBoolean(corpus, query)
    if irmodel_choice == 1:
        return ir_system.IR_tf(corpus, query)
    if irmodel_choice == 2:
        return ir_system.IR_tf_idf(corpus, query)
    # Unknown ids keep yielding None, as callers have always observed.
    return None
|
||
|
||
####################################################################################################################### | ||
## @brief The main function that enables the user to launch queries | ||
####################################################################################################################### | ||
if __name__ == '__main__':
    # Interactive entry point: show the banner, ask for a corpus, a query
    # and a model id, then build the matching retrieval system.

    print("--------------------------------------------------------\n")
    print("------------ Project: Information Retrieval System\n")
    print("------------ Course: Data Science Master - Technical University of Madrid\n")
    print("------------ Subject: Information Extraction, Retrieval and Integration\n")
    print("------------ Author: Yolanda de la Hoz Simon\n")
    print("--------------------------------------------------------\n")

    # Each answer may be either literal text or the path of a file of texts.
    corpus_input = raw_input("Write a text or enter the corpus path:\n")
    corpus_text = preprocess_userinput(corpus_input)
    query_input = raw_input("Write a query or enter a document path with a set of queries:\n")
    query_text = preprocess_userinput(query_input)

    print("\n The available models are: \n 0:Boolean\n 1:TF\n 2:TF-IDF\n \n")
    irmodel_choice = raw_input("Please, choose an information retrieval model by entering the id of the model:\n")

    # A non-numeric answer used to abort with an unhandled ValueError
    # traceback; report it and stop, in the same way a bad file path is
    # reported by preprocess_userinput.
    try:
        model_id = int(irmodel_choice)
    except ValueError:
        print(irmodel_choice + " - Not a valid model id")
        sys.exit(0)

    ir = create_ir_system(model_id, corpus_text, query_text)
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
class RocchioAlgorithm(object):
    """Empty placeholder class: it defines no attributes or methods yet.

    NOTE(review): presumably intended to hold an implementation of the
    Rocchio relevance-feedback algorithm -- confirm before relying on it.
    """
|
||
|