From 363033c6796f6a8e8c3b4a5b37bfc12d4b760757 Mon Sep 17 00:00:00 2001 From: "380133194@qq.com" Date: Wed, 19 Dec 2018 16:26:19 +0800 Subject: [PATCH] compatible with CentOS --- clone-detector/backup-gtpm.sh | 3 +- clone-detector/controller.py | 3 +- clone-detector/execute.sh | 3 +- clone-detector/move-index.sh | 3 +- clone-detector/preparequery.sh | 3 +- clone-detector/replacenodeprefix.sh | 3 +- clone-detector/restore-gtpm.sh | 3 +- clone-detector/results.pairs | 44 ++++++++++++++++++++++++++ clone-detector/runnodes.sh | 3 +- clone-detector/search_metadata.txt | 1 + clone-detector/search_status.sh | 3 +- clone-detector/sourcerer-cc.properties | 2 +- clone-detector/splitquery.sh | 3 +- clone-detector/unevensplit.py | 20 ++++++------ 14 files changed, 76 insertions(+), 21 deletions(-) create mode 100644 clone-detector/results.pairs create mode 100644 clone-detector/search_metadata.txt diff --git a/clone-detector/backup-gtpm.sh b/clone-detector/backup-gtpm.sh index 000ada0c6..c15c356e9 100755 --- a/clone-detector/backup-gtpm.sh +++ b/clone-detector/backup-gtpm.sh @@ -1,7 +1,8 @@ #!/bin/bash # # -scriptPATH=`realpath $0` +# scriptPATH=`realpath $0` +scriptPATH="pwd" rootPATH=`dirname $scriptPATH` echo "backing up gtpm indexes..." 
rm -rf $rootPATH/backup_gtpm diff --git a/clone-detector/controller.py b/clone-detector/controller.py index 7a424dfd8..a411493e1 100644 --- a/clone-detector/controller.py +++ b/clone-detector/controller.py @@ -52,7 +52,8 @@ def execute(self): command = self.full_script_path('execute.sh', "1") command_params = command.split() returncode = self.run_command( - command_params, self.full_file_path("Log_execute_1.out"), self.full_file_path("Log_execute_1.err")) + command_params, self.full_file_path("Log_execute_1.out"), + self.full_file_path("Log_execute_1.err")) self.current_state += 1 if returncode == ScriptController.EXIT_SUCCESS: self.flush_state() diff --git a/clone-detector/execute.sh b/clone-detector/execute.sh index d37b0c63e..fd704aed8 100755 --- a/clone-detector/execute.sh +++ b/clone-detector/execute.sh @@ -1,5 +1,6 @@ #!/bin/bash -scriptPATH=`realpath $0` +# scriptPATH=`realpath $0` +scriptPATH="pwd" rootPATH=`dirname $scriptPATH` echo $rootPATH rm -rf $rootPATH/NODE* diff --git a/clone-detector/move-index.sh b/clone-detector/move-index.sh index 5428b2b4b..8b30f604c 100755 --- a/clone-detector/move-index.sh +++ b/clone-detector/move-index.sh @@ -2,7 +2,8 @@ # # Run this script after indexing with just 1 node # -scriptPATH=`realpath $0` +# scriptPATH=`realpath $0` +scriptPATH="pwd" rootPATH=`dirname $scriptPATH` echo "Installing indexes..." 
diff --git a/clone-detector/preparequery.sh b/clone-detector/preparequery.sh index 57fdc1f0a..d08074030 100755 --- a/clone-detector/preparequery.sh +++ b/clone-detector/preparequery.sh @@ -1,6 +1,7 @@ #!/bin/bash num_nodes="${1:-0}" -scriptPATH=`realpath $0` +# scriptPATH=`realpath $0` +scriptPATH="pwd" rootPATH=`dirname $scriptPATH` echo "rootpath is : $rootPATH" for i in $(seq 1 1 $num_nodes) diff --git a/clone-detector/replacenodeprefix.sh b/clone-detector/replacenodeprefix.sh index e4959e745..5a631f3dd 100755 --- a/clone-detector/replacenodeprefix.sh +++ b/clone-detector/replacenodeprefix.sh @@ -1,5 +1,6 @@ #!/bin/bash -scriptPATH=`realpath $0` +# scriptPATH=`realpath $0` +scriptPATH="pwd" rootPATH=`dirname $scriptPATH` num_nodes="${1:-0}" src_text="NODE_PREFIX=NODE" diff --git a/clone-detector/restore-gtpm.sh b/clone-detector/restore-gtpm.sh index cd98a838b..77d4a553e 100755 --- a/clone-detector/restore-gtpm.sh +++ b/clone-detector/restore-gtpm.sh @@ -1,7 +1,8 @@ #!/bin/bash # # -scriptPATH=`realpath $0` +# scriptPATH=`realpath $0` +scriptPATH="pwd" rootPATH=`dirname $scriptPATH` echo "restoring gtpm indexes..." 
if [ -d "$rootPATH/gtpmindex" ]; then diff --git a/clone-detector/results.pairs b/clone-detector/results.pairs new file mode 100644 index 000000000..dd57a8ea8 --- /dev/null +++ b/clone-detector/results.pairs @@ -0,0 +1,44 @@ +1,13,1,6 +1,15,1,14 +1,25,1,10 +1,19,1,17 +1,24,1,8 +1,24,1,21 +1,20,1,17 +1,20,1,19 +1,26,1,25 +1,22,1,17 +1,28,1,8 +1,28,1,24 +1,28,1,21 +1,28,1,27 +1,22,1,19 +1,26,1,10 +1,22,1,20 +1,27,1,8 +1,23,1,17 +1,27,1,21 +1,27,1,24 +1,29,1,8 +1,29,1,27 +1,23,1,20 +1,23,1,22 +1,29,1,21 +1,29,1,24 +1,29,1,28 +1,23,1,19 +1,21,1,8 +1,33,1,15 +1,35,1,17 +1,33,1,14 +1,35,1,19 +1,35,1,23 +1,35,1,20 +1,35,1,22 +1,54,1,52 +1,54,1,50 +1,52,1,50 +1,57,1,56 +4,131,4,124 +4,163,4,158 +4,178,4,177 diff --git a/clone-detector/runnodes.sh b/clone-detector/runnodes.sh index 0a1fdff9d..17f7d9492 100755 --- a/clone-detector/runnodes.sh +++ b/clone-detector/runnodes.sh @@ -1,6 +1,7 @@ #!/bin/bash # run this script on master -scriptPATH=`realpath $0` +# scriptPATH=`realpath $0` +scriptPATH="pwd" rootPATH=`dirname $scriptPATH` echo $rootPATH ant -buildfile $rootPATH/build.xml clean cdi diff --git a/clone-detector/search_metadata.txt b/clone-detector/search_metadata.txt new file mode 100644 index 000000000..0cfbf0888 --- /dev/null +++ b/clone-detector/search_metadata.txt @@ -0,0 +1 @@ +2 diff --git a/clone-detector/search_status.sh b/clone-detector/search_status.sh index 5b8e3a045..ccbd300a5 100755 --- a/clone-detector/search_status.sh +++ b/clone-detector/search_status.sh @@ -1,5 +1,6 @@ #!/bin/bash -scriptPATH=`realpath $0` +# scriptPATH=`realpath $0` +scriptPATH="pwd" rootPATH=`dirname $scriptPATH` num=`cat search_metadata.txt` diff --git a/clone-detector/sourcerer-cc.properties b/clone-detector/sourcerer-cc.properties index 2dd958948..1bfe56ba8 100644 --- a/clone-detector/sourcerer-cc.properties +++ b/clone-detector/sourcerer-cc.properties @@ -9,7 +9,7 @@ IS_STATUS_REPORTER_ON=true LOG_PROCESSED_LINENUMBER_AFTER_X_LINES=50 # Ignore all files outside these bounds 
MIN_TOKENS=65 -MAX_TOKENS=500000 +MAX_TOKENS=50000 # Sharding speeds up search for very large datasets (>200K files). # For small-ish datasets, it doesn't matter so much diff --git a/clone-detector/splitquery.sh b/clone-detector/splitquery.sh index b35cc860a..0a160199a 100644 --- a/clone-detector/splitquery.sh +++ b/clone-detector/splitquery.sh @@ -4,7 +4,8 @@ #lines_per_part = int(total_lines + $N - 1) / $N # Configuration stuff -scriptPATH=`realpath $0` +# scriptPATH=`realpath $0` +scriptPATH="pwd" rootPATH=`dirname $scriptPATH` echo "inside splitquery " queryfile="$rootPATH/input/dataset/blocks.file" diff --git a/clone-detector/unevensplit.py b/clone-detector/unevensplit.py index cec985cab..e82b36f2b 100644 --- a/clone-detector/unevensplit.py +++ b/clone-detector/unevensplit.py @@ -27,10 +27,10 @@ def split(self): """ count=0 line_limit = self.base_x - print "line_limit is ", line_limit + print ("line_limit is ", line_limit) file_count=1 try: - print "creating split ",file_count + print("creating split ",file_count) self.outfile = open("query_{part}.file".format(part=file_count),'w') with open(self.input_filename,'r') as inputfile: for row in inputfile: @@ -42,15 +42,15 @@ def split(self): file_count+=1 count =0 line_limit =line_limit + math.ceil(0.5*self.base_x) - print "line_limit is ", line_limit - print "creating split ",file_count + print ("line_limit is ", line_limit) + print ("creating split ",file_count) self.outfile = open("query_{part}.file".format(part=file_count),'w') self.outfile.write(row) count+=1 self.outfile.flush() self.outfile.close() - except IOError, e: - print "Error: {error}".format(error=e) + except IOError as e: + print ("Error: {error}".format(error=e)) sys.exit(1) @@ -58,13 +58,13 @@ def get_num_lines_in_input_file(self): with open(self.input_filename) as f: for i, l in enumerate(f): pass - print "total lines in the inputfile: {0} ".format(i+1) + print ("total lines in the inputfile: {0} ".format(i+1)) return i + 1 def 
find_base_x(self): # formula for S = x + x+.5x + x+2*.5x...x + (N-1)*.5x self.base_x= math.ceil(float(2*self.total_lines)/(float((self.split_count+1)*(self.split_count+2)/2) - 1)) - print "base_x is ", self.base_x + print ("base_x is ", self.base_x) if __name__ == '__main__': @@ -73,7 +73,7 @@ def find_base_x(self): split_count = int(sys.argv[2]) params= {'split_count':split_count, 'input_filename' : input_file} - print "spliting {inputfile} in {count} chunks".format(inputfile=input_file,count=split_count) + print ("spliting {inputfile} in {count} chunks".format(inputfile=input_file,count=split_count)) splitter = Spliter(params) splitter.split() - print "splitting done!" \ No newline at end of file + print ("splitting done!") \ No newline at end of file