From 363033c6796f6a8e8c3b4a5b37bfc12d4b760757 Mon Sep 17 00:00:00 2001
From: "380133194@qq.com" <cqybp111>
Date: Wed, 19 Dec 2018 16:26:19 +0800
Subject: [PATCH] Make clone-detector scripts compatible with CentOS

---
 clone-detector/backup-gtpm.sh          |  3 +-
 clone-detector/controller.py           |  3 +-
 clone-detector/execute.sh              |  3 +-
 clone-detector/move-index.sh           |  3 +-
 clone-detector/preparequery.sh         |  3 +-
 clone-detector/replacenodeprefix.sh    |  3 +-
 clone-detector/restore-gtpm.sh         |  3 +-
 clone-detector/results.pairs           | 44 ++++++++++++++++++++++++++
 clone-detector/runnodes.sh             |  3 +-
 clone-detector/search_metadata.txt     |  1 +
 clone-detector/search_status.sh        |  3 +-
 clone-detector/sourcerer-cc.properties |  2 +-
 clone-detector/splitquery.sh           |  3 +-
 clone-detector/unevensplit.py          | 20 ++++++------
 14 files changed, 76 insertions(+), 21 deletions(-)
 create mode 100644 clone-detector/results.pairs
 create mode 100644 clone-detector/search_metadata.txt
diff --git a/clone-detector/backup-gtpm.sh b/clone-detector/backup-gtpm.sh
index 000ada0c6..c15c356e9 100755
--- a/clone-detector/backup-gtpm.sh
+++ b/clone-detector/backup-gtpm.sh
@@ -1,7 +1,8 @@
 #!/bin/bash
 #
 #
-scriptPATH=`realpath $0`
+# realpath is missing on older CentOS; readlink -f resolves the script path instead
+scriptPATH=`readlink -f "$0"`
 rootPATH=`dirname $scriptPATH`
 echo "backing up gtpm indexes..."
 rm -rf $rootPATH/backup_gtpm
diff --git a/clone-detector/controller.py b/clone-detector/controller.py
index 7a424dfd8..a411493e1 100644
--- a/clone-detector/controller.py
+++ b/clone-detector/controller.py
@@ -52,7 +52,8 @@ def execute(self):
             command = self.full_script_path('execute.sh', "1")
             command_params = command.split()
             returncode = self.run_command(
-                command_params, self.full_file_path("Log_execute_1.out"), self.full_file_path("Log_execute_1.err"))
+                command_params, self.full_file_path("Log_execute_1.out"),
+                self.full_file_path("Log_execute_1.err"))
         self.current_state += 1
         if returncode == ScriptController.EXIT_SUCCESS:
             self.flush_state()
diff --git a/clone-detector/execute.sh b/clone-detector/execute.sh
index d37b0c63e..fd704aed8 100755
--- a/clone-detector/execute.sh
+++ b/clone-detector/execute.sh
@@ -1,5 +1,6 @@
 #!/bin/bash
-scriptPATH=`realpath $0`
+# realpath is missing on older CentOS; readlink -f resolves the script path instead
+scriptPATH=`readlink -f "$0"`
 rootPATH=`dirname $scriptPATH`
 echo $rootPATH
 rm -rf $rootPATH/NODE*
diff --git a/clone-detector/move-index.sh b/clone-detector/move-index.sh
index 5428b2b4b..8b30f604c 100755
--- a/clone-detector/move-index.sh
+++ b/clone-detector/move-index.sh
@@ -2,7 +2,8 @@
 #
 # Run this script after indexing with just 1 node
 #
-scriptPATH=`realpath $0`
+# realpath is missing on older CentOS; readlink -f resolves the script path instead
+scriptPATH=`readlink -f "$0"`
 rootPATH=`dirname $scriptPATH`
 echo "Installing indexes..."
 
diff --git a/clone-detector/preparequery.sh b/clone-detector/preparequery.sh
index 57fdc1f0a..d08074030 100755
--- a/clone-detector/preparequery.sh
+++ b/clone-detector/preparequery.sh
@@ -1,6 +1,7 @@
 #!/bin/bash
 num_nodes="${1:-0}"
-scriptPATH=`realpath $0`
+# realpath is missing on older CentOS; readlink -f resolves the script path instead
+scriptPATH=`readlink -f "$0"`
 rootPATH=`dirname $scriptPATH`
 echo "rootpath is : $rootPATH"
 for i in $(seq 1 1 $num_nodes)
diff --git a/clone-detector/replacenodeprefix.sh b/clone-detector/replacenodeprefix.sh
index e4959e745..5a631f3dd 100755
--- a/clone-detector/replacenodeprefix.sh
+++ b/clone-detector/replacenodeprefix.sh
@@ -1,5 +1,6 @@
 #!/bin/bash
-scriptPATH=`realpath $0`
+# realpath is missing on older CentOS; readlink -f resolves the script path instead
+scriptPATH=`readlink -f "$0"`
 rootPATH=`dirname $scriptPATH`
 num_nodes="${1:-0}"
 src_text="NODE_PREFIX=NODE"
diff --git a/clone-detector/restore-gtpm.sh b/clone-detector/restore-gtpm.sh
index cd98a838b..77d4a553e 100755
--- a/clone-detector/restore-gtpm.sh
+++ b/clone-detector/restore-gtpm.sh
@@ -1,7 +1,8 @@
 #!/bin/bash
 #
 #
-scriptPATH=`realpath $0`
+# realpath is missing on older CentOS; readlink -f resolves the script path instead
+scriptPATH=`readlink -f "$0"`
 rootPATH=`dirname $scriptPATH`
 echo "restoring gtpm indexes..."
 if [ -d "$rootPATH/gtpmindex" ]; then
diff --git a/clone-detector/results.pairs b/clone-detector/results.pairs
new file mode 100644
index 000000000..dd57a8ea8
--- /dev/null
+++ b/clone-detector/results.pairs
@@ -0,0 +1,44 @@
+1,13,1,6
+1,15,1,14
+1,25,1,10
+1,19,1,17
+1,24,1,8
+1,24,1,21
+1,20,1,17
+1,20,1,19
+1,26,1,25
+1,22,1,17
+1,28,1,8
+1,28,1,24
+1,28,1,21
+1,28,1,27
+1,22,1,19
+1,26,1,10
+1,22,1,20
+1,27,1,8
+1,23,1,17
+1,27,1,21
+1,27,1,24
+1,29,1,8
+1,29,1,27
+1,23,1,20
+1,23,1,22
+1,29,1,21
+1,29,1,24
+1,29,1,28
+1,23,1,19
+1,21,1,8
+1,33,1,15
+1,35,1,17
+1,33,1,14
+1,35,1,19
+1,35,1,23
+1,35,1,20
+1,35,1,22
+1,54,1,52
+1,54,1,50
+1,52,1,50
+1,57,1,56
+4,131,4,124
+4,163,4,158
+4,178,4,177
diff --git a/clone-detector/runnodes.sh b/clone-detector/runnodes.sh
index 0a1fdff9d..17f7d9492 100755
--- a/clone-detector/runnodes.sh
+++ b/clone-detector/runnodes.sh
@@ -1,6 +1,7 @@
 #!/bin/bash
 # run this script on master
-scriptPATH=`realpath $0`
+# realpath is missing on older CentOS; readlink -f resolves the script path instead
+scriptPATH=`readlink -f "$0"`
 rootPATH=`dirname $scriptPATH`
 echo $rootPATH
 ant -buildfile $rootPATH/build.xml clean cdi
diff --git a/clone-detector/search_metadata.txt b/clone-detector/search_metadata.txt
new file mode 100644
index 000000000..0cfbf0888
--- /dev/null
+++ b/clone-detector/search_metadata.txt
@@ -0,0 +1 @@
+2
diff --git a/clone-detector/search_status.sh b/clone-detector/search_status.sh
index 5b8e3a045..ccbd300a5 100755
--- a/clone-detector/search_status.sh
+++ b/clone-detector/search_status.sh
@@ -1,5 +1,6 @@
 #!/bin/bash
-scriptPATH=`realpath $0`
+# realpath is missing on older CentOS; readlink -f resolves the script path instead
+scriptPATH=`readlink -f "$0"`
 rootPATH=`dirname $scriptPATH`
 num=`cat search_metadata.txt`
 
diff --git a/clone-detector/sourcerer-cc.properties b/clone-detector/sourcerer-cc.properties
index 2dd958948..1bfe56ba8 100644
--- a/clone-detector/sourcerer-cc.properties
+++ b/clone-detector/sourcerer-cc.properties
@@ -9,7 +9,7 @@ IS_STATUS_REPORTER_ON=true
 LOG_PROCESSED_LINENUMBER_AFTER_X_LINES=50
 # Ignore all files outside these bounds
 MIN_TOKENS=65
-MAX_TOKENS=500000
+MAX_TOKENS=50000
 
 # Sharding speeds up search for very large datasets (>200K files).
 # For small-ish datasets, it doesn't matter so much
diff --git a/clone-detector/splitquery.sh b/clone-detector/splitquery.sh
index b35cc860a..0a160199a 100644
--- a/clone-detector/splitquery.sh
+++ b/clone-detector/splitquery.sh
@@ -4,7 +4,8 @@
 #lines_per_part = int(total_lines + $N - 1) / $N
 
 # Configuration stuff
-scriptPATH=`realpath $0`
+# realpath is missing on older CentOS; readlink -f resolves the script path instead
+scriptPATH=`readlink -f "$0"`
 rootPATH=`dirname $scriptPATH`
 echo "inside splitquery "
 queryfile="$rootPATH/input/dataset/blocks.file"
diff --git a/clone-detector/unevensplit.py b/clone-detector/unevensplit.py
index cec985cab..e82b36f2b 100644
--- a/clone-detector/unevensplit.py
+++ b/clone-detector/unevensplit.py
@@ -27,10 +27,10 @@ def split(self):
         """
         count=0
         line_limit = self.base_x
-        print "line_limit is ", line_limit 
+        print("line_limit is ", line_limit)
         file_count=1
         try:
-            print "creating split ",file_count
+            print("creating split ",file_count)
             self.outfile = open("query_{part}.file".format(part=file_count),'w')
             with open(self.input_filename,'r') as inputfile:
                 for row in inputfile:
@@ -42,15 +42,15 @@ def split(self):
                         file_count+=1
                         count =0
                         line_limit =line_limit + math.ceil(0.5*self.base_x)
-                        print "line_limit is ", line_limit 
-                        print "creating split ",file_count 
+                        print("line_limit is ", line_limit)
+                        print("creating split ",file_count)
                         self.outfile = open("query_{part}.file".format(part=file_count),'w')
                         self.outfile.write(row)
                     count+=1
             self.outfile.flush()
             self.outfile.close()
-        except IOError, e:
-            print "Error: {error}".format(error=e)
+        except IOError as e:
+            print("Error: {error}".format(error=e))
             sys.exit(1)
                 
     
@@ -58,13 +58,13 @@ def get_num_lines_in_input_file(self):
         with open(self.input_filename) as f:
             for i, l in enumerate(f):
                 pass
-        print "total lines in the inputfile: {0} ".format(i+1)
+        print("total lines in the inputfile: {0} ".format(i+1))
         return i + 1
     
     def find_base_x(self):
         # formula for S = x + x+.5x + x+2*.5x...x + (N-1)*.5x
         self.base_x= math.ceil(float(2*self.total_lines)/(float((self.split_count+1)*(self.split_count+2)/2) - 1))
-        print "base_x is ", self.base_x
+        print("base_x is ", self.base_x)
         
 if __name__ == '__main__':
     
@@ -73,7 +73,7 @@ def find_base_x(self):
     split_count = int(sys.argv[2])
     params= {'split_count':split_count,
              'input_filename' : input_file}
-    print "spliting {inputfile} in {count} chunks".format(inputfile=input_file,count=split_count)
+    print("spliting {inputfile} in {count} chunks".format(inputfile=input_file,count=split_count))
     splitter = Spliter(params)
     splitter.split()
-    print "splitting done!"
\ No newline at end of file
+    print("splitting done!")