kochigami · kochigami · Oct 3, 2018
diff --git a/nominal/paired_two_sample_test_of_nominal_scale.py b/nominal/paired_two_sample_test_of_nominal_scale.py
@@ -12,6 +12,7 @@
 class PairedTwoSampleTestOfNominalScale:
     def test(self, data):
         """
+        There is a question which we can answer yes (1) or no (0).
         data = {"Before": [1,1,1,1,1,...,0], "After": [1,1,1,1,1,...,0]}
 
                        Yes   No   Total
@@ -26,10 +27,10 @@ def test(self, data):
         number of Yes => No: b
         number of No => Yes: c
         """
-        # check data length is 2
-        if len(data.keys()) != 2 and len(data[data.keys()[0]]) != len(data[data.keys()[1]]):
-            print "Please check the components of your data."
-            print "length of data should be four"
+        # check that there are exactly two groups and that both have the same number of samples
+        if len(data.keys()) != 2 or len(data[data.keys()[0]]) != len(data[data.keys()[1]]):
+            print ("Please check the components of your data.")
+            print ("the number of each data should be equal")
             sys.exit()
         else:
             b = 0
@@ -39,13 +40,17 @@ def test(self, data):
                     b += 1
                 elif data[(data.keys())[0]][i] == 0 and data[(data.keys())[1]][i] == 1:
                     c += 1
-            # z = abs(b-c)-1 / root(b+c)
-            # chi2 = pow((abs(b-c)-1), 2.0) / (b+c)
+            # calculate the chi-square value with Yates' continuity correction (イェーツの連続修正)
+            chi2 = pow((abs(b-c)-1), 2.0) / (b+c)
+
+            '''
+            Previous form of the same statistic; note that it also applies Yates' continuity correction (without the correction the numerator would be pow(b - c, 2.0)):
             chi2 = pow(abs(b-c) - 1.0, 2.0) / (b+c)
+            '''
             p = stats.chi2.pdf(chi2, df=1)
-            # pdf: probability density function
-            # cdf: Cumulative distribution function
-            # https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.chi2.html
             print "chi2 value: {}".format(chi2)
             print "p value: {}".format(p)
             return p
+
+if __name__ == '__main__':
+    pass
diff --git a/nominal/unpaired_two_sample_test_of_nominal_scale.py b/nominal/unpaired_two_sample_test_of_nominal_scale.py
@@ -6,27 +6,25 @@
 
 class UnpairedTwoSampleTestOfNominalScale:
     def test(self, data):
-        # check data length
-        if len(data.keys()) != 2:
-            print "len(data.keys()) should be two"
-            sys.exit()
-        elif len(data[(data.keys())[0]]) != len(data[(data.keys())[1]]):
-            print "len(data[(data.keys())[0]]) and len(data[(data.keys())[1]]) should be same"
+        # check that there are exactly two groups and that both have the same number of samples
+        if len(data.keys()) != 2 or len(data[data.keys()[0]]) != len(data[data.keys()[1]]):
+            print ("Please check the components of your data.")
+            print ("the number of each data should be equal")
             sys.exit()
         else:
             """
-            Is there any difference between the number of people who satisfies Condition1 and Yes (a) and that of people who satisfies Condition2 and Yes (c)?
+            Is there any difference between the number of people who answer Yes under Condition1 (a) and the number of people who answer Yes under Condition2 (c)?
             data = {"Condition1": [a, b], "Condition2": [c, d]}
-            OrderedDict([('Illness', [52, 8]), ('Healty', [48, 42])])
+            e.g. OrderedDict([('Illness', [52, 8]), ('Healthy', [48, 42])])
 
-                            Yes   No   Total <= sum_row
+                            Yes   No   Total <= sum_row: [a+b, c+d]
               --------------------------------------
               Condition1     a     b    a+b
               Condition2     c     d    c+d
               --------------------------------------
               Total         a+c   b+d    n (= a+b+c+d)
                ^
-               |_ sum_column 
+               |_ sum_column: [a+c, b+d] 
 
             """
             # calculate n