diff --git a/machine_learning/sequential_minimum_optimization.py b/machine_learning/sequential_minimum_optimization.py
index 2ebdeb764a80..625fc28fe60c 100644
--- a/machine_learning/sequential_minimum_optimization.py
+++ b/machine_learning/sequential_minimum_optimization.py
@@ -1,11 +1,9 @@
 """
-    Implementation of sequential minimal optimization (SMO) for support vector machines
-    (SVM).
+Sequential minimal optimization (SMO) for support vector machines (SVM)
 
-    Sequential minimal optimization (SMO) is an algorithm for solving the quadratic
-    programming (QP) problem that arises during the training of support vector
-    machines.
-    It was invented by John Platt in 1998.
+Sequential minimal optimization (SMO) is an algorithm for solving the quadratic
+programming (QP) problem that arises during the training of SVMs. It was invented by
+John Platt in 1998.
 
 Input:
     0: type: numpy.ndarray.
@@ -124,8 +122,7 @@ def fit(self):
             b_old = self._b
             self._b = b
 
-            # 4:  update error value,here we only calculate those non-bound samples'
-            #     error
+            # 4: update error, here we only calculate the error for non-bound samples
             self._unbound = [i for i in self._all_samples if self._is_unbound(i)]
             for s in self.unbound:
                 if s in (i1, i2):
@@ -136,7 +133,7 @@ def fit(self):
                     + (self._b - b_old)
                 )
 
-            # if i1 or i2 is non-bound,update there error value to zero
+            # if i1 or i2 is non-bound, update their error value to zero
             if self._is_unbound(i1):
                 self._error[i1] = 0
             if self._is_unbound(i2):
@@ -161,7 +158,7 @@ def predict(self, test_samples, classify=True):
             results.append(result)
         return np.array(results)
 
-    # Check if alpha violate KKT condition
+    # Check if alpha violates the KKT condition
    def _check_obey_kkt(self, index):
         alphas = self.alphas
         tol = self._tol
@@ -172,20 +169,19 @@ def _check_obey_kkt(self, index):
 
     # Get value calculated from kernel function
     def _k(self, i1, i2):
-        # for test samples,use Kernel function
+        # for test samples, use kernel function
         if isinstance(i2, np.ndarray):
             return self.Kernel(self.samples[i1], i2)
-        # for train samples,Kernel values have been saved in matrix
+        # for training samples, kernel values have been saved in matrix
         else:
             return self._K_matrix[i1, i2]
 
-    # Get sample's error
+    # Get error for sample
     def _e(self, index):
         """
         Two cases:
-          1:Sample[index] is non-bound,Fetch error from list: _error
-          2:sample[index] is bound,Use predicted value deduct true value: g(xi) - yi
-
+        1: Sample[index] is non-bound, fetch error from list: _error
+        2: sample[index] is bound, use predicted value minus true value: g(xi) - yi
         """
         # get from error data
         if self._is_unbound(index):
@@ -196,7 +192,7 @@ def _e(self, index):
         yi = self.tags[index]
         return gx - yi
 
-    # Calculate Kernel matrix of all possible i1,i2 ,saving time
+    # Calculate kernel matrix of all possible i1, i2, saving time
     def _calculate_k_matrix(self):
         k_matrix = np.zeros([self.length, self.length])
         for i in self._all_samples:
@@ -206,7 +202,7 @@ def _calculate_k_matrix(self):
             )
         return k_matrix
 
-    # Predict test sample's tag
+    # Predict tag for test sample
     def _predict(self, sample):
         k = self._k
         predicted_value = (
@@ -222,30 +218,31 @@ def _predict(self, sample):
 
     # Choose alpha1 and alpha2
     def _choose_alphas(self):
-        locis = yield from self._choose_a1()
-        if not locis:
+        loci = yield from self._choose_a1()
+        if not loci:
             return None
-        return locis
+        return loci
 
     def _choose_a1(self):
         """
-        Choose first alpha ;steps:
-           1:First loop over all sample
-           2:Second loop over all non-bound samples till all non-bound samples does not
-           voilate kkt condition.
-           3:Repeat this two process endlessly,till all samples does not voilate kkt
-           condition samples after first loop.
+        Choose first alpha
+        Steps:
+        1: First loop over all samples
+        2: Second loop over all non-bound samples until no non-bound samples violate
+        the KKT condition.
+        3: Repeat these two processes until no samples violate the KKT condition
+        after the first loop.
         """
         while True:
             all_not_obey = True
             # all sample
-            print("scanning all sample!")
+            print("Scanning all samples!")
             for i1 in [i for i in self._all_samples if self._check_obey_kkt(i)]:
                 all_not_obey = False
                 yield from self._choose_a2(i1)
 
             # non-bound sample
-            print("scanning non-bound sample!")
+            print("Scanning non-bound samples!")
             while True:
                 not_obey = True
                 for i1 in [
@@ -256,20 +253,21 @@ def _choose_a1(self):
                     not_obey = False
                     yield from self._choose_a2(i1)
                 if not_obey:
-                    print("all non-bound samples fit the KKT condition!")
+                    print("All non-bound samples satisfy the KKT condition!")
                     break
             if all_not_obey:
-                print("all samples fit the KKT condition! Optimization done!")
+                print("All samples satisfy the KKT condition! Optimization done!")
                 break
         return False
 
     def _choose_a2(self, i1):
         """
-        Choose the second alpha by using heuristic algorithm ;steps:
-           1: Choose alpha2 which gets the maximum step size (|E1 - E2|).
-           2: Start in a random point,loop over all non-bound samples till alpha1 and
-           alpha2 are optimized.
-           3: Start in a random point,loop over all samples till alpha1 and alpha2 are
-           optimized.
+        Choose the second alpha using a heuristic algorithm
+        Steps:
+        1: Choose alpha2 that maximizes the step size (|E1 - E2|).
+        2: Start at a random point, loop over all non-bound samples until alpha1 and
+        alpha2 are optimized.
+        3: Start at a random point, loop over all samples until alpha1 and alpha2 are
+        optimized.
         """
         self._unbound = [i for i in self._all_samples if self._is_unbound(i)]
@@ -306,7 +304,7 @@ def _get_new_alpha(self, i1, i2, a1, a2, e1, e2, y1, y2):
         if i1 == i2:
             return None, None
 
-        # calculate L and H  which bound the new alpha2
+        # calculate L and H which bound the new alpha2
         s = y1 * y2
         if s == -1:
             l, h = max(0.0, a2 - a1), min(self._c, self._c + a2 - a1)  # noqa: E741
@@ -320,7 +318,7 @@ def _get_new_alpha(self, i1, i2, a1, a2, e1, e2, y1, y2):
         k22 = k(i2, i2)
         k12 = k(i1, i2)
 
-        # select the new alpha2 which could get the minimal objectives
+        # select the new alpha2 which could achieve the minimal objectives
         if (eta := k11 + k22 - 2.0 * k12) > 0.0:
             a2_new_unc = a2 + (y2 * (e1 - e2)) / eta
             # a2_new has a boundary
@@ -335,7 +333,7 @@ def _get_new_alpha(self, i1, i2, a1, a2, e1, e2, y1, y2):
         l1 = a1 + s * (a2 - l)
         h1 = a1 + s * (a2 - h)
 
-        # way 1
+        # Method 1
         f1 = y1 * (e1 + b) - a1 * k(i1, i1) - s * a2 * k(i1, i2)
         f2 = y2 * (e2 + b) - a2 * k(i2, i2) - s * a1 * k(i1, i2)
         ol = (
@@ -353,9 +351,8 @@ def _get_new_alpha(self, i1, i2, a1, a2, e1, e2, y1, y2):
             + s * h * h1 * k(i1, i2)
         )
         """
-        # way 2
-        Use objective function check which alpha2 new could get the minimal
-        objectives
+        Method 2: Use objective function to check which alpha2_new could achieve the
+        minimal objectives
         """
         if ol < (oh - self._eps):
             a2_new = l
@@ -375,7 +372,7 @@ def _get_new_alpha(self, i1, i2, a1, a2, e1, e2, y1, y2):
 
         return a1_new, a2_new
 
-    # Normalise data using min_max way
+    # Normalize data using min-max method
     def _norm(self, data):
         if self._init:
             self._min = np.min(data, axis=0)
@@ -424,7 +421,7 @@ def _rbf(self, v1, v2):
 
     def _check(self):
         if self._kernel == self._rbf and self.gamma < 0:
-            raise ValueError("gamma value must greater than 0")
+            raise ValueError("gamma value must be non-negative")
 
     def _get_kernel(self, kernel_name):
         maps = {"linear": self._linear, "poly": self._polynomial, "rbf": self._rbf}
@@ -444,27 +441,27 @@ def call_func(*args, **kwargs):
         start_time = time.time()
         func(*args, **kwargs)
         end_time = time.time()
-        print(f"smo algorithm cost {end_time - start_time} seconds")
+        print(f"SMO algorithm took {end_time - start_time} seconds")
 
     return call_func
 
 
 @count_time
-def test_cancel_data():
-    print("Hello!\nStart test svm by smo algorithm!")
+def test_cancer_data():
+    print("Hello!\nStart testing SVM using the SMO algorithm!")
     # 0: download dataset and load into pandas' dataframe
-    if not os.path.exists(r"cancel_data.csv"):
+    if not os.path.exists(r"cancer_data.csv"):
         request = urllib.request.Request(  # noqa: S310
             CANCER_DATASET_URL,
             headers={"User-Agent": "Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)"},
         )
         response = urllib.request.urlopen(request)  # noqa: S310
         content = response.read().decode("utf-8")
-        with open(r"cancel_data.csv", "w") as f:
+        with open(r"cancer_data.csv", "w") as f:
             f.write(content)
 
     data = pd.read_csv(
-        "cancel_data.csv",
+        "cancer_data.csv",
         header=None,
         dtype={0: str},  # Assuming the first column contains string data
     )
@@ -479,14 +476,14 @@ def test_cancel_data():
     train_data, test_data = samples[:328, :], samples[328:, :]
     test_tags, test_samples = test_data[:, 0], test_data[:, 1:]
 
-    # 3: choose kernel function,and set initial alphas to zero(optional)
-    mykernel = Kernel(kernel="rbf", degree=5, coef0=1, gamma=0.5)
+    # 3: choose kernel function, and set initial alphas to zero (optional)
+    my_kernel = Kernel(kernel="rbf", degree=5, coef0=1, gamma=0.5)
     al = np.zeros(train_data.shape[0])
 
     # 4: calculating best alphas using SMO algorithm and predict test_data samples
     mysvm = SmoSVM(
         train=train_data,
-        kernel_func=mykernel,
+        kernel_func=my_kernel,
         alpha_list=al,
         cost=0.4,
         b=0.0,
@@ -501,30 +498,30 @@ def test_cancel_data():
     for i in range(test_tags.shape[0]):
         if test_tags[i] == predict[i]:
             score += 1
-    print(f"\nall: {test_num}\nright: {score}\nfalse: {test_num - score}")
+    print(f"\nAll: {test_num}\nCorrect: {score}\nIncorrect: {test_num - score}")
     print(f"Rough Accuracy: {score / test_tags.shape[0]}")
 
 
 def test_demonstration():
     # change stdout
-    print("\nStart plot,please wait!!!")
+    print("\nStarting plot, please wait!")
     sys.stdout = open(os.devnull, "w")
 
     ax1 = plt.subplot2grid((2, 2), (0, 0))
     ax2 = plt.subplot2grid((2, 2), (0, 1))
     ax3 = plt.subplot2grid((2, 2), (1, 0))
     ax4 = plt.subplot2grid((2, 2), (1, 1))
-    ax1.set_title("linear svm,cost:0.1")
+    ax1.set_title("Linear SVM, cost = 0.1")
     test_linear_kernel(ax1, cost=0.1)
-    ax2.set_title("linear svm,cost:500")
+    ax2.set_title("Linear SVM, cost = 500")
     test_linear_kernel(ax2, cost=500)
-    ax3.set_title("rbf kernel svm,cost:0.1")
+    ax3.set_title("RBF kernel SVM, cost = 0.1")
     test_rbf_kernel(ax3, cost=0.1)
-    ax4.set_title("rbf kernel svm,cost:500")
+    ax4.set_title("RBF kernel SVM, cost = 500")
     test_rbf_kernel(ax4, cost=500)
 
     sys.stdout = sys.__stdout__
-    print("Plot done!!!")
+    print("Plot done!")
 
 
 def test_linear_kernel(ax, cost):
@@ -535,10 +532,10 @@ def test_linear_kernel(ax, cost):
     scaler = StandardScaler()
     train_x_scaled = scaler.fit_transform(train_x, train_y)
     train_data = np.hstack((train_y.reshape(500, 1), train_x_scaled))
-    mykernel = Kernel(kernel="linear", degree=5, coef0=1, gamma=0.5)
+    my_kernel = Kernel(kernel="linear", degree=5, coef0=1, gamma=0.5)
     mysvm = SmoSVM(
         train=train_data,
-        kernel_func=mykernel,
+        kernel_func=my_kernel,
         cost=cost,
         tolerance=0.001,
         auto_norm=False,
@@ -555,10 +552,10 @@ def test_rbf_kernel(ax, cost):
     scaler = StandardScaler()
     train_x_scaled = scaler.fit_transform(train_x, train_y)
     train_data = np.hstack((train_y.reshape(500, 1), train_x_scaled))
-    mykernel = Kernel(kernel="rbf", degree=5, coef0=1, gamma=0.5)
+    my_kernel = Kernel(kernel="rbf", degree=5, coef0=1, gamma=0.5)
     mysvm = SmoSVM(
         train=train_data,
-        kernel_func=mykernel,
+        kernel_func=my_kernel,
         cost=cost,
         tolerance=0.001,
         auto_norm=False,
@@ -571,11 +568,11 @@ def plot_partition_boundary(
     model, train_data, ax, resolution=100, colors=("b", "k", "r")
 ):
     """
-    We can not get the optimum w of our kernel svm model which is different from linear
-    svm. For this reason, we generate randomly distributed points with high desity and
-    prediced values of these points are calculated by using our trained model. Then we
-    could use this prediced values to draw contour map.
-    And this contour map can represent svm's partition boundary.
+    We cannot get the optimal w of our kernel SVM model, which is different from a
+    linear SVM. For this reason, we generate randomly distributed points with high
+    density, and predicted values of these points are calculated using our trained
+    model. Then we could use these predicted values to draw a contour map, and this
+    contour map represents the SVM's partition boundary.
     """
     train_data_x = train_data[:, 1]
     train_data_y = train_data[:, 2]
@@ -620,6 +617,6 @@
 
 
 if __name__ == "__main__":
-    test_cancel_data()
+    test_cancer_data()
     test_demonstration()
     plt.show()
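For reviewers: the heart of the patched `_get_new_alpha` is Platt's analytic two-variable update. The following standalone sketch restates that step using the names from the hunks above (`s`, `l`, `h`, `eta`); the `s == 1` bounds and the `a1` update do not appear in this diff, so they are filled in with the standard Platt (1998) formulas and should be read as assumptions, not as lines from the patched file.

```python
# Standalone sketch of one SMO pair update, mirroring _get_new_alpha.
# This diff only shows the s == -1 branch and the eta > 0 path.


def smo_pair_update(a1, a2, y1, y2, e1, e2, k11, k22, k12, c):
    s = y1 * y2
    # L and H bound the new alpha2 so that 0 <= alpha <= c holds for both alphas
    if s == -1:
        l, h = max(0.0, a2 - a1), min(c, c + a2 - a1)
    else:  # assumed standard Platt bounds; not shown in the hunks above
        l, h = max(0.0, a2 + a1 - c), min(c, a2 + a1)
    if l == h:
        return None  # no room to move along the constraint line

    eta = k11 + k22 - 2.0 * k12  # curvature of the objective along the line
    if eta <= 0.0:
        # simplified: the patched code instead compares the objective at l and h
        return None
    a2_new = min(max(a2 + y2 * (e1 - e2) / eta, l), h)  # clip unconstrained step
    a1_new = a1 + s * (a2 - a2_new)  # keeps y1*a1 + y2*a2 constant
    return a1_new, a2_new
```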
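A minimal smoke test of the renamed API, in the spirit of `test_linear_kernel`: the `make_blobs` data, the ±1 tag conversion, and the repo-root import path are assumptions modeled on the module's demo code, not part of this diff.

```python
# Hypothetical smoke test; assumes it is run from the repository root and that
# scikit-learn is available, as in the module's demo functions.
import numpy as np
from sklearn.datasets import make_blobs
from sklearn.preprocessing import StandardScaler

from machine_learning.sequential_minimum_optimization import Kernel, SmoSVM

train_x, train_y = make_blobs(n_samples=500, centers=2, random_state=0)
train_y[train_y == 0] = -1  # SmoSVM expects tags in {-1, 1}
train_x_scaled = StandardScaler().fit_transform(train_x)
# column 0 holds the tag, the remaining columns hold the features
train_data = np.hstack((train_y.reshape(500, 1), train_x_scaled))

my_kernel = Kernel(kernel="rbf", degree=5, coef0=1, gamma=0.5)
mysvm = SmoSVM(
    train=train_data,
    kernel_func=my_kernel,
    cost=0.4,
    tolerance=0.001,
    auto_norm=False,  # data is already standardized
)
mysvm.fit()
print(mysvm.predict(train_x_scaled[:5]))  # predicted tags for the first 5 samples
```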