diff --git a/machine_learning/sequential_minimum_optimization.py b/machine_learning/sequential_minimum_optimization.py
index 2ebdeb764a80..625fc28fe60c 100644
--- a/machine_learning/sequential_minimum_optimization.py
+++ b/machine_learning/sequential_minimum_optimization.py
@@ -1,11 +1,9 @@
 """
-    Implementation of sequential minimal optimization (SMO) for support vector machines
-    (SVM).
+Sequential minimal optimization (SMO) for support vector machines (SVM)
 
-    Sequential minimal optimization (SMO) is an algorithm for solving the quadratic
-    programming (QP) problem that arises during the training of support vector
-    machines.
-    It was invented by John Platt in 1998.
+Sequential minimal optimization (SMO) is an algorithm for solving the quadratic
+programming (QP) problem that arises during the training of SVMs. It was invented by
+John Platt in 1998.
 
 Input:
     0: type: numpy.ndarray.
@@ -124,8 +122,7 @@ def fit(self):
             b_old = self._b
             self._b = b
 
-            # 4:  update error value,here we only calculate those non-bound samples'
-            #     error
+            # 4: update error, here we only calculate the error for non-bound samples
             self._unbound = [i for i in self._all_samples if self._is_unbound(i)]
             for s in self.unbound:
                 if s in (i1, i2):
@@ -136,7 +133,7 @@ def fit(self):
                     + (self._b - b_old)
                 )
 
-            # if i1 or i2 is non-bound,update there error value to zero
+            # if i1 or i2 is non-bound, update their error value to zero
             if self._is_unbound(i1):
                 self._error[i1] = 0
             if self._is_unbound(i2):
@@ -161,7 +158,7 @@ def predict(self, test_samples, classify=True):
             results.append(result)
         return np.array(results)
 
-    # Check if alpha violate KKT condition
+    # Check if alpha violates the KKT condition
    def _check_obey_kkt(self, index):
         alphas = self.alphas
         tol = self._tol
@@ -172,20 +169,19 @@ def _check_obey_kkt(self, index):
 
     # Get value calculated from kernel function
     def _k(self, i1, i2):
-        # for test samples,use Kernel function
+        # for test samples, use kernel function
         if isinstance(i2, np.ndarray):
             return self.Kernel(self.samples[i1], i2)
-        # for train samples,Kernel values have been saved in matrix
+        # for training samples, kernel values have been saved in matrix
         else:
             return self._K_matrix[i1, i2]
 
-    # Get sample's error
+    # Get error for sample
     def _e(self, index):
         """
         Two cases:
-          1:Sample[index] is non-bound,Fetch error from list: _error
-          2:sample[index] is bound,Use predicted value deduct true value: g(xi) - yi
-
+        1: Sample[index] is non-bound, fetch error from list: _error
+        2: sample[index] is bound, use predicted value minus true value: g(xi) - yi
         """
         # get from error data
         if self._is_unbound(index):
@@ -196,7 +192,7 @@ def _e(self, index):
         yi = self.tags[index]
         return gx - yi
 
-    # Calculate Kernel matrix of all possible i1,i2 ,saving time
+    # Calculate kernel matrix of all possible i1, i2, saving time
     def _calculate_k_matrix(self):
         k_matrix = np.zeros([self.length, self.length])
         for i in self._all_samples:
@@ -206,7 +202,7 @@ def _calculate_k_matrix(self):
             )
         return k_matrix
 
-    # Predict test sample's tag
+    # Predict tag for test sample
     def _predict(self, sample):
         k = self._k
         predicted_value = (
@@ -222,30 +218,31 @@ def _predict(self, sample):
 
     # Choose alpha1 and alpha2
     def _choose_alphas(self):
-        locis = yield from self._choose_a1()
-        if not locis:
+        loci = yield from self._choose_a1()
+        if not loci:
             return None
-        return locis
+        return loci
 
     def _choose_a1(self):
         """
-        Choose first alpha ;steps:
-           1:First loop over all sample
-           2:Second loop over all non-bound samples till all non-bound samples does not
-           voilate kkt condition.
-           3:Repeat this two process endlessly,till all samples does not voilate kkt
-           condition samples after first loop.
+        Choose first alpha
+        Steps:
+        1: First loop over all samples
+        2: Second loop over all non-bound samples until no non-bound samples violate
+        the KKT condition.
+        3: Repeat these two processes until no samples violate the KKT condition
+        after the first loop.
         """
         while True:
             all_not_obey = True
             # all sample
-            print("scanning all sample!")
+            print("Scanning all samples!")
             for i1 in [i for i in self._all_samples if self._check_obey_kkt(i)]:
                 all_not_obey = False
                 yield from self._choose_a2(i1)
 
             # non-bound sample
-            print("scanning non-bound sample!")
+            print("Scanning non-bound samples!")
             while True:
                 not_obey = True
                 for i1 in [
@@ -256,20 +253,21 @@ def _choose_a1(self):
                     not_obey = False
                     yield from self._choose_a2(i1)
                 if not_obey:
-                    print("all non-bound samples fit the KKT condition!")
+                    print("All non-bound samples satisfy the KKT condition!")
                     break
             if all_not_obey:
-                print("all samples fit the KKT condition! Optimization done!")
+                print("All samples satisfy the KKT condition! Optimization done!")
                 break
         return False
 
     def _choose_a2(self, i1):
         """
-        Choose the second alpha by using heuristic algorithm ;steps:
-           1: Choose alpha2 which gets the maximum step size (|E1 - E2|).
-           2: Start in a random point,loop over all non-bound samples till alpha1 and
-           alpha2 are optimized.
-           3: Start in a random point,loop over all samples till alpha1 and alpha2 are
-           optimized.
+        Choose the second alpha using a heuristic algorithm
+        Steps:
+        1: Choose alpha2 that maximizes the step size (|E1 - E2|).
+        2: Start at a random point, loop over all non-bound samples until alpha1 and
+        alpha2 are optimized.
+        3: Start at a random point, loop over all samples until alpha1 and alpha2 are
+        optimized.
         """
         self._unbound = [i for i in self._all_samples if self._is_unbound(i)]
@@ -306,7 +304,7 @@ def _get_new_alpha(self, i1, i2, a1, a2, e1, e2, y1, y2):
         if i1 == i2:
             return None, None
 
-        # calculate L and H  which bound the new alpha2
+        # calculate L and H which bound the new alpha2
         s = y1 * y2
         if s == -1:
             l, h = max(0.0, a2 - a1), min(self._c, self._c + a2 - a1)  # noqa: E741
@@ -320,7 +318,7 @@ def _get_new_alpha(self, i1, i2, a1, a2, e1, e2, y1, y2):
         k22 = k(i2, i2)
         k12 = k(i1, i2)
 
-        # select the new alpha2 which could get the minimal objectives
+        # select the new alpha2 which could achieve the minimal objectives
         if (eta := k11 + k22 - 2.0 * k12) > 0.0:
             a2_new_unc = a2 + (y2 * (e1 - e2)) / eta
             # a2_new has a boundary
@@ -335,7 +333,7 @@ def _get_new_alpha(self, i1, i2, a1, a2, e1, e2, y1, y2):
         l1 = a1 + s * (a2 - l)
         h1 = a1 + s * (a2 - h)
 
-        # way 1
+        # Method 1
         f1 = y1 * (e1 + b) - a1 * k(i1, i1) - s * a2 * k(i1, i2)
         f2 = y2 * (e2 + b) - a2 * k(i2, i2) - s * a1 * k(i1, i2)
         ol = (
@@ -353,9 +351,8 @@ def _get_new_alpha(self, i1, i2, a1, a2, e1, e2, y1, y2):
             + s * h * h1 * k(i1, i2)
         )
         """
-        # way 2
-        Use objective function check which alpha2 new could get the minimal
-        objectives
+        Method 2: Use objective function to check which alpha2_new could achieve the
+        minimal objectives
         """
         if ol < (oh - self._eps):
             a2_new = l
@@ -375,7 +372,7 @@ def _get_new_alpha(self, i1, i2, a1, a2, e1, e2, y1, y2):
 
         return a1_new, a2_new
 
-    # Normalise data using min_max way
+    # Normalize data using min-max method
     def _norm(self, data):
         if self._init:
             self._min = np.min(data, axis=0)
@@ -424,7 +421,7 @@ def _rbf(self, v1, v2):
 
     def _check(self):
         if self._kernel == self._rbf and self.gamma < 0:
-            raise ValueError("gamma value must greater than 0")
+            raise ValueError("gamma value must be non-negative")
 
     def _get_kernel(self, kernel_name):
         maps = {"linear": self._linear, "poly": self._polynomial, "rbf": self._rbf}
@@ -444,27 +441,27 @@ def call_func(*args, **kwargs):
         start_time = time.time()
         func(*args, **kwargs)
         end_time = time.time()
-        print(f"smo algorithm cost {end_time - start_time} seconds")
+        print(f"SMO algorithm took {end_time - start_time} seconds")
 
     return call_func
 
 
 @count_time
-def test_cancel_data():
-    print("Hello!\nStart test svm by smo algorithm!")
+def test_cancer_data():
+    print("Hello!\nStart testing SVM using the SMO algorithm!")
     # 0: download dataset and load into pandas' dataframe
-    if not os.path.exists(r"cancel_data.csv"):
+    if not os.path.exists(r"cancer_data.csv"):
         request = urllib.request.Request(  # noqa: S310
             CANCER_DATASET_URL,
             headers={"User-Agent": "Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)"},
         )
         response = urllib.request.urlopen(request)  # noqa: S310
         content = response.read().decode("utf-8")
-        with open(r"cancel_data.csv", "w") as f:
+        with open(r"cancer_data.csv", "w") as f:
             f.write(content)
 
     data = pd.read_csv(
-        "cancel_data.csv",
+        "cancer_data.csv",
         header=None,
         dtype={0: str},  # Assuming the first column contains string data
     )
@@ -479,14 +476,14 @@ def test_cancel_data():
     train_data, test_data = samples[:328, :], samples[328:, :]
     test_tags, test_samples = test_data[:, 0], test_data[:, 1:]
 
-    # 3: choose kernel function,and set initial alphas to zero(optional)
-    mykernel = Kernel(kernel="rbf", degree=5, coef0=1, gamma=0.5)
+    # 3: choose kernel function, and set initial alphas to zero (optional)
+    my_kernel = Kernel(kernel="rbf", degree=5, coef0=1, gamma=0.5)
     al = np.zeros(train_data.shape[0])
 
     # 4: calculating best alphas using SMO algorithm and predict test_data samples
     mysvm = SmoSVM(
         train=train_data,
-        kernel_func=mykernel,
+        kernel_func=my_kernel,
         alpha_list=al,
         cost=0.4,
         b=0.0,
@@ -501,30 +498,30 @@ def test_cancel_data():
     for i in range(test_tags.shape[0]):
         if test_tags[i] == predict[i]:
             score += 1
-    print(f"\nall: {test_num}\nright: {score}\nfalse: {test_num - score}")
+    print(f"\nAll: {test_num}\nCorrect: {score}\nIncorrect: {test_num - score}")
     print(f"Rough Accuracy: {score / test_tags.shape[0]}")
 
 
 def test_demonstration():
     # change stdout
-    print("\nStart plot,please wait!!!")
+    print("\nStarting plot, please wait!")
     sys.stdout = open(os.devnull, "w")
 
     ax1 = plt.subplot2grid((2, 2), (0, 0))
     ax2 = plt.subplot2grid((2, 2), (0, 1))
     ax3 = plt.subplot2grid((2, 2), (1, 0))
     ax4 = plt.subplot2grid((2, 2), (1, 1))
-    ax1.set_title("linear svm,cost:0.1")
+    ax1.set_title("Linear SVM, cost = 0.1")
     test_linear_kernel(ax1, cost=0.1)
-    ax2.set_title("linear svm,cost:500")
+    ax2.set_title("Linear SVM, cost = 500")
     test_linear_kernel(ax2, cost=500)
-    ax3.set_title("rbf kernel svm,cost:0.1")
+    ax3.set_title("RBF kernel SVM, cost = 0.1")
     test_rbf_kernel(ax3, cost=0.1)
-    ax4.set_title("rbf kernel svm,cost:500")
+    ax4.set_title("RBF kernel SVM, cost = 500")
     test_rbf_kernel(ax4, cost=500)
 
     sys.stdout = sys.__stdout__
-    print("Plot done!!!")
+    print("Plot done!")
 
 
 def test_linear_kernel(ax, cost):
@@ -535,10 +532,10 @@ def test_linear_kernel(ax, cost):
     scaler = StandardScaler()
     train_x_scaled = scaler.fit_transform(train_x, train_y)
     train_data = np.hstack((train_y.reshape(500, 1), train_x_scaled))
-    mykernel = Kernel(kernel="linear", degree=5, coef0=1, gamma=0.5)
+    my_kernel = Kernel(kernel="linear", degree=5, coef0=1, gamma=0.5)
     mysvm = SmoSVM(
         train=train_data,
-        kernel_func=mykernel,
+        kernel_func=my_kernel,
         cost=cost,
         tolerance=0.001,
         auto_norm=False,
@@ -555,10 +552,10 @@ def test_rbf_kernel(ax, cost):
     scaler = StandardScaler()
     train_x_scaled = scaler.fit_transform(train_x, train_y)
     train_data = np.hstack((train_y.reshape(500, 1), train_x_scaled))
-    mykernel = Kernel(kernel="rbf", degree=5, coef0=1, gamma=0.5)
+    my_kernel = Kernel(kernel="rbf", degree=5, coef0=1, gamma=0.5)
     mysvm = SmoSVM(
         train=train_data,
-        kernel_func=mykernel,
+        kernel_func=my_kernel,
         cost=cost,
         tolerance=0.001,
         auto_norm=False,
@@ -571,11 +568,11 @@ def plot_partition_boundary(
     model, train_data, ax, resolution=100, colors=("b", "k", "r")
 ):
     """
-    We can not get the optimum w of our kernel svm model which is different from linear
-    svm. For this reason, we generate randomly distributed points with high desity and
-    prediced values of these points are calculated by using our trained model. Then we
-    could use this prediced values to draw contour map.
-    And this contour map can represent svm's partition boundary.
+    We cannot get the optimal w of our kernel SVM model, which is different from a
+    linear SVM. For this reason, we generate randomly distributed points with high
+    density, and predicted values of these points are calculated using our trained
+    model. Then we could use these predicted values to draw a contour map, and this
+    contour map represents the SVM's partition boundary.
     """
     train_data_x = train_data[:, 1]
     train_data_y = train_data[:, 2]
@@ -620,6 +617,6 @@
 
 
 if __name__ == "__main__":
-    test_cancel_data()
+    test_cancer_data()
     test_demonstration()
     plt.show()
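For reviewers: the heart of the patched `_get_new_alpha` is Platt's analytic two-variable update. The following standalone sketch restates that step using the names from the hunks above (`s`, `l`, `h`, `eta`); the `s == 1` bounds and the `a1` update do not appear in this diff, so they are filled in with the standard Platt (1998) formulas and should be read as assumptions, not as lines from the patched file.

```python
# Standalone sketch of one SMO pair update, mirroring _get_new_alpha.
# This diff only shows the s == -1 branch and the eta > 0 path.


def smo_pair_update(a1, a2, y1, y2, e1, e2, k11, k22, k12, c):
    s = y1 * y2
    # L and H bound the new alpha2 so that 0 <= alpha <= c holds for both alphas
    if s == -1:
        l, h = max(0.0, a2 - a1), min(c, c + a2 - a1)
    else:  # assumed standard Platt bounds; not shown in the hunks above
        l, h = max(0.0, a2 + a1 - c), min(c, a2 + a1)
    if l == h:
        return None  # no room to move along the constraint line

    eta = k11 + k22 - 2.0 * k12  # curvature of the objective along the line
    if eta <= 0.0:
        # simplified: the patched code instead compares the objective at l and h
        return None
    a2_new = min(max(a2 + y2 * (e1 - e2) / eta, l), h)  # clip unconstrained step
    a1_new = a1 + s * (a2 - a2_new)  # keeps y1*a1 + y2*a2 constant
    return a1_new, a2_new
```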
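A minimal smoke test of the renamed API, in the spirit of `test_linear_kernel`: the `make_blobs` data, the ±1 tag conversion, and the repo-root import path are assumptions modeled on the module's demo code, not part of this diff.

```python
# Hypothetical smoke test; assumes it is run from the repository root and that
# scikit-learn is available, as in the module's demo functions.
import numpy as np
from sklearn.datasets import make_blobs
from sklearn.preprocessing import StandardScaler

from machine_learning.sequential_minimum_optimization import Kernel, SmoSVM

train_x, train_y = make_blobs(n_samples=500, centers=2, random_state=0)
train_y[train_y == 0] = -1  # SmoSVM expects tags in {-1, 1}
train_x_scaled = StandardScaler().fit_transform(train_x)
# column 0 holds the tag, the remaining columns hold the features
train_data = np.hstack((train_y.reshape(500, 1), train_x_scaled))

my_kernel = Kernel(kernel="rbf", degree=5, coef0=1, gamma=0.5)
mysvm = SmoSVM(
    train=train_data,
    kernel_func=my_kernel,
    cost=0.4,
    tolerance=0.001,
    auto_norm=False,  # data is already standardized
)
mysvm.fit()
print(mysvm.predict(train_x_scaled[:5]))  # predicted tags for the first 5 samples
```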