Fix inconsistency that could cause the optimization algorithm to osci…

…llate. Fixes cjlin1#225. # Background The optimization algorithm has three main calculations: 1. Select the working set `{i, j}` that [minimizes](https://github.com/cjlin1/libsvm/blob/35e55962f7f03ce425bada0e6b9db79193e947f8/svm.cpp#L829-L879) the decrease in the objective function. 2. Change `alpha[i]` and `alpha[j]` to [minimize](https://github.com/cjlin1/libsvm/blob/35e55962f7f03ce425bada0e6b9db79193e947f8/svm.cpp#L606-L691) the decrease in the objective function while respecting constraints. 3. [Update](https://github.com/cjlin1/libsvm/blob/35e55962f7f03ce425bada0e6b9db79193e947f8/svm.cpp#L698-L701) the gradient of the objective function according to the changes to `alpha[i]` and `alpha[j]`. All three calculations make use of the matrix `Q`, which is represented by the `QMatrix` [class](https://github.com/cjlin1/libsvm/blob/35e55962f7f03ce425bada0e6b9db79193e947f8/svm.cpp#L198). The `QMatrix` class has two main methods: - `get_Q`, which returns an array of values for a single column of the matrix; and - `get_QD`, which returns an array of diagonal values. # Problem `Q` values are of type `Qfloat` while `QD` values are of type `double`. `Qfloat` is currently [defined](https://github.com/cjlin1/libsvm/blob/35e55962f7f03ce425bada0e6b9db79193e947f8/svm.cpp#L16) as `float`, so there can be inconsistency in the diagonal values returned by `get_Q` and `get_QD`. For example, in cjlin1#225, one of the diagonal values is `181.05748749793070829` as `double` and `180.99411909539512067` as `float`. The first two calculations of the optimization algorithm access the diagonal values via `get_QD`. However, the third calculation accesses the diagonal values via `get_Q`. This inconsistency between the minimization calculations and the gradient update can cause the optimization algorithm to oscillate, as demonstrated by cjlin1#225. # Solution We change the type of `QD` values from `double` to `Qfloat`. This guarantees that all calculations are using the same values for the diagonal elements, eliminating the inconsistency. Note that this reverts the past commit 1c80a42. That commit changed the type of `QD` values from `Qfloat` to `double` to address a numerical issue. In a follow-up commit, we will allow `Qfloat` to be defined as `double` at runtime as a more general fix for numerical issues. # Future Changes The Java code will be updated similarly in a separate commit.
fumoboy007 · Dec 26, 2024 · 5378914 · 5378914
1 parent 35e5596
commit 5378914
Showing 1 changed file with 12 additions and 12 deletions.
diff --git a/svm.cpp b/svm.cpp
@@ -198,7 +198,7 @@ void Cache::swap_index(int i, int j)
 class QMatrix {
 public:
 	virtual Qfloat *get_Q(int column, int len) const = 0;
-	virtual double *get_QD() const = 0;
+	virtual Qfloat *get_QD() const = 0;
 	virtual void swap_index(int i, int j) const = 0;
 	virtual ~QMatrix() {}
 };
@@ -211,7 +211,7 @@ class Kernel: public QMatrix {
 	static double k_function(const svm_node *x, const svm_node *y,
 				 const svm_parameter& param);
 	virtual Qfloat *get_Q(int column, int len) const = 0;
-	virtual double *get_QD() const = 0;
+	virtual Qfloat *get_QD() const = 0;
 	virtual void swap_index(int i, int j) const	// no so const...
 	{
 		swap(x[i],x[j]);
@@ -418,7 +418,7 @@ class Solver {
 	char *alpha_status;	// LOWER_BOUND, UPPER_BOUND, FREE
 	double *alpha;
 	const QMatrix *Q;
-	const double *QD;
+	const Qfloat *QD;
 	double eps;
 	double Cp,Cn;
 	double *p;
@@ -1275,7 +1275,7 @@ class SVC_Q: public Kernel
 	{
 		clone(y,y_,prob.l);
 		cache = new Cache(prob.l,(size_t)(param.cache_size*(1<<20)));
-		QD = new double[prob.l];
+		QD = new Qfloat[prob.l];
 		for(int i=0;i<prob.l;i++)
 			QD[i] = (this->*kernel_function)(i,i);
 	}
@@ -1295,7 +1295,7 @@ class SVC_Q: public Kernel
 		return data;
 	}
 
-	double *get_QD() const
+	Qfloat *get_QD() const
 	{
 		return QD;
 	}
@@ -1317,7 +1317,7 @@ class SVC_Q: public Kernel
 private:
 	schar *y;
 	Cache *cache;
-	double *QD;
+	Qfloat *QD;
 };
 
 class ONE_CLASS_Q: public Kernel
@@ -1327,7 +1327,7 @@ class ONE_CLASS_Q: public Kernel
 	:Kernel(prob.l, prob.x, param)
 	{
 		cache = new Cache(prob.l,(size_t)(param.cache_size*(1<<20)));
-		QD = new double[prob.l];
+		QD = new Qfloat[prob.l];
 		for(int i=0;i<prob.l;i++)
 			QD[i] = (this->*kernel_function)(i,i);
 	}
@@ -1344,7 +1344,7 @@ class ONE_CLASS_Q: public Kernel
 		return data;
 	}
 
-	double *get_QD() const
+	Qfloat *get_QD() const
 	{
 		return QD;
 	}
@@ -1363,7 +1363,7 @@ class ONE_CLASS_Q: public Kernel
 	}
 private:
 	Cache *cache;
-	double *QD;
+	Qfloat *QD;
 };
 
 class SVR_Q: public Kernel
@@ -1374,7 +1374,7 @@ class SVR_Q: public Kernel
 	{
 		l = prob.l;
 		cache = new Cache(l,(size_t)(param.cache_size*(1<<20)));
-		QD = new double[2*l];
+		QD = new Qfloat[2*l];
 		sign = new schar[2*l];
 		index = new int[2*l];
 		for(int k=0;k<l;k++)
@@ -1420,7 +1420,7 @@ class SVR_Q: public Kernel
 		return buf;
 	}
 
-	double *get_QD() const
+	Qfloat *get_QD() const
 	{
 		return QD;
 	}
@@ -1441,7 +1441,7 @@ class SVR_Q: public Kernel
 	int *index;
 	mutable int next_buffer;
 	Qfloat *buffer[2];
-	double *QD;
+	Qfloat *QD;
 };
 
 //