Skip to content

Commit

Permalink
Fix inconsistency that could cause the optimization algorithm to osci…
Browse files Browse the repository at this point in the history
…llate.

Fixes cjlin1#225.

# Background

The optimization algorithm has three main calculations:
1. Select the working set `{i, j}` that [minimizes](https://github.com/cjlin1/libsvm/blob/35e55962f7f03ce425bada0e6b9db79193e947f8/svm.cpp#L829-L879) the decrease in the objective function.
2. Change `alpha[i]` and `alpha[j]` to [minimize](https://github.com/cjlin1/libsvm/blob/35e55962f7f03ce425bada0e6b9db79193e947f8/svm.cpp#L606-L691) the decrease in the objective function while respecting constraints.
3. [Update](https://github.com/cjlin1/libsvm/blob/35e55962f7f03ce425bada0e6b9db79193e947f8/svm.cpp#L698-L701) the gradient of the objective function according to the changes to `alpha[i]` and `alpha[j]`.

All three calculations make use of the matrix `Q`, which is represented by the `QMatrix` [class](https://github.com/cjlin1/libsvm/blob/35e55962f7f03ce425bada0e6b9db79193e947f8/svm.cpp#L198). The `QMatrix` class has two main methods:
- `get_Q`, which returns an array of values for a single column of the matrix; and
- `get_QD`, which returns an array of diagonal values.

# Problem

`Q` values are of type `Qfloat` while `QD` values are of type `double`. `Qfloat` is currently [defined](https://github.com/cjlin1/libsvm/blob/35e55962f7f03ce425bada0e6b9db79193e947f8/svm.cpp#L16) as `float`, so there can be inconsistency in the diagonal values returned by `get_Q` and `get_QD`. For example, in cjlin1#225, one of the diagonal values is `181.05748749793070829` as `double` and `180.99411909539512067` as `float`.

The first two calculations of the optimization algorithm access the diagonal values via `get_QD`. However, the third calculation accesses the diagonal values via `get_Q`. This inconsistency between the minimization calculations and the gradient update can cause the optimization algorithm to oscillate, as demonstrated by cjlin1#225.

# Solution

We change the type of `QD` values from `double` to `Qfloat`. This guarantees that all calculations are using the same values for the diagonal elements, eliminating the inconsistency.

Note that this reverts the past commit 1c80a42. That commit changed the type of `QD` values from `Qfloat` to `double` to address a numerical issue. In a follow-up commit, we will allow `Qfloat` to be defined as `double` at runtime as a more general fix for numerical issues.

# Future Changes

The Java code will be updated similarly in a separate commit.
  • Loading branch information
fumoboy007 committed Dec 26, 2024
1 parent 35e5596 commit 5378914
Showing 1 changed file with 12 additions and 12 deletions.
24 changes: 12 additions & 12 deletions svm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,7 @@ void Cache::swap_index(int i, int j)
class QMatrix {
public:
virtual Qfloat *get_Q(int column, int len) const = 0;
virtual double *get_QD() const = 0;
virtual Qfloat *get_QD() const = 0;
virtual void swap_index(int i, int j) const = 0;
virtual ~QMatrix() {}
};
Expand All @@ -211,7 +211,7 @@ class Kernel: public QMatrix {
static double k_function(const svm_node *x, const svm_node *y,
const svm_parameter& param);
virtual Qfloat *get_Q(int column, int len) const = 0;
virtual double *get_QD() const = 0;
virtual Qfloat *get_QD() const = 0;
virtual void swap_index(int i, int j) const // no so const...
{
swap(x[i],x[j]);
Expand Down Expand Up @@ -418,7 +418,7 @@ class Solver {
char *alpha_status; // LOWER_BOUND, UPPER_BOUND, FREE
double *alpha;
const QMatrix *Q;
const double *QD;
const Qfloat *QD;
double eps;
double Cp,Cn;
double *p;
Expand Down Expand Up @@ -1275,7 +1275,7 @@ class SVC_Q: public Kernel
{
clone(y,y_,prob.l);
cache = new Cache(prob.l,(size_t)(param.cache_size*(1<<20)));
QD = new double[prob.l];
QD = new Qfloat[prob.l];
for(int i=0;i<prob.l;i++)
QD[i] = (this->*kernel_function)(i,i);
}
Expand All @@ -1295,7 +1295,7 @@ class SVC_Q: public Kernel
return data;
}

double *get_QD() const
Qfloat *get_QD() const
{
return QD;
}
Expand All @@ -1317,7 +1317,7 @@ class SVC_Q: public Kernel
private:
schar *y;
Cache *cache;
double *QD;
Qfloat *QD;
};

class ONE_CLASS_Q: public Kernel
Expand All @@ -1327,7 +1327,7 @@ class ONE_CLASS_Q: public Kernel
:Kernel(prob.l, prob.x, param)
{
cache = new Cache(prob.l,(size_t)(param.cache_size*(1<<20)));
QD = new double[prob.l];
QD = new Qfloat[prob.l];
for(int i=0;i<prob.l;i++)
QD[i] = (this->*kernel_function)(i,i);
}
Expand All @@ -1344,7 +1344,7 @@ class ONE_CLASS_Q: public Kernel
return data;
}

double *get_QD() const
Qfloat *get_QD() const
{
return QD;
}
Expand All @@ -1363,7 +1363,7 @@ class ONE_CLASS_Q: public Kernel
}
private:
Cache *cache;
double *QD;
Qfloat *QD;
};

class SVR_Q: public Kernel
Expand All @@ -1374,7 +1374,7 @@ class SVR_Q: public Kernel
{
l = prob.l;
cache = new Cache(l,(size_t)(param.cache_size*(1<<20)));
QD = new double[2*l];
QD = new Qfloat[2*l];
sign = new schar[2*l];
index = new int[2*l];
for(int k=0;k<l;k++)
Expand Down Expand Up @@ -1420,7 +1420,7 @@ class SVR_Q: public Kernel
return buf;
}

double *get_QD() const
Qfloat *get_QD() const
{
return QD;
}
Expand All @@ -1441,7 +1441,7 @@ class SVR_Q: public Kernel
int *index;
mutable int next_buffer;
Qfloat *buffer[2];
double *QD;
Qfloat *QD;
};

//
Expand Down

0 comments on commit 5378914

Please sign in to comment.