forked from ycjuan/libffm
-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathffm-predict.cpp
119 lines (93 loc) · 2.78 KB
/
ffm-predict.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <memory>
#include <stdexcept>
#include <string>
#include <vector>
#include "ffm.h"
using namespace std;
using namespace ffm;
/// Command-line settings for one prediction run.
struct Option {
    /// Paths of the test data, the model file, and the prediction output.
    string test_path, model_path, output_path;
    /// Negative down-sampling rate forwarded to predict(). Defaults to 1 so
    /// that a default-constructed Option never carries an indeterminate value
    /// when --nds-rate is not supplied.
    ffm_float nds_rate = 1;
};
/// Build the usage text shown when ffm-predict is run without arguments.
///
/// @return The multi-line usage message. The option is spelled `--nds-rate`
///         because that is the exact flag the argument parser matches.
std::string predict_help() {
    return std::string(
        "usage: ffm-predict [options] test_file model_file output_file\n"
        "\n"
        "options:\n"
        "--nds-rate <rate>: set the negative down sampling rate for unbalanced data\n"
    );
}
/// Parse the ffm-predict command line.
///
/// Accepted form: `ffm-predict [--nds-rate <rate>] test_file model_file output_file`.
/// The `--nds-rate` option may appear before, between, or after the three
/// positional paths; it is never mistaken for one of the paths.
///
/// @param argc Number of entries in argv (including the program name).
/// @param argv Argument strings as passed to main().
/// @return The parsed Option; nds_rate is 1 when --nds-rate is not given.
/// @throws std::invalid_argument with the usage text when no arguments are
///         given, or with a short message when the arguments are malformed
///         (wrong number of paths, missing or non-positive rate).
Option parse_option(int argc, char **argv) {
    if (argc == 1)
        throw invalid_argument(predict_help());

    vector<string> const args(argv, argv + (argc > 0 ? argc : 0));

    Option option;
    option.nds_rate = 1;  // explicit default: no down-sampling correction requested

    // Separate option flags from positional paths so their order does not matter.
    vector<string> positional;
    for (size_t i = 1; i < args.size(); i++) {
        if (args[i] == "--nds-rate") {
            if (i + 1 == args.size())
                throw invalid_argument("need to specify nds_rate after --nds-rate");
            i++;
            option.nds_rate = static_cast<ffm_float>(atof(args[i].c_str()));
            // Written as !(x > 0) so that NaN and unparsable text (atof -> 0) are rejected too.
            if (!(option.nds_rate > 0))
                throw invalid_argument("number of nds_rate should be greater than zero");
        } else {
            positional.push_back(args[i]);
        }
    }

    if (positional.size() != 3)
        throw invalid_argument("cannot parse argument");

    option.test_path = positional[0];
    option.model_path = positional[1];
    option.output_path = positional[2];
    return option;
}
/// Score every example in a libffm-format test file and report the log loss.
///
/// Each input line is `<label> <field>:<index>:<value> ...`. For every line the
/// function writes `<label>,<prediction>` to the output file (label is printed
/// as 1 or -1) and accumulates the logistic loss, whose mean is printed to
/// stdout as `logloss = ...`. If the test file holds no examples the mean is
/// 0/0 and is printed as NaN.
///
/// @param test_path   Path of the test data file to read.
/// @param model_path  Path of the model file handed to ffm_load_model().
/// @param output_path Path of the file that receives the predictions.
/// @param nds_rate    Negative down-sampling rate forwarded to ffm_predict().
/// @throws std::runtime_error if the test file or output file cannot be
///         opened, or if ffm_load_model() returns a null model.
void predict(string test_path, string model_path, string output_path, ffm_float nds_rate = 1.0) {
    int const kMaxLineSize = 1000000;

    // RAII handle: the input file is closed on every exit path.
    unique_ptr<FILE, int (*)(FILE *)> f_in(fopen(test_path.c_str(), "r"), fclose);
    if (!f_in)
        throw runtime_error("cannot open test file: " + test_path);

    ofstream f_out(output_path);
    if (!f_out)
        throw runtime_error("cannot open output file: " + output_path);

    ffm_model *model = ffm_load_model(model_path.c_str());
    if (model == nullptr)
        throw runtime_error("cannot load model: " + model_path);

    // Heap-allocated line buffer; a 1 MB array on the stack can overflow small stacks.
    vector<char> line(kMaxLineSize);

    ffm_double loss = 0;
    vector<ffm_node> x;
    ffm_int i = 0;  // number of examples scored

    while (fgets(line.data(), kMaxLineSize, f_in.get()) != nullptr) {
        x.clear();

        char *y_char = strtok(line.data(), " \t");
        if (y_char == nullptr)
            continue;  // nothing but spaces/tabs: no label to score
        ffm_float y = (atoi(y_char) > 0) ? 1.0f : -1.0f;

        while (true) {
            char *field_char = strtok(nullptr, ":");
            char *idx_char = strtok(nullptr, ":");
            char *value_char = strtok(nullptr, " \t");
            if (field_char == nullptr || *field_char == '\n')
                break;
            // A truncated trailing token (e.g. "3:7" or stray "\r") has no
            // index/value; stop here instead of dereferencing a null pointer.
            if (idx_char == nullptr || value_char == nullptr)
                break;

            ffm_node N;
            N.f = atoi(field_char);
            N.j = atoi(idx_char);
            N.v = atof(value_char);
            x.push_back(N);
        }

        ffm_float y_bar = ffm_predict(x.data(), x.data() + x.size(), model, nds_rate);
        loss -= y == 1 ? log(y_bar) : log(1 - y_bar);
        f_out << y << "," << y_bar << "\n";
        i++;
    }

    loss /= i;
    cout << "logloss = " << fixed << setprecision(5) << loss << endl;

    ffm_destroy_model(&model);
}
int main(int argc, char **argv) {
Option option;
try {
option = parse_option(argc, argv);
} catch (invalid_argument const &e) {
cout << e.what() << endl;
return 1;
}
predict(option.test_path, option.model_path, option.output_path, option.nds_rate);
}