This repository has been archived by the owner on Oct 8, 2019. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 153
KDDCup 2012 track 2 CTR prediction AdaGrad AdaDelta
Makoto YUI edited this page May 4, 2015
·
4 revisions
Note: AdaGrad and AdaDelta are supported in Hivemall v0.3b2 or later (or in the master branch).
-- Load the Hivemall jar (bundled with its dependencies) into this Hive session.
add jar ./tmp/hivemall-with-dependencies.jar;
-- Run the Hivemall definition script. Presumably it registers the functions used
-- below (adagrad, adadelta, sigmoid) -- TODO confirm against define-all.hive.
source ./tmp/define-all.hive;
-- Every table referenced below is resolved in the kdd12track2 database.
use kdd12track2;
-- Optional execution-engine settings, left disabled. Uncomment one pair to run
-- on MapReduce over YARN (first pair) or on Tez (second pair).
-- SET mapreduce.framework.name=yarn;
-- SET hive.execution.engine=mr;
-- SET mapreduce.framework.name=yarn-tez;
-- SET hive.execution.engine=tez;
SET mapred.reduce.tasks=32; -- [optional] set the explicit number of reducers to make group-by aggregation faster
-- Build the AdaGrad model table.
-- The inner query runs the adagrad function over every training row and
-- yields (feature, weight) rows. The outer query averages the weights that
-- share a feature so the model holds exactly one row per feature.
DROP TABLE adagrad_model;
CREATE TABLE adagrad_model
AS
SELECT
    feature,
    avg(weight) AS weight
FROM (
    SELECT
        adagrad(features, label) AS (feature, weight)
    FROM
        training_orcfile
) learned
GROUP BY
    feature;
-- Score every test row with the AdaGrad model and store the result as a
-- tab-separated text table with one (rowid, prob) line per test row.
-- IF EXISTS keeps the script rerunnable even when the table was never created.
DROP TABLE IF EXISTS adagrad_predict;
CREATE TABLE adagrad_predict
    ROW FORMAT DELIMITED
        FIELDS TERMINATED BY "\t"
        LINES TERMINATED BY "\n"
    STORED AS TEXTFILE
AS
SELECT
    t.rowid,
    -- The outer join leaves m.weight NULL for features missing from the model,
    -- and SUM over only NULLs is NULL. Fall back to a zero margin (prob = 0.5)
    -- so a row with no known feature still gets a numeric prediction.
    sigmoid(COALESCE(sum(m.weight), CAST(0.0 AS DOUBLE))) AS prob
FROM
    testing_exploded t
    -- Outer join so that test rows are kept even when a feature has no weight.
    LEFT OUTER JOIN adagrad_model m
        ON (t.feature = m.feature)
GROUP BY
    t.rowid
-- Sort on the output column name so the exported file follows rowid order.
ORDER BY
    rowid ASC;
# Merge the files of the adagrad_predict table directory on HDFS into one local file.
hadoop fs -getmerge /user/hive/warehouse/kdd12track2.db/adagrad_predict adagrad_predict.tbl
# Keep only the second tab-separated column (prob) as the submission file.
gawk -F "\t" '{print $2;}' adagrad_predict.tbl > adagrad_predict.submit
# Score the submission against the solution file; the reported AUC values follow.
pypy scoreKDD.py KDD_Track2_solution.csv adagrad_predict.submit
AUC(SGD) : 0.739351
AUC(ADAGRAD) : 0.743279
-- Build the AdaDelta model table.
-- The inner query runs the adadelta function over every training row and
-- yields (feature, weight) rows. The outer query averages the weights that
-- share a feature and stores that average as a float, one row per feature.
DROP TABLE adadelta_model;
CREATE TABLE adadelta_model
AS
SELECT
    feature,
    CAST(avg(weight) AS FLOAT) AS weight
FROM (
    SELECT
        adadelta(features, label) AS (feature, weight)
    FROM
        training_orcfile
) learned
GROUP BY
    feature;
-- Score every test row with the AdaDelta model and store the result as a
-- tab-separated text table with one (rowid, prob) line per test row.
-- IF EXISTS keeps the script rerunnable even when the table was never created.
DROP TABLE IF EXISTS adadelta_predict;
CREATE TABLE adadelta_predict
    ROW FORMAT DELIMITED
        FIELDS TERMINATED BY "\t"
        LINES TERMINATED BY "\n"
    STORED AS TEXTFILE
AS
SELECT
    t.rowid,
    -- The outer join leaves m.weight NULL for features missing from the model,
    -- and SUM over only NULLs is NULL. Fall back to a zero margin (prob = 0.5)
    -- so a row with no known feature still gets a numeric prediction.
    sigmoid(COALESCE(sum(m.weight), CAST(0.0 AS DOUBLE))) AS prob
FROM
    testing_exploded t
    -- Outer join so that test rows are kept even when a feature has no weight.
    LEFT OUTER JOIN adadelta_model m
        ON (t.feature = m.feature)
GROUP BY
    t.rowid
-- Sort on the output column name so the exported file follows rowid order.
ORDER BY
    rowid ASC;
# Merge the files of the adadelta_predict table directory on HDFS into one local file.
hadoop fs -getmerge /user/hive/warehouse/kdd12track2.db/adadelta_predict adadelta_predict.tbl
# Keep only the second tab-separated column (prob) as the submission file.
gawk -F "\t" '{print $2;}' adadelta_predict.tbl > adadelta_predict.submit
# Score the submission against the solution file; the reported AUC values follow.
pypy scoreKDD.py KDD_Track2_solution.csv adadelta_predict.submit
AUC(SGD) : 0.739351
AUC(ADAGRAD) : 0.743279
AUC(AdaDelta) : 0.746878