This repository has been archived by the owner on Oct 8, 2019. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 153
a9a binary dataset
Makoto YUI edited this page Jan 7, 2016
·
12 revisions
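Download the a9a dataset files (`a9a` for training and `a9a.t` for testing) from the LIBSVM datasets page: http://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary.html#a9a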
# Work from the directory that holds the downloaded a9a / a9a.t files and conv.awk.
cd /mnt/archive/datasets/classification/a9a

# Run each raw file through conv.awk (not shown here), then rewrite the first
# "+1" on every line to "1" and the first "-1" to "0" -- presumably the class
# label; confirm against conv.awk's output layout.
awk -f conv.awk a9a   | sed -e "s/+1/1/" -e "s/-1/0/" > a9a.train
awk -f conv.awk a9a.t | sed -e "s/+1/1/" -e "s/-1/0/" > a9a.test

# Create one HDFS directory per split and upload the matching converted file.
for split in train test; do
  hadoop fs -mkdir -p "/dataset/a9a/${split}"
  hadoop fs -copyFromLocal "a9a.${split}" "/dataset/a9a/${split}"
done
-- Create the a9a database and make it the current database.
-- IF NOT EXISTS keeps this step re-runnable when the database is already there.
CREATE DATABASE IF NOT EXISTS a9a;
USE a9a;
-- External table over the text files stored under /dataset/a9a/train.
-- Columns are tab-separated: a row id, a label, and an array of feature
-- strings whose items are comma-separated.
-- IF NOT EXISTS lets this statement be re-run without failing when the table
-- is already defined.
CREATE EXTERNAL TABLE IF NOT EXISTS a9atrain (
  rowid INT,
  label FLOAT,
  features ARRAY<STRING>
)
ROW FORMAT DELIMITED
  FIELDS TERMINATED BY '\t'
  COLLECTION ITEMS TERMINATED BY ','
STORED AS TEXTFILE
LOCATION '/dataset/a9a/train';
-- External table over the text files stored under /dataset/a9a/test.
-- Columns are tab-separated: a row id, a label, and an array of feature
-- strings whose items are comma-separated.
-- IF NOT EXISTS lets this statement be re-run without failing when the table
-- is already defined.
CREATE EXTERNAL TABLE IF NOT EXISTS a9atest (
  rowid INT,
  label FLOAT,
  features ARRAY<STRING>
)
ROW FORMAT DELIMITED
  FIELDS TERMINATED BY '\t'
  COLLECTION ITEMS TERMINATED BY ','
STORED AS TEXTFILE
LOCATION '/dataset/a9a/test';