keyIndexerUpdate.java
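/*
 * keyIndexerUpdate: a Hadoop MapReduce job (old 'mapred' API) that reads the
 * output of an inverted indexer ('output/part-00000') and a keyword list
 * ('keys/keyIP'), and writes, for each book, the keywords that occur in it
 * ('keyOP'), while counting matches per book in a HashMap.
 */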
import java.io.IOException;
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.util.*;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
public class keyIndexerUpdate {
    // NOTE: these static fields are shared between driver, mapper and reducer only
    // when the job runs in local (single-JVM) mode; on a cluster each task gets its own copy.
    public static String keyline;
    // per-book count of matched keywords, filled in by the reducer
    public static HashMap<String, Integer> hmap = new HashMap<String, Integer>();
    // keywords read from 'keys/keyIP' in the driver
    public static String[] keylist = new String[100];
    public static class keyIndexMapper extends MapReduceBase
            implements Mapper<LongWritable, Text, Text, Text> {

        private final Text word = new Text();
        private final Text location = new Text();

        public void map(LongWritable key, Text val,
                        OutputCollector<Text, Text> output, Reporter reporter)
                throws IOException {
            // Each input line comes from the inverted index: <term> <book1> <book2> ...
            String line = val.toString().trim();
            if (line.isEmpty()) {
                return;
            }
            String[] itr = line.split("\\s+");
            // The first token is the term of the inverted-index entry.
            word.set(itr[0]);
            // keylist holds the keywords loaded in the driver.
            for (String ele : keylist) {
                // TODO check for substrings
                if (ele != null && ele.equals(word.toString())) {
                    // The term is a keyword: emit (book, keyword) for every book containing it.
                    for (int i = 1; i < itr.length; i++) {
                        location.set(itr[i]);
                        output.collect(location, word);
                    }
                }
            }
        }
    }
    public static class keyIndexReducer extends MapReduceBase
            implements Reducer<Text, Text, Text, Text> {

        public void reduce(Text key, Iterator<Text> values,
                           OutputCollector<Text, Text> output, Reporter reporter)
                throws IOException {
            int count = 0;
            StringBuilder toReturn = new StringBuilder();
            while (values.hasNext()) {
                if (count > 0) {
                    toReturn.append(", ");
                }
                count++;
                toReturn.append(values.next().toString());
            }
            // Output one line per book: book -> keyword1, keyword2, ...
            output.collect(key, new Text(toReturn.toString()));
            // Track how many keywords matched this book; the full map is
            // printed once at the end of main() (meaningful in local mode only).
            hmap.put(key.toString(), count);
        }
    }
    /**
     * The actual main() method for our program; this is the
     * "driver" for the MapReduce job.
     */
    public static void main(String[] args) {
        JobClient client = new JobClient();
        JobConf conf = new JobConf(keyIndexerUpdate.class);
        conf.setJobName("keyIndexer");
        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(Text.class);
        FileSystem fs;
        try {
            // Read the keywords from the file 'keyIP' stored in the folder 'keys'.
            fs = FileSystem.get(conf);
            Path keyfile = new Path(new Path("keys"), "keyIP");
            if (!fs.exists(keyfile))
                throw new IOException("Keywords file not found");
            BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(keyfile)));
            // Keywords are expected on a single whitespace-separated line;
            // if the file has several lines, only the last one is kept.
            while ((keyline = br.readLine()) != null) {
                keylist = keyline.split("\\s+");
                System.out.println("Loaded keywords: " + Arrays.toString(keylist));
            }
            br.close();
            // The input is the output of the inverted indexer, stored in 'output'
            // under the file name 'part-00000'; results go to 'keyOP'.
            FileInputFormat.addInputPath(conf, new Path("output/part-00000"));
            FileOutputFormat.setOutputPath(conf, new Path("keyOP"));
            conf.setMapperClass(keyIndexMapper.class);
            conf.setReducerClass(keyIndexReducer.class);
            client.setConf(conf);
            try {
                JobClient.runJob(conf);
            } catch (Exception e) {
                e.printStackTrace();
            }
        } catch (IOException e1) {
            e1.printStackTrace();
        }
        // Per-book keyword counts collected by the reducer (local mode only).
        System.out.println(hmap);
    }
}
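/*
 * Example run, a sketch assuming the job is packaged as 'keyIndexer.jar'
 * (the jar name is an assumption) and the inverted-index output already
 * sits at 'output/part-00000' on HDFS:
 *
 *   hadoop fs -mkdir keys
 *   hadoop fs -put keyIP keys/keyIP           # whitespace-separated keywords
 *   hadoop jar keyIndexer.jar keyIndexerUpdate
 *   hadoop fs -cat keyOP/part-00000           # book -> keyword1, keyword2, ...
 */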