forked from awantik/pyspark-learning
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathhello.py
32 lines (24 loc) · 799 Bytes
/
hello.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
"""Calculates the word count of the given file.
The file can be a local path or, if you have set up a cluster,
an HDFS file path."""
## Imports
from pyspark import SparkConf, SparkContext
from operator import add
import sys
## Constants
# Application name given to SparkConf.setAppName() when this script is run directly.
# NOTE(review): the value starts with a space -- confirm whether that is intentional.
APP_NAME = " HelloWorld of Big Data"
##OTHER FUNCTIONS/CLASSES
def main(sc, filename):
    """Print how many times each word occurs in a text file.

    Args:
        sc: Spark context; only its ``textFile`` method is used here.
        filename: Path of the text file, passed unchanged to ``sc.textFile``.

    Every distinct word is printed together with its count, one pair per
    line. Nothing is returned.
    """
    lines = sc.textFile(filename)
    # str.split() with no argument splits on any run of whitespace and never
    # yields empty strings, so blank lines, tabs and repeated spaces do not
    # produce a bogus "" word the way split(' ') does.
    pairs = lines.flatMap(lambda line: line.split()).map(lambda word: (word, 1))
    # collect() brings every (word, count) pair back to the driver for printing.
    for word, count in pairs.reduceByKey(add).collect():
        print(word, count)
if __name__ == "__main__":
    # Check the command line before starting Spark so that a missing argument
    # produces a usage message instead of an IndexError.
    if len(sys.argv) < 2:
        sys.exit("Usage: {} <input-file>".format(sys.argv[0]))
    filename = sys.argv[1]

    # Configure Spark
    conf = SparkConf().setAppName(APP_NAME).setMaster("local[*]")
    sc = SparkContext(conf=conf)
    try:
        # Execute Main functionality
        main(sc, filename)
    finally:
        # Always release the Spark context, even when the job fails.
        sc.stop()