forked from jgperrin/net.jgp.books.spark.ch01
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
4 changed files
with
125 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
id,authorId,title,releaseDate,link | ||
1,1,Fantastic Beasts and Where to Find Them: The Original Screenplay,11/18/16,http://amzn.to/2kup94P | ||
2,1,"Harry Potter and the Sorcerer's Stone: The Illustrated Edition (Harry Potter, Book 1)",10/6/15,http://amzn.to/2l2lSwP | ||
3,1,"The Tales of Beedle the Bard, Standard Edition (Harry Potter)",12/4/08,http://amzn.to/2kYezqr | ||
4,1,"Harry Potter and the Chamber of Secrets: The Illustrated Edition (Harry Potter, Book 2)",10/4/16,http://amzn.to/2kYhL5n | ||
5,2,"Informix 12.10 on Mac 10.12 with a dash of Java 8: The Tale of the Apple, the Coffee, and a Great Database",4/23/17,http://amzn.to/2i3mthT | ||
6,2,"Development Tools in 2006: any Room for a 4GL-style Language?: An independent study by Jean Georges Perrin, IIUG Board Member",12/28/16,http://amzn.to/2vBxOe1 | ||
7,3,Adventures of Huckleberry Finn,5/26/94,http://amzn.to/2wOeOav | ||
8,3,A Connecticut Yankee in King Arthur's Court,6/17/17,http://amzn.to/2x1NuoD | ||
10,4,Jacques le Fataliste,3/1/00,http://amzn.to/2uZj2KA | ||
11,4,Diderot Encyclopedia: The Complete Illustrations 1762-1777,,http://amzn.to/2i2zo3I | ||
12,,A Woman in Berlin,7/11/06,http://amzn.to/2i472WZ | ||
13,6,Spring Boot in Action,1/3/16,http://amzn.to/2hCPktW | ||
14,6,Spring in Action: Covers Spring 4,11/28/14,http://amzn.to/2yJLyCk | ||
15,7,Soft Skills: The software developer's life manual,12/29/14,http://amzn.to/2zNnSyn | ||
16,8,Of Mice and Men,,http://amzn.to/2zJjXoc | ||
17,9,"Java 8 in Action: Lambdas, Streams, and functional-style programming",8/28/14,http://amzn.to/2isdqoL | ||
18,12,Hamlet,6/8/12,http://amzn.to/2yRbewY | ||
19,13,Pensées,12/31/1670,http://amzn.to/2jweHOG | ||
20,14,"Fables choisies, mises en vers par M. de La Fontaine",9/1/1999,http://amzn.to/2yRH10W | ||
21,15,Discourse on Method and Meditations on First Philosophy,6/15/1999,http://amzn.to/2hwB8zc | ||
22,12,Twelfth Night,7/1/4,http://amzn.to/2zPYnwo | ||
23,12,Macbeth,7/1/3,http://amzn.to/2zPYnwo |
43 changes: 43 additions & 0 deletions
43
src/main/java/net/jgp/books/spark/ch01/lab100_csv_to_dataframe/CsvToDataframeApp.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
package net.jgp.books.spark.ch01.lab100_csv_to_dataframe; | ||
|
||
import org.apache.spark.sql.Dataset; | ||
import org.apache.spark.sql.Row; | ||
import org.apache.spark.sql.SparkSession; | ||
|
||
/** | ||
* CSV ingestion in a dataframe. | ||
* | ||
* @author jgp | ||
*/ | ||
public class CsvToDataframeApp { | ||
|
||
/** | ||
* main() is your entry point to the application. | ||
* | ||
* @param args | ||
*/ | ||
public static void main(String[] args) { | ||
CsvToDataframeApp app = new CsvToDataframeApp(); | ||
app.start(); | ||
} | ||
|
||
/** | ||
* The processing code. | ||
*/ | ||
private void start() { | ||
// Creates a session on a local master | ||
SparkSession spark = SparkSession.builder() | ||
.appName("CSV to Dataset") | ||
.master("local") | ||
.getOrCreate(); | ||
|
||
// Reads a CSV file with header, called books.csv, stores it in a | ||
// dataframe | ||
Dataset<Row> df = spark.read().format("csv") | ||
.option("header", "true") | ||
.load("data/books.csv"); | ||
|
||
// Shows at most 5 rows from the dataframe | ||
df.show(5); | ||
} | ||
} |
22 changes: 22 additions & 0 deletions
22
src/main/python/lab100_csv_to_dataframe/csvToDataframeApp.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
""" | ||
CsvToDataframeApp.py - CSV ingestion in a dataframe. | ||
@author rambabu.posa | ||
""" | ||
from pyspark.sql import SparkSession | ||
import os | ||
|
||
# Builds the path to books.csv relative to the directory containing this script
current_dir = os.path.dirname(__file__)
relative_path = "../../../../data/books.csv"
absolute_file_path = os.path.join(current_dir, relative_path)

# Creates a session on a local master
session = SparkSession.builder.appName("CSV to Dataset").master("local[*]").getOrCreate()

try:
    # Reads a CSV file with header, called books.csv, stores it in a dataframe
    df = session.read.csv(header=True, inferSchema=True, path=absolute_file_path)

    # Shows at most 5 rows from the dataframe
    df.show(5)
finally:
    # Stop the SparkSession at the end of the application, even when the
    # read or show above raises
    session.stop()
37 changes: 37 additions & 0 deletions
37
src/main/scala/net/jgp/books/spark/ch01/lab100_csv_to_dataframe/CsvToDataframeScalaApp.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
package net.jgp.books.spark.ch01.lab100_csv_to_dataframe | ||
|
||
import org.apache.spark.sql.SparkSession | ||
|
||
/**
 * CSV ingestion in a dataframe.
 *
 * Starts a Spark session on a local master, loads data/books.csv (with a
 * header row) into a dataframe, prints at most 5 rows, and stops the session.
 *
 * @author rambabu.posa
 */
object CsvToDataframeScalaApp {

  /**
   * main() is your entry point to the application.
   *
   * @param args command-line arguments; not used by this application
   */
  def main(args: Array[String]): Unit = {

    // Creates a session on a local master.
    // builder(), getOrCreate() and stop() are declared with an empty
    // parameter list, so they are called with explicit parentheses.
    val spark = SparkSession.builder()
      .appName("CSV to Dataset")
      .master("local[*]")
      .getOrCreate()

    try {
      // Reads a CSV file with header, called books.csv, stores it in a dataframe
      val df = spark.read.format("csv")
        .option("header", "true")
        .load("data/books.csv")

      // Shows at most 5 rows from the dataframe
      df.show(5)
    } finally {
      // Stop the SparkSession at the end of the application, even when the
      // read or show above throws
      spark.stop()
    }
  }

}