Skip to content

Commit

Permalink
Back to normal
Browse files Browse the repository at this point in the history
  • Loading branch information
jgperrin committed Sep 24, 2021
1 parent 19e67b9 commit 99b5935
Show file tree
Hide file tree
Showing 4 changed files with 125 additions and 0 deletions.
23 changes: 23 additions & 0 deletions data/books.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
id,authorId,title,releaseDate,link
1,1,Fantastic Beasts and Where to Find Them: The Original Screenplay,11/18/16,http://amzn.to/2kup94P
2,1,"Harry Potter and the Sorcerer's Stone: The Illustrated Edition (Harry Potter, Book 1)",10/6/15,http://amzn.to/2l2lSwP
3,1,"The Tales of Beedle the Bard, Standard Edition (Harry Potter)",12/4/08,http://amzn.to/2kYezqr
4,1,"Harry Potter and the Chamber of Secrets: The Illustrated Edition (Harry Potter, Book 2)",10/4/16,http://amzn.to/2kYhL5n
5,2,"Informix 12.10 on Mac 10.12 with a dash of Java 8: The Tale of the Apple, the Coffee, and a Great Database",4/23/17,http://amzn.to/2i3mthT
6,2,"Development Tools in 2006: any Room for a 4GL-style Language?: An independent study by Jean Georges Perrin, IIUG Board Member",12/28/16,http://amzn.to/2vBxOe1
7,3,Adventures of Huckleberry Finn,5/26/94,http://amzn.to/2wOeOav
8,3,A Connecticut Yankee in King Arthur's Court,6/17/17,http://amzn.to/2x1NuoD
10,4,Jacques le Fataliste,3/1/00,http://amzn.to/2uZj2KA
11,4,Diderot Encyclopedia: The Complete Illustrations 1762-1777,,http://amzn.to/2i2zo3I
12,,A Woman in Berlin,7/11/06,http://amzn.to/2i472WZ
13,6,Spring Boot in Action,1/3/16,http://amzn.to/2hCPktW
14,6,Spring in Action: Covers Spring 4,11/28/14,http://amzn.to/2yJLyCk
15,7,Soft Skills: The software developer's life manual,12/29/14,http://amzn.to/2zNnSyn
16,8,Of Mice and Men,,http://amzn.to/2zJjXoc
17,9,"Java 8 in Action: Lambdas, Streams, and functional-style programming",8/28/14,http://amzn.to/2isdqoL
18,12,Hamlet,6/8/12,http://amzn.to/2yRbewY
19,13,Pensées,12/31/1670,http://amzn.to/2jweHOG
20,14,"Fables choisies, mises en vers par M. de La Fontaine",9/1/1999,http://amzn.to/2yRH10W
21,15,Discourse on Method and Meditations on First Philosophy,6/15/1999,http://amzn.to/2hwB8zc
22,12,Twelfth Night,7/1/4,http://amzn.to/2zPYnwo
23,12,Macbeth,7/1/3,http://amzn.to/2zPYnwo
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
package net.jgp.books.spark.ch01.lab100_csv_to_dataframe;

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;

/**
 * CSV ingestion in a dataframe.
 *
 * Starts a Spark session on a local master, loads {@code data/books.csv}
 * (first line treated as a header) into a dataframe, prints at most five
 * rows, and stops the session.
 *
 * @author jgp
 */
public class CsvToDataframeApp {

  /**
   * main() is your entry point to the application.
   *
   * @param args
   *          command-line arguments; not used by this application
   */
  public static void main(String[] args) {
    CsvToDataframeApp app = new CsvToDataframeApp();
    app.start();
  }

  /**
   * The processing code: creates the session, ingests the CSV file, shows
   * a few rows, and always stops the session afterwards.
   */
  private void start() {
    // Creates a session on a local master
    SparkSession spark = SparkSession.builder()
        .appName("CSV to Dataset")
        .master("local")
        .getOrCreate();

    try {
      // Reads a CSV file with header, called books.csv, stores it in a
      // dataframe. The path is relative, so it is expected to resolve from
      // the directory the application is launched in.
      Dataset<Row> df = spark.read().format("csv")
          .option("header", "true")
          .load("data/books.csv");

      // Shows at most 5 rows from the dataframe
      df.show(5);
    } finally {
      // Stop the session even when loading or showing fails, so the local
      // Spark context is released (matches the Python and Scala labs).
      spark.stop();
    }
  }
}
22 changes: 22 additions & 0 deletions src/main/python/lab100_csv_to_dataframe/csvToDataframeApp.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
"""
CsvToDataframeApp.py - CSV ingestion in a dataframe.
@author rambabu.posa
"""
from pyspark.sql import SparkSession
import os

# Locate books.csv relative to this script rather than the current working
# directory. abspath() is applied first because __file__ may be a relative
# path (or a bare file name) depending on how the script is launched, which
# would otherwise leave "absolute_file_path" relative.
current_dir = os.path.dirname(os.path.abspath(__file__))
relative_path = "../../../../data/books.csv"
absolute_file_path = os.path.join(current_dir, relative_path)

# Creates a session on a local master
session = SparkSession.builder.appName("CSV to Dataset").master("local[*]").getOrCreate()

try:
    # Reads a CSV file with header, called books.csv, stores it in a dataframe
    df = session.read.csv(header=True, inferSchema=True, path=absolute_file_path)

    # Shows at most 5 rows from the dataframe
    df.show(5)
finally:
    # Stop the SparkSession even if reading or showing the data fails
    session.stop()
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
package net.jgp.books.spark.ch01.lab100_csv_to_dataframe

import org.apache.spark.sql.SparkSession

/**
 * CSV ingestion in a dataframe.
 *
 * Starts a Spark session on a local master, loads `data/books.csv` (first
 * line treated as a header) into a dataframe, prints at most five rows, and
 * stops the session.
 *
 * @author rambabu.posa
 */
object CsvToDataframeScalaApp {

  /**
   * main() is your entry point to the application.
   *
   * @param args command-line arguments; not used by this application
   */
  def main(args: Array[String]): Unit = {

    // Creates a session on a local master.
    // builder(), getOrCreate() and stop() are declared with an empty
    // parameter list, so they are called with explicit parentheses.
    val spark = SparkSession.builder()
      .appName("CSV to Dataset")
      .master("local[*]")
      .getOrCreate()

    try {
      // Reads a CSV file with header, called books.csv, stores it in a dataframe
      val df = spark.read.format("csv")
        .option("header", "true")
        .load("data/books.csv")

      // Shows at most 5 rows from the dataframe
      df.show(5)
    } finally {
      // Stop the SparkSession even if reading or showing the data fails
      spark.stop()
    }
  }

}

0 comments on commit 99b5935

Please sign in to comment.