diff --git a/content/post/why-use-spark/featured.png b/content/post/why-use-spark/featured.png new file mode 100644 index 0000000..6b00da4 Binary files /dev/null and b/content/post/why-use-spark/featured.png differ diff --git a/content/post/why-use-spark/gallery/therinspark.jpg b/content/post/why-use-spark/gallery/therinspark.jpg new file mode 100644 index 0000000..dd825d1 Binary files /dev/null and b/content/post/why-use-spark/gallery/therinspark.jpg differ diff --git a/content/post/why-use-spark/index.md b/content/post/why-use-spark/index.md new file mode 100644 index 0000000..4720fdc --- /dev/null +++ b/content/post/why-use-spark/index.md @@ -0,0 +1,36 @@ ++++ +title = "Why use Spark?" + +date = 2020-06-08T21:00:00 +draft = false + +authors = ["Gabriel Teotonio"] + +tags = ["spark", "hadoop"] + +summary = "" + +# Projects (optional). +# Associate this post with one or more of your projects. +# Otherwise, set `projects = []`. + + +# Featured image +# To use, add an image named `featured.jpg/png` to your project's folder. +[image] +# Caption (optional) +caption = "" + +# Focal point (optional) +# Options: Smart, Center, TopLeft, Top, TopRight, Left, Right, BottomLeft, Bottom, BottomRight +focal_point = "" + +# Show image only in page previews? +preview_only = false + ++++ + +When I started my professional career as a Data Scientist two years ago, the first challenge I faced was being able to deal with large data sets on a cloud platform. Among the many frameworks available at the time, [Spark](https://spark.apache.org/) played a big role, and there are many reasons for that. Spark is considered a *unified analytics engine for large-scale data processing*, and its generality, combining SQL, streaming, and complex analytics, explains its popularity. 
+At the end of last year, the book [_Mastering Spark with R_](https://therinspark.com/) was released, and as an R user I was eager to understand more about the sparklyr package environment and the union of Spark and R. With this in mind, I will write a series of posts summarizing the lessons and topics covered in the book. +![Mastering Spark with R](gallery/therinspark.jpg) +To understand more about how Spark became a reference in the big data scenario, it's good to look at the history behind it. \ No newline at end of file