Update tutorial and update to new IndexedTables (#31)

* Some tutorial fixes * update to latest IndexedTables * final tutorial fixes * make compatible with older IndexedTables * bump version for IndexedTables and JuliaDB * updated documenting strategy * added JuliaDBMeta * fix deploydocs * deleted manifest * added compat entry for Documenter
JuliaData · Dec 23, 2018 · ee1b60f · ee1b60f
1 parent 3e761d4
commit ee1b60f
Show file tree

Hide file tree

Showing 7 changed files with 35 additions and 41 deletions.
diff --git a/.gitignore b/.gitignore
@@ -4,3 +4,4 @@
 *play.jl
 docs/build/
 docs/site/
+docs/Manifest.toml
diff --git a/.travis.yml b/.travis.yml
@@ -12,28 +12,19 @@ matrix:
 notifications:
   email: false
 
-## uncomment the following lines to allow failures on nightly julia
-## (tests will run but not make your overall status red)
-#matrix:
-#  allow_failures:
-#  - julia: nightly
+jobs:
+  include:
+    - stage: "Documentation"
+      julia: 1.0
+      os: linux
+      script:
+        - julia --project=docs/ -e 'using Pkg; Pkg.instantiate();
+                                    Pkg.develop(PackageSpec(path=pwd()))'
+        - julia --project=docs/ docs/make.jl
+      after_success: skip
 
-## uncomment and modify the following lines to manually install system packages
-#addons:
-#  apt: # apt-get for linux
-#    packages:
-#    - gfortran
-#before_script: # homebrew for mac
-#  - if [ $TRAVIS_OS_NAME = osx ]; then brew install gcc; fi
-
-## uncomment the following lines to override the default test script
-#script:
-#  - julia -e 'Pkg.clone(pwd()); Pkg.build("JuliaDBMeta"); Pkg.test("JuliaDBMeta"; coverage=true)'
 after_success:
   # push coverage results to Coveralls
   - julia -e 'import Pkg; cd(Pkg.dir("JuliaDBMeta")); Pkg.add("Coverage"); using Coverage; Coveralls.submit(Coveralls.process_folder())'
   # push coverage results to Codecov
   - julia -e 'import Pkg; cd(Pkg.dir("JuliaDBMeta")); Pkg.add("Coverage"); using Coverage; Codecov.submit(Codecov.process_folder())'
-  # deploy docs
-  - julia -e 'import Pkg; Pkg.add("Documenter")'
-  - julia -e 'import Pkg; cd(Pkg.dir("JuliaDBMeta")); include(joinpath("docs", "make.jl"))'
diff --git a/REQUIRE b/REQUIRE
@@ -1,6 +1,6 @@
 julia 1.0
-IndexedTables 0.8.1
-JuliaDB 0.9.0
+IndexedTables 0.9.0
+JuliaDB 0.10.0
 Reexport
 MacroTools
 IterTools

diff --git a/docs/Project.toml b/docs/Project.toml
@@ -0,0 +1,6 @@
+[deps]
+Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
+JuliaDBMeta = "2c06ca41-a429-545c-b8f0-5ca7dd64ba19"
+
+[compat]
+Documenter = "~0.21"
diff --git a/docs/make.jl b/docs/make.jl
@@ -21,8 +21,4 @@ makedocs(
 deploydocs(
     repo = "github.com/piever/JuliaDBMeta.jl.git",
     target = "build",
-    julia  = "0.6",
-    osname = "linux",
-    deps   = nothing,
-    make   = nothing
 )
diff --git a/docs/src/tutorial.md b/docs/src/tutorial.md
@@ -50,7 +50,7 @@ Let's assume we want to select `UniqueCarrier` and `DepDelay` columns and filter
 
 
 ```julia
-@where select(flights, (:UniqueCarrier, :DepDelay)) :DepDelay > 60
+@where select(flights, (:UniqueCarrier, :DepDelay)) !ismissing(:DepDelay) && :DepDelay > 60
 ```
 
 
@@ -90,7 +90,7 @@ Piping:
 
 
 ```julia
-select(flights, (:UniqueCarrier, :DepDelay)) |> @where :DepDelay > 60
+select(flights, (:UniqueCarrier, :DepDelay)) |> @where !ismissing(:DepDelay) && :DepDelay > 60
 ```
 
 
@@ -102,7 +102,7 @@ To avoid the parenthesis and to use the `_` curryfication syntax, you can use th
 ```julia
 @apply flights begin
     select(_, (:UniqueCarrier, :DepDelay))
-    @where :DepDelay > 60
+    @where !ismissing(:DepDelay) && :DepDelay > 60
 end
 ```
 
@@ -162,7 +162,8 @@ To get the average delay, we first filter away datapoints where `ArrDelay` is mi
 
 
 ```julia
-@groupby flights :Dest {mean(dropna(:ArrDelay))}
+using Statistics
+@groupby flights :Dest {mean(skipmissing(:ArrDelay))}
 ```
 
 
@@ -212,7 +213,7 @@ sortedflights = reindex(flights, :Dest)
 using BenchmarkTools
 
 println("Presorted timing:")
-@benchmark @groupby sortedflights {mean(dropna(:ArrDelay))}
+@benchmark @groupby sortedflights {mean(skipmissing(:ArrDelay))}
 ```
 
     Presorted timing:
@@ -234,7 +235,7 @@ println("Presorted timing:")
 
 ```julia
 println("Non presorted timing:")
-@benchmark @groupby flights :Dest {mean(dropna(:ArrDelay))}
+@benchmark @groupby flights :Dest {mean(skipmissing(:ArrDelay))}
 ```
 
     Non presorted timing:
@@ -348,7 +349,7 @@ delay a given flight had and figure out the day and month with the two greatest
 using StatsBase
 @apply flights :UniqueCarrier flatten = true begin
     # Exclude flights with missing DepDelay
-    @where !isnull(:DepDelay)
+    @where !ismissing(:DepDelay)
     # Select only those whose rank is less than 2
     @where_vec ordinalrank(:DepDelay, rev = true) .<= 2
     # Select appropriate fields
@@ -396,11 +397,11 @@ Though in this case, it would have been simpler to use Julia partial sorting:
 ```julia
 @apply flights :UniqueCarrier flatten = true begin
     # Exclude flights with missing DepDelay
-    @where !isnull(:DepDelay)
+    @where !ismissing(:DepDelay)
     # Select appropriate fields
     @map {:Month, :DayofMonth, :DepDelay}
     # select
-    @where_vec selectperm(:DepDelay, 1:2, rev = true)
+    @where_vec partialsortperm(:DepDelay, 1:2, rev = true)
 end;
 ```
 
@@ -434,9 +435,7 @@ end
 
 
 
-### Warning
-
-`missing` (the official Julia way of representing missing data) has not yet been adopted by JuliaDB, so using ShiftedArrays in combination with JuliaDB may be slightly troublesome in Julia 0.6. The situation should be solved in Julia 0.7, where the adoption of `missing` should become more widespread. You can use a different default value with ShiftedArrays (for example, with an `Array` of `Float64` you could do:
+You can also use a different default value with ShiftedArrays (for example, with an `Array` of `Float64` you could do:
 
 
 ```julia
@@ -455,8 +454,8 @@ Use the `@df` macro to be able to refer to columns simply by their name. You can
 using StatPlots
 @apply flights begin
     @transform {Far = :Distance > 1000}
-    @groupby (:Month, :Far) {MeanDep = mean(dropna(:DepDelay)), MeanArr = mean(dropna(:ArrDelay))}
-    @df scatter(:MeanDep, :MeanArr, group = {:Far}, layout = 2, color = :MeanDep ./maximum(:MeanDep), legend = :topleft)
+    @groupby (:Month, :Far) {MeanDep = mean(skipmissing(:DepDelay)), MeanArr = mean(skipmissing(:ArrDelay))}
+    @df scatter(:MeanDep, :MeanArr, group = {:Far}, layout = 2, zcolor = :MeanDep ./maximum(:MeanDep), legend = :topleft)
 end
 ```
 
@@ -470,6 +469,7 @@ For large datasets, summary statistics can be computed using efficient online al
 
 
 ```julia
+using OnlineStats
 @apply flights begin
     @where 500 < :Distance < 2000
     partitionplot(_, :Distance, stat = Extrema(), by = :Month, layout = 12, legend = false, xticks = [])

diff --git a/src/utils.jl b/src/utils.jl
@@ -90,8 +90,8 @@ replace_keyword(arg) = (@capture arg x_ = y_) ? Expr(:kw, x, y) : arg
 
 replace_keywords(args) = map(replace_keyword, args)
 
-_table(cols::C) where{C<:Columns} =
-       NextTable{C}(cols, Int[], IndexedTables.Perm[], fill(missing, length(cols)), nothing)
+_table(cols::C) where{C<:Columns} = 
+        IndexedTable{C}(cols, Int[], IndexedTables.Perm[], fill(missing, length(cols)), nothing)
 _table(c) = c
 
 distinct_tuple(args...) = Tuple(IterTools.distinct(args))