JuliaData · bkamins · Feb 11, 2023 · Feb 5, 2023 · Feb 5, 2023 · Feb 6, 2023
diff --git a/Project.toml b/Project.toml
@@ -23,6 +23,7 @@ Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
 TableTraits = "3783bdb8-4a98-5b6b-af9a-565f29a5fe9c"
 Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"
 Unicode = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5"
+CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
 
 [compat]
 CategoricalArrays = "0.10.0"
@@ -42,6 +43,7 @@ TableTraits = "0.4, 1"
 Tables = "1.9.0"
 Unitful = "1"
 julia = "1.6"
+CSV = "0.10.9"
 
 [extras]
 CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597"

diff --git a/docs/src/man/basics.md b/docs/src/man/basics.md
@@ -16,7 +16,7 @@ or
 ```julia
 julia> ] # ']' should be pressed
 
-(@v1.6) pkg> add DataFrames
+(@v1.9) pkg> add DataFrames
 ```
 
 If you want to make sure everything works as expected you can run the tests
@@ -35,9 +35,9 @@ you have installed with the `status` command.
 ```julia
 julia> ]
 
-(@v1.6) pkg> status DataFrames
-      Status `C:\Users\TeAmp0is0N\.julia\environments\v1.6\Project.toml`
-  [a93c6f00] DataFrames v1.1.1
+(@v1.9) pkg> status DataFrames
+      Status `~\v1.9\Project.toml`
+  [a93c6f00] DataFrames v1.5.0
 ```
 
 Throughout the rest of the tutorial we will assume that you have installed the
@@ -52,6 +52,37 @@ The most fundamental type provided by DataFrames.jl is `DataFrame`, where
 typically each row is interpreted as an observation and each column as a
 feature.
 
+!!! note
+
+    DataFrames.jl uses precompilation to improve its responsiveness. However,
+    in some scenarios users might want to avoid precompilation to improve
+    package installation time and load time. To disable precompilation of
+    DataFrames.jl in your current project you need to install the
+    [SnoopPrecompile.jl](https://github.com/timholy/SnoopCompile.jl/tree/master/SnoopPrecompile)
+    package and then run the following code:
+    ```
+    using SnoopPrecompile
+    SnoopPrecompile.Preferences.set_preferences!(SnoopPrecompile,
+        "skip_precompile" =>
+        union(SnoopPrecompile.Preferences.load_preference(SnoopPrecompile,
+                                                          "skip_precompile",
+                                                          String[]),
+              ["DataFrames"]);
+        force=true)
+    ```
+    If you later want to re-enable precompilation of DataFrames.jl, you
+    can do it using the following commands:
+    ```
+    using SnoopPrecompile
+    SnoopPrecompile.Preferences.set_preferences!(SnoopPrecompile,
+        "skip_precompile" =>
+        filter(!=("DataFrames"),
+               SnoopPrecompile.Preferences.load_preference(SnoopPrecompile,
+                                                           "skip_precompile",
+                                                           String[]));
+        force=true)
+    ```
+
 ## Constructors and Basic Utility Functions
 
 ### Constructors
@@ -1785,7 +1816,7 @@ in them:
 julia> select(german, Not(["Age", "Saving accounts", "Checking account",
                            "Credit amount", "Purpose"]))
 1000×5 DataFrame
-  Row │ id     Sex      Job    Housing  Duration 
+  Row │ id     Sex      Job    Housing  Duration
       │ Int64  String7  Int64  String7  Int64
 ──────┼──────────────────────────────────────────
     1 │     0  male         2  own             6

diff --git a/src/other/precompile.jl b/src/other/precompile.jl
@@ -1,6 +1,14 @@
 import SnoopPrecompile
 
 SnoopPrecompile.@precompile_all_calls begin
+    import CSV
+
+    # method needed to avoid dispatch ambiguity for reduce(vcat, dfs) on a ChainedVector
+    Base.reduce(::typeof(vcat),
+                dfs::CSV.SentinelArrays.ChainedVector{T, A} where {T<:AbstractDataFrame,
+                                                                A<:AbstractVector{T}}) =
+        reduce(vcat, collect(AbstractDataFrame, dfs)) # reduce a plain Vector copy instead
+
     df = DataFrame(a=[2, 5, 3, 1, 0], b=["a", "b", "c", "a", "b"], c=1:5,
                    p=PooledArray(["a", "b", "c", "a", "b"]),
                    q=[true, false, true, false, true],
@@ -22,16 +30,53 @@ SnoopPrecompile.@precompile_all_calls begin
     outerjoin(df, df, on=:a, makeunique=true)
     outerjoin(df, df, on=:b, makeunique=true)
     outerjoin(df, df, on=:c, makeunique=true)
-    semijoin(df, df, on=:a)
-    semijoin(df, df, on=:b)
-    semijoin(df, df, on=:c)
     leftjoin!(df, DataFrame(a=[2, 5, 3, 1, 0]), on=:a)
     leftjoin!(df, DataFrame(b=["a", "b", "c", "d", "e"]), on=:b)
     leftjoin!(df, DataFrame(c=1:5), on=:c)
     reduce(vcat, [df, df])
     show(IOBuffer(), df)
     subset(df, :q)
-    @view df[1:3, :]
+    subset!(copy(df), :q)
+    df[:, 1:2]
+    df[1:2, :]
+    df[1:2, 1:2]
     @view df[:, 1:2]
+    @view df[1:2, :]
+    @view df[1:2, 1:2]
     transform!(df, :c, [:c :f] .=> [sum, mean, std], :c => :d, [:a, :c] => cor)
+    deleteat!(df, 1)
+    append!(df, copy(df))
+    push!(df, copy(df[1, :]))
+    eachrow(df)
+    eachcol(df)
+    empty(df)
+    empty!(copy(df))
+    filter(:q => identity, df)
+    filter!(:q => identity, df)
+    first(df)
+    last(df)
+    hcat(df, df, makeunique=true)
+    issorted(df)
+    pop!(df)
+    popfirst!(df)
+    repeat(df, 2)
+    reverse(df)
+    reverse!(df)
+    unique(df, :a)
+    unique!(df, :a)
+    wide = DataFrame(id=1:6,
+                     a=repeat(1:3, inner=2),
+                     b=repeat(1.0:2.0, inner=3),
+                     c=repeat(1.0:1.0, inner=6),
+                     d=repeat(1.0:3.0, inner=2))
+    long = stack(wide)
+    unstack(long)
+    unstack(long, :variable, :value, combine=sum)
+    flatten(DataFrame(a=[[1, 2], [3, 4]], b=[1, 2]), :a)
+    dropmissing(DataFrame(a=[1, 2, 3, missing], b=["a", missing, "c", "d"]))
+    df = DataFrame(rand(20, 2), :auto)
+    df.id = repeat(1:2, 10)
+    combine(df, AsTable(r"x") .=> [ByRow(sum), ByRow(mean)])
+    combine(groupby(df, :id), AsTable(r"x") .=> [ByRow(sum), ByRow(mean)])
+    CSV.read(IOBuffer("a,b,c\n1,1.0,a"), DataFrame)
 end
diff --git a/test/dataframe.jl b/test/dataframe.jl
@@ -1892,6 +1892,11 @@ end
                            DataFrame(c=[missing, missing]))
 end
 
+@testset "vcat ChainedVector ambiguity" begin
+    dfs = DataFrames.CSV.SentinelArrays.ChainedVector([[DataFrame(a=1)], [DataFrame(a=2)]])
+    @test reduce(vcat, dfs) == DataFrame(a=1:2) # rows of both chunks, in chunk order
+end
+
 @testset "names for Type, predicate + standard tests of cols" begin
     df_long = DataFrame(a1=1:3, a2=[1, missing, 3],
                         b1=1.0:3.0, b2=[1.0, missing, 3.0],