@@ -639,3 +639,331 @@ function Base.push!(ds::Dataset, row::Any; promote::Bool=false)
639
639
_reset_grouping_info! (ds)
640
640
ds
641
641
end
642
+
643
+
644
# Prepend one row to `ds`, taking the values from a name-keyed `row`
# (`NamedTuple` or `AbstractDict`). Columns are matched by name; `cols` selects
# how strictly the names of `row` must agree with the names of `ds`
# (:orderequal, :setequal, :intersect, :subset or :union). With `promote=true`
# a column whose element type cannot hold the new value is replaced by a freshly
# allocated column of the promoted element type. Column formats are preserved.
#
# Throws `ArgumentError` for an unknown `cols` value or when the names of `row`
# do not satisfy the selected `cols` rule; errors raised while inserting a value
# are logged and rethrown after the already-inserted values have been removed.
function Base.pushfirst!(ds::Dataset, row::Union{AbstractDict, NamedTuple};
                         cols::Symbol=:setequal,
                         promote::Bool=(cols in [:union, :subset]))
    # push keep formats
    possible_cols = (:orderequal, :setequal, :intersect, :subset, :union)
    if !(cols in possible_cols)
        throw(ArgumentError("`cols` keyword argument must be any of :" *
                            join(possible_cols, ", :")))
    end

    nrows, ncols = size(ds)
    targetrows = nrows + 1
    # here the formats should be kept, setproperty! modifies time
    if ncols == 0 && row isa NamedTuple
        for (n, v) in pairs(row)
            # NOTE(review): `ds` has no columns in this branch, so `getformat(ds, n)`
            # queries a column that has not been created yet - confirm it cannot throw.
            format_of_cur_col = getformat(ds, n)
            setproperty!(ds, n, fill!(allocatecol(typeof(v), 1), v))
            setformat!(ds, n => format_of_cur_col)
        end
        _reset_grouping_info!(ds)
        return ds
    end

    old_row_type = typeof(row)
    # a dictionary keyed by strings is converted to a NamedTuple keyed by symbols
    if row isa AbstractDict && keytype(row) !== Symbol &&
        (keytype(row) <: AbstractString || all(x -> x isa AbstractString, keys(row)))
        row = (;(Symbol.(keys(row)) .=> values(row))...)
    end

    # in the code below we use a direct access to _columns because
    # we resize the columns so temporarily the `Dataset` is internally
    # inconsistent and normal data set indexing would error.
    if cols == :union
        current_modified = _attributes(ds).meta.modified[]
        if row isa AbstractDict && keytype(row) !== Symbol && !all(x -> x isa Symbol, keys(row))
            throw(ArgumentError("when `cols == :union` all keys of row must be Symbol"))
        end
        for (i, colname) in enumerate(_names(ds))
            format_of_cur_col = getformat(ds, colname)
            col = _columns(ds)[i]
            if haskey(row, colname)
                val = row[colname]
            else
                val = missing
            end
            S = typeof(val)
            T = eltype(col)
            if S <: T || promote_type(S, T) <: T
                pushfirst!(col, val)
            elseif !promote
                try
                    pushfirst!(col, val)
                catch err
                    setformat!(ds, colname => format_of_cur_col)
                    # Roll back: the new values sit at the FRONT of the columns,
                    # so they must be removed from the front. `resize!` would
                    # instead drop the original last row.
                    for donecol in _columns(ds)
                        while length(donecol) > nrows
                            popfirst!(donecol)
                        end
                    end
                    _attributes(ds).meta.modified[] = current_modified
                    @error "Error adding value to column :$colname."
                    rethrow(err)
                end
            else
                newcol = similar(col, promote_type(S, T), targetrows)
                copyto!(newcol, 2, col, 1, nrows)
                newcol[1] = val
                firstindex(newcol) != 1 && _onebased_check_error()
                _columns(ds)[i] = newcol
                setformat!(ds, colname => format_of_cur_col)
            end
        end
        # every existing column must have grown by exactly one element
        # (this fails e.g. when two columns alias the same vector)
        for (colname, col) in zip(_names(ds), _columns(ds))
            if length(col) != targetrows
                # roll back by removing the prepended elements from the front
                for col2 in _columns(ds)
                    while length(col2) > nrows
                        popfirst!(col2)
                    end
                end
                _attributes(ds).meta.modified[] = current_modified
                throw(AssertionError("Error adding value to column :$colname"))
            end
        end
        # columns present only in `row` are added, with `missing` for the old rows
        for colname in setdiff(keys(row), _names(ds))
            val = row[colname]
            S = typeof(val)
            if nrows == 0
                newcol = [val]
            else
                newcol = allocatecol(Union{Missing, S}, targetrows)
                fill!(newcol, missing)
                newcol[1] = val
            end
            ds[!, colname] = newcol
        end
        _modified(_attributes(ds))
        _reset_grouping_info!(ds)
        return ds
    end

    if cols == :orderequal
        if old_row_type <: Dict
            throw(ArgumentError("passing `Dict` as `row` when `cols == :orderequal` " *
                                "is not allowed as it is unordered"))
        elseif length(row) != ncol(ds) || any(x -> x[1] != x[2], zip(keys(row), _names(ds)))
            throw(ArgumentError("when `cols == :orderequal` pushed row must " *
                                "have the same column names and in the " *
                                "same order as the target data set"))
        end
    elseif cols === :setequal
        # Only check for equal lengths if :setequal is selected,
        # as an error will be thrown below if some names don't match
        if length(row) != ncols
            # an explicit error is thrown as this was allowed in the past
            throw(ArgumentError("`pushfirst!` with `cols` equal to `:setequal` " *
                                "requires `row` to have the same number of elements " *
                                "as the number of columns in `ds`."))
        end
    end
    current_col = 0
    current_modified = _attributes(ds).meta.modified[]
    try
        for (col, nm) in zip(_columns(ds), _names(ds))
            format_of_cur_col = getformat(ds, nm)
            current_col += 1
            if cols === :subset
                val = get(row, nm, missing)
            else
                val = row[nm]
            end
            S = typeof(val)
            T = eltype(col)
            if S <: T || !promote || promote_type(S, T) <: T
                pushfirst!(col, val)
            else
                newcol = similar(col, promote_type(S, T), targetrows)
                copyto!(newcol, 2, col, 1, nrows)
                newcol[1] = val
                firstindex(newcol) != 1 && _onebased_check_error()
                _columns(ds)[columnindex(ds, nm)] = newcol
                setformat!(ds, nm => format_of_cur_col)
            end
        end
        current_col = 0
        for col in _columns(ds)
            current_col += 1
            @assert length(col) == targetrows
        end
    catch err
        # Clean up the partial row. The new values were inserted at index 1,
        # so they are removed from the front; `resize!(col, nrows)` would keep
        # them and discard the original last row instead.
        for col in _columns(ds)
            while length(col) > nrows
                popfirst!(col)
            end
        end
        _attributes(ds).meta.modified[] = current_modified
        @error "Error adding value to column :$(_names(ds)[current_col])."
        rethrow(err)
    end
    _modified(_attributes(ds))
    _reset_grouping_info!(ds)
    return ds
end
800
+
801
"""
    pushfirst!(ds::Dataset, row::Union{Tuple, AbstractArray}; promote::Bool=false)
    pushfirst!(ds::Dataset, row::Union{DatasetRow, NamedTuple, AbstractDict};
               cols::Symbol=:setequal, promote::Bool=(cols in [:union, :subset]))

Add in-place one row at the beginning of `ds` taking the values from `row`.

Column types of `ds` are preserved, and new values are converted if necessary.
An error is thrown if conversion fails.

If `row` is neither a `DatasetRow`, `NamedTuple` nor `AbstractDict` then
it must be a `Tuple` or an `AbstractArray`
and columns are matched by order of appearance. In this case `row` must contain
the same number of elements as the number of columns in `ds`.

If `row` is a `DatasetRow`, `NamedTuple` or `AbstractDict` then
values in `row` are matched to columns in `ds` based on names. The exact behavior
depends on the `cols` argument value in the following way:
* If `cols == :setequal` (this is the default)
  then `row` must contain exactly the same columns as `ds` (but possibly in a
  different order).
* If `cols == :orderequal` then `row` must contain the same columns in the same
  order (for `AbstractDict` this option requires that `keys(row)` matches
  `propertynames(ds)` to allow for support of ordered dicts; however, if `row`
  is a `Dict` an error is thrown as it is an unordered collection).
* If `cols == :intersect` then `row` may contain more columns than `ds`,
  but all column names that are present in `ds` must be present in `row` and only
  they are used to populate a new row in `ds`.
* If `cols == :subset` then `pushfirst!` behaves like for `:intersect` but if some
  column is missing in `row` then a `missing` value is pushed to `ds`.
* If `cols == :union` then columns missing in `ds` that are present in `row` are
  added to `ds` (using `missing` for existing rows) and a `missing` value is
  pushed to columns missing in `row` that are present in `ds`.

If `promote=true` and element type of a column present in `ds` does not allow
the type of a pushed argument then a new column with a promoted element type
allowing it is freshly allocated and stored in `ds`. If `promote=false` an error
is thrown.

As a special case, if `ds` has no columns and `row` is a `NamedTuple` or
`DatasetRow`, columns are created for all values in `row`, using their names
and order.

Please note that `pushfirst!` must not be used on a `Dataset` that contains columns
that are aliases (equal when compared with `===`).

# Examples
```jldoctest
julia> ds = Dataset(A=1:3, B=1:3);

julia> pushfirst!(ds, (true, false))
4×2 Dataset
 Row │ A         B
     │ identity  identity
     │ Int64?    Int64?
─────┼────────────────────
   1 │        1         0
   2 │        1         1
   3 │        2         2
   4 │        3         3

julia> pushfirst!(ds, ds[1, :])
5×2 Dataset
 Row │ A         B
     │ identity  identity
     │ Int64?    Int64?
─────┼────────────────────
   1 │        1         0
   2 │        1         0
   3 │        1         1
   4 │        2         2
   5 │        3         3

julia> pushfirst!(ds, (C="something", A=true, B=false), cols=:intersect)
6×2 Dataset
 Row │ A         B
     │ identity  identity
     │ Int64?    Int64?
─────┼────────────────────
   1 │        1         0
   2 │        1         0
   3 │        1         0
   4 │        1         1
   5 │        2         2
   6 │        3         3

julia> pushfirst!(ds, Dict(:A=>1.0, :C=>1.0), cols=:union)
7×3 Dataset
 Row │ A         B         C
     │ identity  identity  identity
     │ Float64?  Int64?    Float64?
─────┼───────────────────────────────
   1 │      1.0   missing        1.0
   2 │      1.0         0  missing
   3 │      1.0         0  missing
   4 │      1.0         0  missing
   5 │      1.0         1  missing
   6 │      2.0         2  missing
   7 │      3.0         3  missing

julia> pushfirst!(ds, NamedTuple(), cols=:subset)
8×3 Dataset
 Row │ A          B         C
     │ identity   identity  identity
     │ Float64?   Int64?    Float64?
─────┼────────────────────────────────
   1 │ missing     missing  missing
   2 │       1.0   missing        1.0
   3 │       1.0         0  missing
   4 │       1.0         0  missing
   5 │       1.0         0  missing
   6 │       1.0         1  missing
   7 │       2.0         2  missing
   8 │       3.0         3  missing
```
"""
function Base.pushfirst!(ds::Dataset, row::Any; promote::Bool=false)

    # Modify Dataset
    if !(row isa Union{Tuple, AbstractArray})
        # an explicit error is thrown as this was allowed in the past
        throw(ArgumentError("`pushfirst!` does not allow passing collections of type " *
                            "$(typeof(row)) to be pushed into a Dataset. Only " *
                            "`Tuple`, `AbstractArray`, `AbstractDict`, `DatasetRow` " *
                            "and `NamedTuple` are allowed."))
    end
    nrows, ncols = size(ds)
    targetrows = nrows + 1
    if length(row) != ncols
        msg = "Length of `row` does not match `Dataset` column count."
        throw(DimensionMismatch(msg))
    end
    current_col = 0
    # remembered so that a failed insertion does not leave `ds` marked as modified
    current_modified = _attributes(ds).meta.modified[]
    try
        # values are matched to columns by position
        for (i, (col, val)) in enumerate(zip(_columns(ds), row))
            current_col += 1
            format_of_cur_col = getformat(ds, current_col)
            S = typeof(val)
            T = eltype(col)
            if S <: T || !promote || promote_type(S, T) <: T
                pushfirst!(col, val)
            else
                # the column cannot hold `val`: replace it by a promoted copy
                newcol = allocatecol(promote_type(S, T), targetrows)
                copyto!(newcol, 2, col, 1, nrows)
                newcol[1] = val
                firstindex(newcol) != 1 && _onebased_check_error()
                _columns(ds)[i] = newcol
                setformat!(ds, i => format_of_cur_col)
            end
        end
        current_col = 0
        for col in _columns(ds)
            current_col += 1
            @assert length(col) == targetrows
        end
    catch err
        # clean up partial row: the new values were inserted at index 1, so they
        # are removed from the front; `resize!(col, nrows)` would keep them and
        # discard the original last row instead
        for col in _columns(ds)
            while length(col) > nrows
                popfirst!(col)
            end
        end
        _attributes(ds).meta.modified[] = current_modified
        @error "Error adding value to column :$(_names(ds)[current_col])."
        rethrow(err)
    end
    _modified(_attributes(ds))
    _reset_grouping_info!(ds)
    ds
end
0 commit comments