Skip to content

Commit d03e771

Browse files
committed
fix issues with byrow(join)
1 parent 69ce153 commit d03e771

File tree

3 files changed

+19
-15
lines changed

3 files changed

+19
-15
lines changed

docs/src/man/byrow.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ Generally, `byrow` is efficient for any `fun` which returns a single value for e
5757
* `isequal` : Return `true` when all values are equal. Optionally, a vector of values can be passed via the `with` keyword to compare equality with it.
5858
* `isless` : Return `true` when all values are less than the vector (or the column specified by its name) passed as `with`. Passing `rev = true` changes `less` to `greater`.
5959
* `issorted` : Check if the values are sorted
60-
* `join`: Convert values in each row to string and join them into a single string, inserting the given delimiter (if any) between adjacent values. If `last` is given, it will be used after the last string.
60+
* `join`: Convert values in each row to string and join them into a single string, inserting the given delimiter (if any) between adjacent values. If `last` is given, it will be used instead of `delim` between the last two strings.
6161
* `maximum` : Return the maximum value
6262
* `mean` : Compute the mean value
6363
* `minimum` : Return the minimum value

src/byrow/row_functions.jl

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1115,11 +1115,11 @@ row_hash(ds::AbstractDataset, cols = :; threads = true) = row_hash(ds, identity,
11151115
function _convert_uint8_to_string!(res, init0, curr_pos, ds, threads)
11161116
if threads
11171117
Threads.@threads for i in 1:nrow(ds)
1118-
res[i] = String(view(init0, 1:curr_pos[i]-1, i))
1118+
res[i] = String(view(init0, 1:curr_pos[i]-2, i))
11191119
end
11201120
else
11211121
for i in 1:nrow(ds)
1122-
res[i] = String(view(init0, 1:curr_pos[i]-1, i))
1122+
res[i] = String(view(init0, 1:curr_pos[i]-2, i))
11231123
end
11241124
end
11251125
end
@@ -1146,14 +1146,15 @@ function row_join(ds::AbstractDataset, cols = :; threads = true, delim::Abstract
11461146
curr_pos = ones(Int, nrow(ds))
11471147

11481148
delimiter = Base.CodeUnits(delim)
1149-
row_join!(init0, curr_pos, ds, repeat([identity], length(colsidx)), colsidx; delim = delimiter, quotechar = nothing, threads = threads)
1149+
row_join!(init0, curr_pos, ds, repeat([identity], length(colsidx)-1), view(colsidx, 1:length(colsidx)-1); delim = delimiter, quotechar = nothing, threads = threads)
11501150
if length(last)>0
1151-
last_uint = Base.CodeUnits(last)
1152-
last_len = length(last_uint)
1153-
_add_last_for_join!(init0, curr_pos, ds, last_uint, last_len, threads)
1154-
else
1155-
curr_pos .-= 1
1151+
delimiter = Base.CodeUnits(last)
1152+
end
1153+
if length(colsidx) > 1
1154+
_add_last_for_join!(init0, curr_pos, ds, delimiter, length(delimiter), threads)
11561155
end
1156+
row_join!(init0, curr_pos, ds, [identity], colsidx[length(colsidx)]; delim = delimiter, quotechar = nothing, threads = threads)
1157+
11571158
res = Vector{Union{String, Missing}}(undef, nrow(ds))
11581159
_convert_uint8_to_string!(res, init0, curr_pos, ds, threads)
11591160
res

test/byrow.jl

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -319,19 +319,22 @@
319319
ds = Dataset(x1 = ["A", "B,"], x2 =["TEA", "TOOOOL"])
320320
@test byrow(ds, join, r"x") == ["ATEA", "B,TOOOOL"]
321321
@test byrow(ds, join, r"x", delim = ",") == ["A,TEA", "B,,TOOOOL"]
322-
@test byrow(ds, join, r"x", last = ".") == ["ATEA.", "B,TOOOOL."]
323-
@test byrow(ds, join, r"x", last = "end", delim = "/-/") == ["A/-/TEAend", "B,/-/TOOOOLend"]
322+
@test byrow(ds, join, r"x", last = ".") == ["A.TEA", "B,.TOOOOL"]
323+
@test byrow(ds, join, r"x", last = "end", delim = "/-/") == ["AendTEA", "B,endTOOOOL"]
324324

325325
@test byrow(view(ds, [1,2], :), join, r"x") == ["ATEA", "B,TOOOOL"]
326326
@test byrow(view(ds, [1,2], :), join, r"x", delim = ",") == ["A,TEA", "B,,TOOOOL"]
327-
@test byrow(view(ds, [1,2], :), join, r"x", last = ".") == ["ATEA.", "B,TOOOOL."]
328-
@test byrow(view(ds, [1,2], :), join, r"x", last = "end", delim = "/-/") == ["A/-/TEAend", "B,/-/TOOOOLend"]
327+
@test byrow(view(ds, [1,2], :), join, r"x", last = ".") == ["A.TEA", "B,.TOOOOL"]
328+
@test byrow(view(ds, [1,2], :), join, r"x", last = "end", delim = "/-/") == ["AendTEA", "B,endTOOOOL"]
329329

330330
repeat!(ds, 1000)
331331
@test byrow(ds, join, r"x", threads = true) == repeat(["ATEA", "B,TOOOOL"], 1000)
332332
@test byrow(ds, join, r"x", threads = true, delim = ",") == repeat(["A,TEA", "B,,TOOOOL"], 1000)
333-
@test byrow(ds, join, r"x", threads = true, last = ".") == repeat(["ATEA.", "B,TOOOOL."], 1000)
334-
@test byrow(ds, join, r"x", threads = true, last = "end", delim = "/-/") == repeat(["A/-/TEAend", "B,/-/TOOOOLend"], 1000)
333+
@test byrow(ds, join, r"x", threads = true, last = ".") == repeat(["A.TEA", "B,.TOOOOL"], 1000)
334+
@test byrow(ds, join, r"x", threads = true, last = "end", delim = "/-/") == repeat(["AendTEA", "B,endTOOOOL"], 1000)
335+
336+
ds = Dataset(x = [1,10], x2 = ["A", "BC"], x3 = [2.0,4.54])
337+
@test byrow(ds, join, :, delim = "--", last = "-") == ["1--A-2.0", "10--BC-4.54"]
335338
end
336339

337340
@testset "cum*/! - sort/!" begin

0 commit comments

Comments
 (0)