Skip to content

Commit 67a3715

Browse files
committed
fix: OSD: adjust horizon rules for old-style O horizons, fractional depths, and variants in depth units
1 parent 76a0bfe commit 67a3715

File tree

1 file changed

+25
-21
lines changed

1 file changed

+25
-21
lines changed

R/parseOSD_functions.R

Lines changed: 25 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -303,28 +303,27 @@
303303

304304
#' @importFrom stringi stri_match_all
305305
.extractHzData <- function(tp, logfile = "OSD.log", filename = "FOO.txt") {
306-
306+
307307
# detect horizons with both top and bottom depths
308-
hz.rule <- "([\\^\\'\\/a-zA-Z0-9]+(?: and [\\^\\'\\/a-zA-Z0-9]+)?)\\s*[-=\u2014]+\\s*([Ol0-9.]+)\\s*?(to|-)?\\s+?([Ol0-9.]+)\\s*?(in|inches|cm|centimeters)"
309-
308+
hz.rule <- "([\\^\\'\\/a-zA-Z0-9]+(?: and [\\^\\'\\/a-zA-Z0-9]+)?)\\s*[-=\u2014]+\\s*([Ol0-9.]+)\\s*?(to|-)?\\s+?([Ol0-9.]+)\\s*?(inches|in|cm|centimeters)"
309+
310310
# detect horizons with no bottom depth
311-
hz.rule.no.bottom <- "([\\^\\'\\/a-zA-Z0-9]+(?: and [\\^\\'\\/a-zA-Z0-9]+)?)\\s*[-=\u2014]+?\\s*([Ol0-9.]+)\\s*(to|-)?\\s*([Ol0-9.]+)?\\s*?(in|inches|cm|centimeters)"
312-
311+
hz.rule.no.bottom <- "([\\^\\'\\/a-zA-Z0-9]+(?: and [\\^\\'\\/a-zA-Z0-9]+)?)\\s*[-=]+?\\s*([Ol0-9./ ]+)\\s*(inche?s?|in|cm|centimeters)?\\s*(to|-)?\\s*([Ol0-9./ ]+)?\\s*(inche?s?|in|cm|centimeters)?"
312+
313313
## default encoding of colors: Toggle dry/moist assumption
314314
##
315315
## Profile-level statement: Colors are for dry soil unless otherwise stated | Colors are for moist soil unless otherwise stated
316-
##
316+
##
317317
## Examples:
318318
## moist:
319319
## E1--7 to 12 inches; very dark gray (10YR 3/1) silt loam, 50 percent gray (10YR 5/1) and 50 percent gray (10YR 6/1) dry; moderate thin platy structure parting to weak thin platy; friable, soft; common fine and medium roots throughout; common fine tubular pores; few fine distinct dark yellowish brown (10YR 4/6) friable masses of iron accumulations with sharp boundaries on faces of peds; strongly acid; clear wavy boundary.
320-
##
320+
##
321321
## dry:
322322
## A--0 to 6 inches; light gray (10YR 7/2) loam, dark grayish brown (10YR 4/2) moist; moderate coarse subangular blocky structure; slightly hard, friable, slightly sticky and slightly plastic; many very fine roots; many very fine and few fine tubular and many very fine interstitial pores; 10 percent pebbles; strongly acid (pH 5.1); clear wavy boundary. (1 to 8 inches thick)
323323
##
324324
dry.is.default <- length(grep('for[ athe]+(?:air-* *)?dr[yied]+[ \\n,]+(colors|soil|conditions)', tp, ignore.case = TRUE)) > 0
325325
moist.is.default <- length(grep('for[ athe]+(wet|moi*st)[ \\n,]+(rubbed|crushed|broken|interior|soil|conditions)', tp, ignore.case = TRUE)) > 0
326326

327-
328327
if (dry.is.default)
329328
default.moisture.state <- 'dry'
330329
if (moist.is.default)
@@ -347,7 +346,7 @@
347346

348347
# eliminate empty lines within typical pedon
349348
tp <- tp[nzchar(trimws(tp))]
350-
349+
351350
# ID starting lines of horizon information
352351
hz.idx <- sort(unique(c(grep(hz.rule, tp), grep(hz.rule.no.bottom, tp))))
353352

@@ -356,7 +355,7 @@
356355
if (length(first.line.flag) > 0) {
357356
hz.idx <- hz.idx[-first.line.flag]
358357
}
359-
358+
360359
check.multiline <- diff(hz.idx) > 1
361360
if (any(check.multiline)) {
362361
# multiline typical pedon horizon formatting (needs fix)
@@ -380,17 +379,23 @@
380379
# if none, then try searching for only top depths
381380
if (all(is.na(h))) {
382381
# this won't have the correct number of elements, adjust manually
383-
h <- stringi::stri_match(this.chunk, regex = hz.rule.no.bottom)
384-
h_num <- grep("^\\d+$", h)
385-
h_alp <- grep("[A-Za-z]", h)[2:3]
386-
h <- h[sort(c(h_num, h_alp))]
387-
382+
h <- trimws(stringi::stri_match(this.chunk, regex = hz.rule.no.bottom))
383+
h[2] <- gsub("0", "O", h[2], fixed=TRUE)
384+
h[6] <- gsub("l", "1", h[6], fixed=TRUE)
385+
h <- gsub(" *[1l]/2", ".5", h)
386+
h <- gsub(" *[1l]/[48]", ".25", h) # NB: fudging 1/8 inch -> 1 cm
387+
h <- gsub("^\\.", "0.", h)
388+
h_num <- grep("^\\d+\\.*\\d*$", h)
388389
# fill missing depth with NA
389-
if (length(h) == 3) {
390-
h <- c(h, h[3])
391-
h[3] <- NA
390+
if (length(h_num) == 1) {
391+
h_num <- c(h_num, NA)
392392
}
393+
h_alp <- grep("[A-Za-z]", h)[2:3]
394+
h <- h[c(h_alp[1], h_num, h_alp[2])]
395+
393396
} else {
397+
h[2] <- gsub("0", "O", h[2], fixed=TRUE)
398+
h[c(3,5)] <- gsub("l", "1", h[c(3,5)], fixed=TRUE)
394399
h <- h[c(2:3,5:6)]
395400
}
396401

@@ -403,7 +408,7 @@
403408
## TODO: test this!
404409
# parse ALL colors, result is a multi-row matrix, 5th column is moisture state
405410
colors <- stringi::stri_match_all(this.chunk, regex = color.rule)[[1]]
406-
411+
407412
# replace missing moisture state with (parsed) default value
408413
colors[, 5][which(colors[, 5] == '')] <- default.moisture.state
409414

@@ -448,9 +453,8 @@
448453
moist.colors$moist_chroma <- as.numeric(moist.colors$moist_chroma)
449454
})
450455

451-
## TODO: sanity check / unit reporting: this will fail when formatting is inconsistent (PROPER series)
452456
# convert in -> cm using the first horizon
453-
if (hz.data$units[1] %in% c('inches', 'in')) {
457+
if (hz.data$units[1] %in% c('inches', 'in', 'inch')) {
454458
hz.data$top <- round(hz.data$top * 2.54)
455459
hz.data$bottom <- round(hz.data$bottom * 2.54)
456460
}

0 commit comments

Comments
 (0)