|
1 | 1 | #!/usr/bin/awk -f
|
2 | 2 | #
|
3 | 3 | # Software: JSON.awk - a practical JSON parser written in awk
|
4 |
| -# Version: 1.4.1 |
| 4 | +# Version: 1.4.2 |
5 | 5 | # Copyright (c) 2013-2020, step
|
6 | 6 | # License: MIT or Apache 2
|
7 | 7 | # Project home: https://github.com/step-/JSON.awk
|
@@ -338,21 +338,21 @@ function tokenize(a1) { #{{{1
|
338 | 338 | # - reduce [:cntrl:] to [\000-\037]; https://github.com/step-/JSON.awk/issues/5
|
339 | 339 | # - reduce [:space:] to [ \t\n\r]; https://tools.ietf.org/html/rfc8259#page-5 ws
|
340 | 340 | # - replace {4} quantifier with four repeated [0-9a-fA-F] for mawk; https://unix.stackexchange.com/a/506125
|
341 |
| -# - BOM encodings UTF-8, UTF16-LE and UTF-BE; https://en.wikipedia.org/wiki/Byte_order_mark#Byte_order_marks_by_encoding |
| 341 | +# - UTF-8 BOM signature; https://en.wikipedia.org/wiki/Byte_order_mark#Byte_order_marks_by_encoding |
342 | 342 | # ----------
|
343 | 343 | # TOKENS = BOM "|" STRING "|" NUMBER "|" KEYWORD "|" SPACE "|."
|
344 |
| -# BOM = "^\357\273\277|^\377\376|^\376\377" |
| 344 | +# BOM = "^\357\273\277" # cf. issue #17 |
345 | 345 | # STRING = "\"" CHAR "*(" ESCAPE CHAR "*)*\""
|
346 | 346 | # ESCAPE = "(\\[^u[:cntrl:]]|\\u[0-9a-fA-F]{4})"
|
347 | 347 | # CHAR = "[^[:cntrl:]\\\"]"
|
348 | 348 | # NUMBER = "-?(0|[1-9][0-9]*)([.][0-9]+)?([eE][+-]?[0-9]+)?"
|
349 | 349 | # KEYWORD = "null|false|true"
|
350 | 350 | # SPACE = "[[:space:]]+"
|
351 | 351 |
|
352 |
| - gsub(/^\357\273\277|^\377\376|^\376\377|"[^"\\\000-\037]*((\\[^u\000-\037]|\\u[0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F])[^"\\\000-\037]*)*"|-?(0|[1-9][0-9]*)([.][0-9]+)?([eE][+-]?[0-9]+)?|null|false|true|[ \t\n\r]+|./, "\n&", a1) |
| 352 | + gsub(/^\357\273\277|"[^"\\\000-\037]*((\\[^u\000-\037]|\\u[0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F])[^"\\\000-\037]*)*"|-?(0|[1-9][0-9]*)([.][0-9]+)?([eE][+-]?[0-9]+)?|null|false|true|[ \t\n\r]+|./, "\n&", a1) |
353 | 353 | gsub("\n" "[ \t\n\r]+", "\n", a1)
|
354 | 354 | # ^\n BOM?
|
355 |
| - sub(/^\n((\357\273\277|\377\376|\376\377)\n)?/, "", a1) |
| 355 | + sub(/^\n(\357\273\277\n)?/, "", a1) |
356 | 356 | ITOKENS=0 # get_token() helper
|
357 | 357 | return NTOKENS = split(a1, TOKENS, /\n/)
|
358 | 358 | }
|
|
0 commit comments