diff --git a/cmd/ascii2der/encoder.go b/ascii2der/encoder.go similarity index 66% rename from cmd/ascii2der/encoder.go rename to ascii2der/encoder.go index 0d84559..a0b04d5 100644 --- a/cmd/ascii2der/encoder.go +++ b/ascii2der/encoder.go @@ -12,15 +12,45 @@ // See the License for the specific language governing permissions and // limitations under the License. -package main +package ascii2der import ( "errors" "fmt" + "unicode/utf16" "github.com/google/der-ascii/internal" ) +// appendUTF16 marshals r using UTF-16 and appends the result to dst, returning +// the updated slice. +// +// This logic intentionally tolerates unpaired surrogates. +func appendUTF16(dst []byte, r rune) []byte { + if r <= 0xffff { + return append(dst, byte(r>>8), byte(r)) + } + + r1, r2 := utf16.EncodeRune(r) + dst = append(dst, byte(r1>>8), byte(r1)) + dst = append(dst, byte(r2>>8), byte(r2)) + return dst +} + +// appendUTF32 marshals r using UTF-32 and appends the result to dst, returning +// the updated slice. +// +// In other words, this function writes r as an integer in big-endian order. +func appendUTF32(dst []byte, r rune) []byte { + return append(dst, byte(r>>24), byte(r>>16), byte(r>>8), byte(r)) +} + +// appendBase128 marshals an integer in base 128, a varint format used by OIDs +// and long-form tag numbers, and appends the result to dst, returning the +// updated slice. +// +// This function is the same as appendBase128WithLength with length set to zero, +// which cannot fail. func appendBase128(dst []byte, value uint32) []byte { dst, err := appendBase128WithLength(dst, value, 0) if err != nil { @@ -30,6 +60,11 @@ func appendBase128(dst []byte, value uint32) []byte { return dst } +// appendBase128WithLength marshals an integer in base 128, a varint format used by OIDs +// and long-form tag numbers, and appends the result to dst, returning the +// updated slice. +// +// If length is zero, the minimal length is chosen. 
func appendBase128WithLength(dst []byte, value uint32, length int) ([]byte, error) { // Count how many bytes are needed. var l int @@ -120,18 +155,25 @@ func appendInteger(dst []byte, value int64) []byte { return dst } -func appendObjectIdentifier(dst []byte, value []uint32) ([]byte, bool) { +// appendObjectIdentifier marshals the given array of integers as an OID. +func appendObjectIdentifier(dst []byte, value []uint32) ([]byte, error) { // Validate the input before anything is written. - if len(value) < 2 || value[0] > 2 || (value[0] < 2 && value[1] > 39) { - return dst, false + if len(value) < 2 { + return dst, errors.New("OIDs must have at least two arcs") + } + if value[0] > 2 { + return dst, fmt.Errorf("first arc of an OID must be one of 0, 1, or 2; got %d", value[0]) + } + if value[0] < 2 && value[1] > 39 { + return dst, fmt.Errorf("second arc of an OID must be at most 39; got %d", value[1]) } if value[0]*40+value[1] < value[1] { - return dst, false + return dst, errors.New("first two arcs overflowed") } dst = appendBase128(dst, value[0]*40+value[1]) for _, v := range value[2:] { dst = appendBase128(dst, v) } - return dst, true + return dst, nil } diff --git a/cmd/ascii2der/encoder_test.go b/ascii2der/encoder_test.go similarity index 95% rename from cmd/ascii2der/encoder_test.go rename to ascii2der/encoder_test.go index 5faa8ba..f0472b8 100644 --- a/cmd/ascii2der/encoder_test.go +++ b/ascii2der/encoder_test.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package main +package ascii2der import ( "bytes" @@ -162,21 +162,21 @@ var appendObjectIdentifierTests = []struct { func TestAppendObjectIdentifier(t *testing.T) { for i, tt := range appendObjectIdentifierTests { - dst, ok := appendObjectIdentifier(nil, tt.value) + dst, err := appendObjectIdentifier(nil, tt.value) if !tt.ok { - if ok { + if err == nil { t.Errorf("%d. 
appendObjectIdentifier(nil, %v) unexpectedly suceeded.", i, tt.value) } else if len(dst) != 0 { t.Errorf("%d. appendObjectIdentifier did not preserve input.", i) } } else if !bytes.Equal(dst, tt.encoded) { - t.Errorf("%d. appendObjectIdentifier(nil, %v) = %v, wanted %v.", i, tt.value, dst, tt.encoded) + t.Errorf("%d. appendObjectIdentifier(nil, %v) = %v, %v, wanted %v.", i, tt.value, dst, err, tt.encoded) } dst = []byte{0} - dst, ok = appendObjectIdentifier(dst, tt.value) + dst, err = appendObjectIdentifier(dst, tt.value) if !tt.ok { - if ok { + if err == nil { t.Errorf("%d. appendObjectIdentifier(nil, %v) unexpectedly suceeded.", i, tt.value) } else if !bytes.Equal(dst, []byte{0}) { t.Errorf("%d. appendObjectIdentifier did not preserve input.", i) diff --git a/ascii2der/examples_test.go b/ascii2der/examples_test.go new file mode 100644 index 0000000..e3fbbc9 --- /dev/null +++ b/ascii2der/examples_test.go @@ -0,0 +1,31 @@ +// Copyright 2015 The DER ASCII Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package ascii2der + +import ( + "fmt" +) + +func Example() { + scanner := NewScanner(` + SEQUENCE { + INTEGER { "totally an integer" } + } +`) + + der, _ := scanner.Exec() + fmt.Printf("%x\n", der) + // Output: 30140212746f74616c6c7920616e20696e7465676572 +} diff --git a/ascii2der/scanner.go b/ascii2der/scanner.go new file mode 100644 index 0000000..b0c0d81 --- /dev/null +++ b/ascii2der/scanner.go @@ -0,0 +1,597 @@ +// Copyright 2015 The DER ASCII Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package ascii2der implements the DER-ASCII language described in +// https://github.com/google/der-ascii/blob/master/language.txt. +// +// The Scanner type can be used to parse DER-ASCII files and output byte blobs +// that may or may not be valid DER. +package ascii2der + +import ( + "encoding/hex" + "errors" + "fmt" + "regexp" + "strconv" + "strings" + "unicode/utf8" + + "github.com/google/der-ascii/internal" +) + +// A Position describes a location in the input stream. +// +// The zero-value Position represents the first byte of an anonymous input file. +type Position struct { + Offset int // Byte offset. + Line int // Line number (zero-indexed). + Column int // Column number (zero-indexed byte, not rune, count). + File string // Optional file name for pretty-printing. +} + +// String converts a Position to a string. 
+func (p Position) String() string { + file := p.File + if file == "" { + file = "" + } + return fmt.Sprintf("%s:%d:%d", file, p.Line+1, p.Column+1) +} + +// A tokenKind is a kind of token. +type tokenKind int + +const ( + tokenBytes tokenKind = iota + tokenLeftCurly + tokenRightCurly + tokenIndefinite + tokenLongForm + tokenEOF +) + +// A ParseError may be produced while executing a DER ASCII file, wrapping +// another error along with a position. +// +// Errors produced by functions in this package may be type-asserted to +// *ParseError to try and obtain the position at which the error occurred. +type ParseError struct { + Pos Position + Err error +} + +// Error makes this type into an error type. +func (e *ParseError) Error() string { + return fmt.Sprintf("%s: %s", e.Pos, e.Err) +} + +// Unwrap extracts the inner wrapped error. +// +// See errors.Unwrap(). +func (e *ParseError) Unwrap() error { + return e.Err +} + +// A token is a token in a DER ASCII file. +type token struct { + // Kind is the kind of the token. + Kind tokenKind + // Value, for a tokenBytes token, is the decoded value of the token in + // bytes. + Value []byte + // Pos is the position of the first byte of the token. + Pos Position + // Length, for a tokenLongForm token, is the number of bytes to use to + // encode the length, not including the initial one. + Length int +} + +var ( + regexpInteger = regexp.MustCompile(`^-?[0-9]+$`) + regexpOID = regexp.MustCompile(`^[0-9]+(\.[0-9]+)+$`) +) + +// A Scanner represents parsing state for a DER ASCII file. +// +// A zero-value Scanner is ready to begin parsing (given that Input is set to +// a valid value). However, it is recommended to use NewScanner to create a new +// Scanner, since it can pre-populate fields other than Input with default +// settings. +type Scanner struct { + // Input is the input text being processed. + Input string + // pos is the current position at which parsing should + // resume. 
The Offset field is used for indexing into Input; the remaining + // fields are used for error-reporting. + pos Position +} + +// NewScanner creates a new scanner for parsing the given input. +func NewScanner(input string) *Scanner { + return &Scanner{Input: input} +} + +// SetFile sets the file path shown in this Scanner's error reports. +func (s *Scanner) SetFile(path string) { + s.pos.File = path +} + +// Exec consumes tokens until Input is exhausted, returning the resulting +// encoded maybe-DER. +func (s *Scanner) Exec() ([]byte, error) { + return s.exec(nil) +} + +// isEOF returns whether the position n bytes past the cursor is at or beyond +// the end of the input. +func (s *Scanner) isEOF(n int) bool { + return s.pos.Offset+n >= len(s.Input) +} + +// advance advances the scanner's cursor n positions. +// +// Unlike just s.pos.Offset += n, this will not proceed beyond the end of the +// string, and will update the line and column information accordingly. +func (s *Scanner) advance(n int) { + for i := 0; i < n && !s.isEOF(0); i++ { + if s.Input[s.pos.Offset] == '\n' { + s.pos.Line++ + s.pos.Column = 0 + } else { + s.pos.Column++ + } + s.pos.Offset++ + } +} + +// consume advances the cursor by n bytes and returns the source bytes from the +// initial cursor position up to, but excluding, the final cursor position. +// +// If EOF is reached before all n bytes are consumed, the function returns +// false. +func (s *Scanner) consume(n int) (string, bool) { + start := s.pos.Offset + s.advance(n) + if s.pos.Offset-start != n { + return "", false + } + + return s.Input[start:s.pos.Offset], true +} + +// consumeUntil advances the cursor until the given byte is seen, returning all +// source bytes between the initial cursor position and excluding the given +// byte. This function will advance past the searched-for byte. +// +// If EOF is reached before the byte is seen, the function returns false. 
+func (s *Scanner) consumeUntil(b byte) (string, bool) { + if i := strings.IndexByte(s.Input[s.pos.Offset:], b); i != -1 { + text, _ := s.consume(i + 1) + return text[:i], true + } + return "", false +} + +// parseEscapeSequence parses a DER-ASCII escape sequence, returning the rune +// it escapes. +// +// Valid escapes are: +// \n \" \\ \xNN \uNNNN \UNNNNNNNN +// +// This function assumes that the scanner's cursor is currently on a \ rune. +func (s *Scanner) parseEscapeSequence() (rune, error) { + s.advance(1) // Skip the \. The caller is assumed to have validated it. + if s.isEOF(0) { + return 0, &ParseError{s.pos, errors.New("expected escape character")} + } + + switch c := s.Input[s.pos.Offset]; c { + case 'n': + s.advance(1) + return '\n', nil + case '"', '\\': + s.advance(1) + return rune(c), nil + case 'x', 'u', 'U': + s.advance(1) + + var digits int + switch c { + case 'x': + digits = 2 + case 'u': + digits = 4 + case 'U': + digits = 8 + } + + hexes, ok := s.consume(digits) + if !ok { + return 0, &ParseError{s.pos, errors.New("unfinished escape sequence")} + } + + bytes, err := hex.DecodeString(hexes) + if err != nil { + return 0, &ParseError{s.pos, err} + } + + var r rune + for _, b := range bytes { + r <<= 8 + r |= rune(b) + } + return r, nil + default: + return 0, &ParseError{s.pos, fmt.Errorf("unknown escape sequence \\%c", c)} + } +} + +// parseQuotedString parses a UTF-8 string until the next ". +// +// This function assumes that the scanner's cursor is currently on a " rune. +func (s *Scanner) parseQuotedString() (token, error) { + s.advance(1) // Skip the ". The caller is assumed to have validated it. 
+ start := s.pos + var bytes []byte + for { + if s.isEOF(0) { + return token{}, &ParseError{start, errors.New("unmatched \"")} + } + switch c := s.Input[s.pos.Offset]; c { + case '"': + s.advance(1) + return token{Kind: tokenBytes, Value: bytes, Pos: start}, nil + case '\\': + escapeStart := s.pos + r, err := s.parseEscapeSequence() + if err != nil { + return token{}, err + } + if r > 0xff { + // TODO(davidben): Alternatively, should these encode as UTF-8? + return token{}, &ParseError{escapeStart, errors.New("illegal escape for quoted string")} + } + bytes = append(bytes, byte(r)) + default: + s.advance(1) + bytes = append(bytes, c) + } + } +} + +// parseUTF16String parses a UTF-16 string until the next ". +// +// This function assumes that the scanner's cursor is currently on a u followed +// by a " rune. +func (s *Scanner) parseUTF16String() (token, error) { + s.advance(2) // Skip the u". The caller is assumed to have validated it. + start := s.pos + var bytes []byte + for { + if s.isEOF(0) { + return token{}, &ParseError{start, errors.New("unmatched \"")} + } + + switch s.Input[s.pos.Offset] { + case '"': + s.advance(1) + return token{Kind: tokenBytes, Value: bytes, Pos: start}, nil + case '\\': + r, err := s.parseEscapeSequence() + if err != nil { + return token{}, err + } + bytes = appendUTF16(bytes, r) + default: + r, n := utf8.DecodeRuneInString(s.Input[s.pos.Offset:]) + // Note DecodeRuneInString may return utf8.RuneError if there is a + // legitimate replacement character in the input. The documentation + // says errors return (RuneError, 0) or (RuneError, 1). + if r == utf8.RuneError && n <= 1 { + return token{}, &ParseError{s.pos, errors.New("invalid UTF-8")} + } + s.advance(n) + bytes = appendUTF16(bytes, r) + } + } +} + +// parseUTF32String parses a UTF-32 string until the next ". +// +// This function assumes that the scanner's cursor is currently on a U followed +// by a " rune. 
+func (s *Scanner) parseUTF32String() (token, error) { + s.advance(2) // Skip the U". The caller is assumed to have validated it. + start := s.pos + var bytes []byte + for { + if s.isEOF(0) { + return token{}, &ParseError{start, errors.New("unmatched \"")} + } + + switch s.Input[s.pos.Offset] { + case '"': + s.advance(1) + return token{Kind: tokenBytes, Value: bytes, Pos: start}, nil + case '\\': + r, err := s.parseEscapeSequence() + if err != nil { + return token{}, err + } + bytes = appendUTF32(bytes, r) + default: + r, n := utf8.DecodeRuneInString(s.Input[s.pos.Offset:]) + // Note DecodeRuneInString may return utf8.RuneError if there is a + // legitimate replacement character in the input. The documentation + // says errors return (RuneError, 0) or (RuneError, 1). + if r == utf8.RuneError && n <= 1 { + return token{}, &ParseError{s.pos, errors.New("invalid UTF-8")} + } + s.advance(n) + bytes = appendUTF32(bytes, r) + } + } +} + +// next lexes the next token. +func (s *Scanner) next() (token, error) { +again: + if s.isEOF(0) { + return token{Kind: tokenEOF, Pos: s.pos}, nil + } + + switch s.Input[s.pos.Offset] { + case ' ', '\t', '\n', '\r': + // Skip whitespace. + s.advance(1) + goto again + case '#': + // Skip to the end of the comment. + s.advance(1) + for !s.isEOF(0) { + wasNewline := s.Input[s.pos.Offset] == '\n' + s.advance(1) + if wasNewline { + break + } + } + goto again + case '{': + s.advance(1) + return token{Kind: tokenLeftCurly, Pos: s.pos}, nil + case '}': + s.advance(1) + return token{Kind: tokenRightCurly, Pos: s.pos}, nil + case '"': + return s.parseQuotedString() + case 'u': + if !s.isEOF(1) && s.Input[s.pos.Offset+1] == '"' { + return s.parseUTF16String() + } + case 'U': + if !s.isEOF(1) && s.Input[s.pos.Offset+1] == '"' { + return s.parseUTF32String() + } + case 'b': + if !s.isEOF(1) && s.Input[s.pos.Offset+1] == '`' { + s.advance(2) // Skip the b`. 
+ bitStr, ok := s.consumeUntil('`') + if !ok { + return token{}, &ParseError{s.pos, errors.New("unmatched `")} + } + + // The leading byte is the number of "extra" bits at the end. + var bitCount int + var sawPipe bool + value := []byte{0} + for i, r := range bitStr { + switch r { + case '0', '1': + if bitCount%8 == 0 { + value = append(value, 0) + } + if r == '1' { + value[bitCount/8+1] |= 1 << uint(7-bitCount%8) + } + bitCount++ + case '|': + if sawPipe { + return token{}, &ParseError{s.pos, errors.New("duplicate |")} + } + + // bitsRemaining is the number of bits remaining in the output that haven't + // been used yet. There cannot be more than that many bits past the |. + bitsRemaining := (len(value)-1)*8 - bitCount + inputRemaining := len(bitStr) - i - 1 + if inputRemaining > bitsRemaining { + return token{}, &ParseError{s.pos, fmt.Errorf("expected at most %v explicit padding bits; found %v", bitsRemaining, inputRemaining)} + } + + sawPipe = true + value[0] = byte(bitsRemaining) + default: + return token{}, &ParseError{s.pos, fmt.Errorf("unexpected rune %q", r)} + } + } + if !sawPipe { + value[0] = byte((len(value)-1)*8 - bitCount) + } + return token{Kind: tokenBytes, Value: value, Pos: s.pos}, nil + } + case '`': + s.advance(1) + hexStr, ok := s.consumeUntil('`') + if !ok { + return token{}, &ParseError{s.pos, errors.New("unmatched `")} + } + bytes, err := hex.DecodeString(hexStr) + if err != nil { + return token{}, &ParseError{s.pos, err} + } + return token{Kind: tokenBytes, Value: bytes, Pos: s.pos}, nil + case '[': + s.advance(1) + tagStr, ok := s.consumeUntil(']') + if !ok { + return token{}, &ParseError{s.pos, errors.New("unmatched [")} + } + tag, err := decodeTagString(tagStr) + if err != nil { + return token{}, &ParseError{s.pos, err} + } + value, err := appendTag(nil, tag) + if err != nil { + return token{}, &ParseError{s.pos, err} + } + return token{Kind: tokenBytes, Value: value, Pos: s.pos}, nil + } + + // Normal token. 
Consume up to the next whitespace character, symbol, or + // EOF. + start := s.pos + s.advance(1) +loop: + for !s.isEOF(0) { + switch s.Input[s.pos.Offset] { + case ' ', '\t', '\n', '\r', '{', '}', '[', ']', '`', '"', '#': + break loop + default: + s.advance(1) + } + } + + symbol := s.Input[start.Offset:s.pos.Offset] + + // See if it is a tag. + tag, ok := internal.TagByName(symbol) + if ok { + value, err := appendTag(nil, tag) + if err != nil { + // This is impossible; built-in tags always encode. + return token{}, &ParseError{s.pos, err} + } + return token{Kind: tokenBytes, Value: value, Pos: start}, nil + } + + if regexpInteger.MatchString(symbol) { + value, err := strconv.ParseInt(symbol, 10, 64) + if err != nil { + return token{}, &ParseError{start, err} + } + return token{Kind: tokenBytes, Value: appendInteger(nil, value), Pos: s.pos}, nil + } + + if regexpOID.MatchString(symbol) { + oidStr := strings.Split(symbol, ".") + var oid []uint32 + for _, s := range oidStr { + u, err := strconv.ParseUint(s, 10, 32) + if err != nil { + return token{}, &ParseError{start, err} + } + oid = append(oid, uint32(u)) + } + der, err := appendObjectIdentifier(nil, oid) + if err != nil { + return token{}, &ParseError{start, err} + } + return token{Kind: tokenBytes, Value: der, Pos: s.pos}, nil + } + + if symbol == "TRUE" { + return token{Kind: tokenBytes, Value: []byte{0xff}, Pos: s.pos}, nil + } + + if symbol == "FALSE" { + return token{Kind: tokenBytes, Value: []byte{0x00}, Pos: s.pos}, nil + } + + if symbol == "indefinite" { + return token{Kind: tokenIndefinite}, nil + } + + if isLongFormOverride(symbol) { + l, err := decodeLongFormOverride(symbol) + if err != nil { + return token{}, &ParseError{start, err} + } + return token{Kind: tokenLongForm, Length: l}, nil + } + + return token{}, fmt.Errorf("unrecognized symbol %q", symbol) +} + +// exec is the main parser loop. 
+// +// The leftCurly argument, if not nil, represents the { that began the +// length-prefixed block we're currently executing. Because we need to encode +// the full extent of the contents of a {} before emitting the length prefix, +// this function calls itself with a non-nil leftCurly to encode it. +func (s *Scanner) exec(leftCurly *token) ([]byte, error) { + var out []byte + var lengthModifier *token + for { + token, err := s.next() + if err != nil { + return nil, err + } + if lengthModifier != nil && token.Kind != tokenLeftCurly { + return nil, &ParseError{lengthModifier.Pos, errors.New("length modifier was not followed by '{'")} + } + switch token.Kind { + case tokenBytes: + out = append(out, token.Value...) + case tokenLeftCurly: + child, err := s.exec(&token) + if err != nil { + return nil, err + } + var lengthOverride int + if lengthModifier != nil { + if lengthModifier.Kind == tokenIndefinite { + out = append(out, 0x80) + out = append(out, child...) + out = append(out, 0x00, 0x00) + lengthModifier = nil + break + } + if lengthModifier.Kind == tokenLongForm { + lengthOverride = lengthModifier.Length + } + } + out, err = appendLength(out, len(child), lengthOverride) + if err != nil { + // appendLength may fail if the lengthModifier was incompatible. + return nil, &ParseError{lengthModifier.Pos, err} + } + out = append(out, child...) 
+ lengthModifier = nil + case tokenRightCurly: + if leftCurly != nil { + return out, nil + } + return nil, &ParseError{token.Pos, errors.New("unmatched '}'")} + case tokenLongForm, tokenIndefinite: + lengthModifier = &token + case tokenEOF: + if leftCurly == nil { + return out, nil + } + return nil, &ParseError{leftCurly.Pos, errors.New("unmatched '{'")} + default: + panic(token) + } + } +} diff --git a/cmd/ascii2der/scanner_test.go b/ascii2der/scanner_test.go similarity index 99% rename from cmd/ascii2der/scanner_test.go rename to ascii2der/scanner_test.go index cac14d2..ff0fe85 100644 --- a/cmd/ascii2der/scanner_test.go +++ b/ascii2der/scanner_test.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package main +package ascii2der import ( "bytes" @@ -383,9 +383,9 @@ indefinite long-form:2`, } func scanAll(in string) (tokens []token, ok bool) { - scanner := newScanner(in) + scanner := NewScanner(in) for { - token, err := scanner.Next() + token, err := scanner.next() if err != nil { return } @@ -446,7 +446,7 @@ var asciiToDERTests = []struct { func TestASCIIToDER(t *testing.T) { for i, tt := range asciiToDERTests { - out, err := asciiToDER(tt.in) + out, err := NewScanner(tt.in).Exec() ok := err == nil if !tt.ok { if ok { diff --git a/cmd/ascii2der/values.go b/ascii2der/values.go similarity index 99% rename from cmd/ascii2der/values.go rename to ascii2der/values.go index 03e82dc..d8406e0 100644 --- a/cmd/ascii2der/values.go +++ b/ascii2der/values.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-package main +package ascii2der import ( "errors" diff --git a/cmd/ascii2der/values_test.go b/ascii2der/values_test.go similarity index 99% rename from cmd/ascii2der/values_test.go rename to ascii2der/values_test.go index 17d6a9c..ea74213 100644 --- a/cmd/ascii2der/values_test.go +++ b/ascii2der/values_test.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package main +package ascii2der import ( "testing" diff --git a/cmd/ascii2der/main.go b/cmd/ascii2der/main.go index ca5c73d..b6749b7 100644 --- a/cmd/ascii2der/main.go +++ b/cmd/ascii2der/main.go @@ -20,6 +20,8 @@ import ( "fmt" "io/ioutil" "os" + + "github.com/google/der-ascii/ascii2der" ) var inPath = flag.String("i", "", "input file to use (defaults to stdin)") @@ -52,7 +54,10 @@ func main() { os.Exit(1) } - outBytes, err := asciiToDER(string(inBytes)) + scanner := ascii2der.NewScanner(string(inBytes)) + scanner.SetFile(*inPath) + + outBytes, err := scanner.Exec() if err != nil { fmt.Fprintf(os.Stderr, "Syntax error: %s\n", err) os.Exit(1) diff --git a/cmd/ascii2der/scanner.go b/cmd/ascii2der/scanner.go deleted file mode 100644 index f4c49f2..0000000 --- a/cmd/ascii2der/scanner.go +++ /dev/null @@ -1,532 +0,0 @@ -// Copyright 2015 The DER ASCII Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package main - -import ( - "encoding/hex" - "errors" - "fmt" - "regexp" - "strconv" - "strings" - "unicode/utf16" - "unicode/utf8" - - "github.com/google/der-ascii/internal" -) - -// A position describes a location in the input stream. -type position struct { - Offset int // offset, starting at 0 - Line int // line number, starting at 1 - Column int // column number, starting at 1 (byte count) -} - -// A tokenKind is a kind of token. -type tokenKind int - -const ( - tokenBytes tokenKind = iota - tokenLeftCurly - tokenRightCurly - tokenIndefinite - tokenLongForm - tokenEOF -) - -// A parseError is an error during parsing DER ASCII. -type parseError struct { - Pos position - Err error -} - -func (t *parseError) Error() string { - return fmt.Sprintf("line %d: %s", t.Pos.Line, t.Err) -} - -// A token is a token in a DER ASCII file. -type token struct { - // Kind is the kind of the token. - Kind tokenKind - // Value, for a tokenBytes token, is the decoded value of the token in - // bytes. - Value []byte - // Pos is the position of the first byte of the token. - Pos position - // Length, for a tokenLongForm token, is the number of bytes to use to - // encode the length, not including the initial one. - Length int -} - -var ( - regexpInteger = regexp.MustCompile(`^-?[0-9]+$`) - regexpOID = regexp.MustCompile(`^[0-9]+(\.[0-9]+)+$`) -) - -type scanner struct { - text string - pos position -} - -func newScanner(text string) *scanner { - return &scanner{text: text, pos: position{Line: 1}} -} - -func (s *scanner) parseEscapeSequence() (rune, error) { - s.advance() // Skip the \. The caller is assumed to have validated it. 
- if s.isEOF() { - return 0, &parseError{s.pos, errors.New("expected escape character")} - } - switch c := s.text[s.pos.Offset]; c { - case 'n': - s.advance() - return '\n', nil - case '"', '\\': - s.advance() - return rune(c), nil - case 'x': - s.advance() - if s.pos.Offset+2 > len(s.text) { - return 0, &parseError{s.pos, errors.New("unfinished escape sequence")} - } - b, err := hex.DecodeString(s.text[s.pos.Offset : s.pos.Offset+2]) - if err != nil { - return 0, &parseError{s.pos, err} - } - s.advanceBytes(2) - return rune(b[0]), nil - case 'u': - s.advance() - if s.pos.Offset+4 > len(s.text) { - return 0, &parseError{s.pos, errors.New("unfinished escape sequence")} - } - b, err := hex.DecodeString(s.text[s.pos.Offset : s.pos.Offset+4]) - if err != nil { - return 0, &parseError{s.pos, err} - } - s.advanceBytes(4) - return rune(b[0])<<8 | rune(b[1]), nil - case 'U': - s.advance() - if s.pos.Offset+8 > len(s.text) { - return 0, &parseError{s.pos, errors.New("unfinished escape sequence")} - } - b, err := hex.DecodeString(s.text[s.pos.Offset : s.pos.Offset+8]) - if err != nil { - return 0, &parseError{s.pos, err} - } - s.advanceBytes(8) - return rune(b[0])<<24 | rune(b[1])<<16 | rune(b[2])<<8 | rune(b[3]), nil - default: - return 0, &parseError{s.pos, fmt.Errorf("unknown escape sequence \\%c", c)} - } -} - -func (s *scanner) parseQuotedString() (token, error) { - s.advance() // Skip the ". The caller is assumed to have validated it. - start := s.pos - var bytes []byte - for { - if s.isEOF() { - return token{}, &parseError{start, errors.New("unmatched \"")} - } - switch c := s.text[s.pos.Offset]; c { - case '"': - s.advance() - return token{Kind: tokenBytes, Value: bytes, Pos: start}, nil - case '\\': - escapeStart := s.pos - r, err := s.parseEscapeSequence() - if err != nil { - return token{}, err - } - if r > 0xff { - // TODO(davidben): Alternatively, should these encode as UTF-8? 
- return token{}, &parseError{escapeStart, errors.New("illegal escape for quoted string")} - } - bytes = append(bytes, byte(r)) - default: - s.advance() - bytes = append(bytes, c) - } - } -} - -func appendUTF16(b []byte, r rune) []byte { - if r <= 0xffff { - // Note this logic intentionally tolerates unpaired surrogates. - return append(b, byte(r>>8), byte(r)) - } - - r1, r2 := utf16.EncodeRune(r) - b = append(b, byte(r1>>8), byte(r1)) - b = append(b, byte(r2>>8), byte(r2)) - return b -} - -func (s *scanner) parseUTF16String() (token, error) { - s.advance() // Skip the u. The caller is assumed to have validated it. - s.advance() // Skip the ". The caller is assumed to have validated it. - start := s.pos - var bytes []byte - for { - if s.isEOF() { - return token{}, &parseError{start, errors.New("unmatched \"")} - } - switch c := s.text[s.pos.Offset]; c { - case '"': - s.advance() - return token{Kind: tokenBytes, Value: bytes, Pos: start}, nil - case '\\': - r, err := s.parseEscapeSequence() - if err != nil { - return token{}, err - } - bytes = appendUTF16(bytes, r) - default: - r, n := utf8.DecodeRuneInString(s.text[s.pos.Offset:]) - // Note DecodeRuneInString may return utf8.RuneError if there is a - // legitimate replacement charaacter in the input. The documentation - // says errors return (RuneError, 0) or (RuneError, 1). - if r == utf8.RuneError && n <= 1 { - return token{}, &parseError{s.pos, errors.New("invalid UTF-8")} - } - s.advanceBytes(n) - bytes = appendUTF16(bytes, r) - } - } -} - -func appendUTF32(b []byte, r rune) []byte { - return append(b, byte(r>>24), byte(r>>16), byte(r>>8), byte(r)) -} - -func (s *scanner) parseUTF32String() (token, error) { - s.advance() // Skip the U. The caller is assumed to have validated it. - s.advance() // Skip the ". The caller is assumed to have validated it. 
- start := s.pos - var bytes []byte - for { - if s.isEOF() { - return token{}, &parseError{start, errors.New("unmatched \"")} - } - switch c := s.text[s.pos.Offset]; c { - case '"': - s.advance() - return token{Kind: tokenBytes, Value: bytes, Pos: start}, nil - case '\\': - r, err := s.parseEscapeSequence() - if err != nil { - return token{}, err - } - bytes = appendUTF32(bytes, r) - default: - r, n := utf8.DecodeRuneInString(s.text[s.pos.Offset:]) - // Note DecodeRuneInString may return utf8.RuneError if there is a - // legitimate replacement charaacter in the input. The documentation - // says errors return (RuneError, 0) or (RuneError, 1). - if r == utf8.RuneError && n <= 1 { - return token{}, &parseError{s.pos, errors.New("invalid UTF-8")} - } - s.advanceBytes(n) - bytes = appendUTF32(bytes, r) - } - } -} - -func (s *scanner) Next() (token, error) { -again: - if s.isEOF() { - return token{Kind: tokenEOF, Pos: s.pos}, nil - } - - switch s.text[s.pos.Offset] { - case ' ', '\t', '\n', '\r': - // Skip whitespace. - s.advance() - goto again - case '#': - // Skip to the end of the comment. - s.advance() - for !s.isEOF() { - wasNewline := s.text[s.pos.Offset] == '\n' - s.advance() - if wasNewline { - break - } - } - goto again - case '{': - s.advance() - return token{Kind: tokenLeftCurly, Pos: s.pos}, nil - case '}': - s.advance() - return token{Kind: tokenRightCurly, Pos: s.pos}, nil - case '"': - return s.parseQuotedString() - case 'u': - if s.pos.Offset+1 < len(s.text) && s.text[s.pos.Offset+1] == '"' { - return s.parseUTF16String() - } - case 'U': - if s.pos.Offset+1 < len(s.text) && s.text[s.pos.Offset+1] == '"' { - return s.parseUTF32String() - } - case 'b': - if s.pos.Offset+1 < len(s.text) && s.text[s.pos.Offset+1] == '`' { - s.advance() // Skip the b. - s.advance() // Skip the `. - bitStr, ok := s.consumeUpTo('`') - if !ok { - return token{}, &parseError{s.pos, errors.New("unmatched `")} - } - - // The leading byte is the number of "extra" bits at the end. 
- var bitCount int - var sawPipe bool - value := []byte{0} - for i, r := range bitStr { - switch r { - case '0', '1': - if bitCount%8 == 0 { - value = append(value, 0) - } - if r == '1' { - value[bitCount/8+1] |= 1 << uint(7-bitCount%8) - } - bitCount++ - case '|': - if sawPipe { - return token{}, &parseError{s.pos, errors.New("duplicate |")} - } - - // bitsRemaining is the number of bits remaining in the output that haven't - // been used yet. There cannot be more than that many bits past the |. - bitsRemaining := (len(value)-1)*8 - bitCount - inputRemaining := len(bitStr) - i - 1 - if inputRemaining > bitsRemaining { - return token{}, &parseError{s.pos, fmt.Errorf("expected at most %v explicit padding bits; found %v", bitsRemaining, inputRemaining)} - } - - sawPipe = true - value[0] = byte(bitsRemaining) - default: - return token{}, &parseError{s.pos, fmt.Errorf("unexpected rune %q", r)} - } - } - if !sawPipe { - value[0] = byte((len(value)-1)*8 - bitCount) - } - return token{Kind: tokenBytes, Value: value, Pos: s.pos}, nil - } - case '`': - s.advance() - hexStr, ok := s.consumeUpTo('`') - if !ok { - return token{}, &parseError{s.pos, errors.New("unmatched `")} - } - bytes, err := hex.DecodeString(hexStr) - if err != nil { - return token{}, &parseError{s.pos, err} - } - return token{Kind: tokenBytes, Value: bytes, Pos: s.pos}, nil - case '[': - s.advance() - tagStr, ok := s.consumeUpTo(']') - if !ok { - return token{}, &parseError{s.pos, errors.New("unmatched [")} - } - tag, err := decodeTagString(tagStr) - if err != nil { - return token{}, &parseError{s.pos, err} - } - value, err := appendTag(nil, tag) - if err != nil { - return token{}, &parseError{s.pos, err} - } - return token{Kind: tokenBytes, Value: value, Pos: s.pos}, nil - } - - // Normal token. Consume up to the next whitespace character, symbol, or - // EOF. 
- start := s.pos - s.advance() -loop: - for !s.isEOF() { - switch s.text[s.pos.Offset] { - case ' ', '\t', '\n', '\r', '{', '}', '[', ']', '`', '"', '#': - break loop - default: - s.advance() - } - } - - symbol := s.text[start.Offset:s.pos.Offset] - - // See if it is a tag. - tag, ok := internal.TagByName(symbol) - if ok { - value, err := appendTag(nil, tag) - if err != nil { - // This is impossible; built-in tags always encode. - return token{}, &parseError{s.pos, err} - } - return token{Kind: tokenBytes, Value: value, Pos: start}, nil - } - - if regexpInteger.MatchString(symbol) { - value, err := strconv.ParseInt(symbol, 10, 64) - if err != nil { - return token{}, &parseError{start, err} - } - return token{Kind: tokenBytes, Value: appendInteger(nil, value), Pos: s.pos}, nil - } - - if regexpOID.MatchString(symbol) { - oidStr := strings.Split(symbol, ".") - var oid []uint32 - for _, s := range oidStr { - u, err := strconv.ParseUint(s, 10, 32) - if err != nil { - return token{}, &parseError{start, err} - } - oid = append(oid, uint32(u)) - } - der, ok := appendObjectIdentifier(nil, oid) - if !ok { - return token{}, errors.New("invalid OID") - } - return token{Kind: tokenBytes, Value: der, Pos: s.pos}, nil - } - - if symbol == "TRUE" { - return token{Kind: tokenBytes, Value: []byte{0xff}, Pos: s.pos}, nil - } - - if symbol == "FALSE" { - return token{Kind: tokenBytes, Value: []byte{0x00}, Pos: s.pos}, nil - } - - if symbol == "indefinite" { - return token{Kind: tokenIndefinite}, nil - } - - if isLongFormOverride(symbol) { - l, err := decodeLongFormOverride(symbol) - if err != nil { - return token{}, &parseError{start, err} - } - return token{Kind: tokenLongForm, Length: l}, nil - } - - return token{}, fmt.Errorf("unrecognized symbol %q", symbol) -} - -func (s *scanner) isEOF() bool { - return s.pos.Offset >= len(s.text) -} - -func (s *scanner) advance() { - if !s.isEOF() { - if s.text[s.pos.Offset] == '\n' { - s.pos.Line++ - s.pos.Column = 0 - } else { - 
s.pos.Column++ - } - s.pos.Offset++ - } -} - -func (s *scanner) advanceBytes(n int) { - for i := 0; i < n; i++ { - s.advance() - } -} - -func (s *scanner) consumeUpTo(b byte) (string, bool) { - start := s.pos.Offset - for !s.isEOF() { - if s.text[s.pos.Offset] == b { - ret := s.text[start:s.pos.Offset] - s.advance() - return ret, true - } - s.advance() - } - return "", false -} - -func asciiToDERImpl(scanner *scanner, leftCurly *token) ([]byte, error) { - var out []byte - var lengthModifier *token - for { - token, err := scanner.Next() - if err != nil { - return nil, err - } - if lengthModifier != nil && token.Kind != tokenLeftCurly { - return nil, &parseError{lengthModifier.Pos, errors.New("length modifier was not followed by '{'")} - } - switch token.Kind { - case tokenBytes: - out = append(out, token.Value...) - case tokenLeftCurly: - child, err := asciiToDERImpl(scanner, &token) - if err != nil { - return nil, err - } - var lengthOverride int - if lengthModifier != nil { - if lengthModifier.Kind == tokenIndefinite { - out = append(out, 0x80) - out = append(out, child...) - out = append(out, 0x00, 0x00) - lengthModifier = nil - break - } - if lengthModifier.Kind == tokenLongForm { - lengthOverride = lengthModifier.Length - } - } - out, err = appendLength(out, len(child), lengthOverride) - if err != nil { - // appendLength may fail if the lengthModifier was incompatible. - return nil, &parseError{lengthModifier.Pos, err} - } - out = append(out, child...) - lengthModifier = nil - case tokenRightCurly: - if leftCurly != nil { - return out, nil - } - return nil, &parseError{token.Pos, errors.New("unmatched '}'")} - case tokenLongForm, tokenIndefinite: - lengthModifier = &token - case tokenEOF: - if leftCurly == nil { - return out, nil - } - return nil, &parseError{leftCurly.Pos, errors.New("unmatched '{'")} - default: - panic(token) - } - } -} - -func asciiToDER(input string) ([]byte, error) { - scanner := newScanner(input) - return asciiToDERImpl(scanner, nil) -}