diff --git a/README.md b/README.md index 9a9a8d4..a5bbb0a 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,7 @@ structures and malformed variants of them. It provides two tools, `ascii2der` and `der2ascii`, to convert DER ASCII to a byte string and vice versa. To install them, run: - go get github.com/google/der-ascii/cmd/... + go install github.com/google/der-ascii/cmd/...@latest These tools may be used to create test inputs by taking an existing DER or BER structure, disassembling it with `der2ascii` into DER ASCII, making diff --git a/ascii2der/builtins.go b/ascii2der/builtins.go new file mode 100644 index 0000000..a3d7f06 --- /dev/null +++ b/ascii2der/builtins.go @@ -0,0 +1,165 @@ +// Copyright 2015 The DER ASCII Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package ascii2der + +import ( + "crypto" + "crypto/ecdsa" + "crypto/ed25519" + "crypto/rsa" + "crypto/x509" + "errors" + "fmt" + "reflect" ) + +// setDefaultBuiltins adds the default builtins to the given Scanner's builtin +// function table. +// +// Some builtins may capture the Scanner pointer if they operate on scanner +// state, such as variables. +func setDefaultBuiltins(scanner *Scanner) { + // NOTE: If adding a builtin, remember to document it in language.txt! + scanner.Builtins = map[string]Builtin{ + // define(var, val) sets var = val in the scanner's variable table. + // Variables may be redefined. Expands to the empty string. + "define": func(args [][]byte) ([]byte, error) { + if len(args) != 2 { + return nil, errors.New("expected two arguments to define()") + } + + if scanner.Vars == nil { + scanner.Vars = make(map[string][]byte) + } + scanner.Vars[string(args[0])] = args[1] + + return nil, nil + }, + + // var(var) expands to whatever var is set to in the scanner's variable table. + // It is an error if var is not defined. + // + // var(var, default) behaves similarly, except it expands to default if var is + // not defined. + "var": func(args [][]byte) ([]byte, error) { + switch len(args) { + case 1: + val, ok := scanner.Vars[string(args[0])] + if !ok { + return nil, fmt.Errorf("var() with undefined name: %q", string(args[0])) + } + return val, nil + case 2: + val, ok := scanner.Vars[string(args[0])] + if !ok { + return args[1], nil + } + return val, nil + default: + return nil, errors.New("expected one or two arguments to var()") + } + }, + + // sign(algorithm, key, message) expands into a digital signature for message + // using the given algorithm and key. key must be a private key in PKCS #8 + // format. + // + // The supported algorithm strings are: + // - "RSA_PKCS1_SHA1", "RSA_PKCS1_SHA256", "RSA_PKCS1_SHA384", + // "RSA_PKCS1_SHA512", for RSASSA-PKCS1-v1_5 with the specified hash function. + // - "ECDSA_SHA256", "ECDSA_SHA384", "ECDSA_SHA512", for ECDSA with the + // specified hash function. + // - "Ed25519" for Ed25519.
+ "sign": func(args [][]byte) ([]byte, error) { + if len(args) != 3 { + return nil, errors.New("expected two arguments to sign()") + } + + pk8, err := x509.ParsePKCS8PrivateKey(args[1]) + if err != nil { + return nil, err + } + + var signer crypto.Signer + var hash crypto.Hash + switch string(args[0]) { + case "RSA_PKCS1_SHA1": + key, ok := pk8.(*rsa.PrivateKey) + if !ok { + return nil, fmt.Errorf("expected RSA key, got %v", reflect.TypeOf(key)) + } + signer = key + hash = crypto.SHA1 + case "RSA_PKCS1_SHA256": + key, ok := pk8.(*rsa.PrivateKey) + if !ok { + return nil, fmt.Errorf("expected RSA key, got %v", reflect.TypeOf(key)) + } + signer = key + hash = crypto.SHA256 + case "RSA_PKCS1_SHA384": + key, ok := pk8.(*rsa.PrivateKey) + if !ok { + return nil, fmt.Errorf("expected RSA key, got %v", reflect.TypeOf(key)) + } + signer = key + hash = crypto.SHA384 + case "RSA_PKCS1_SHA512": + key, ok := pk8.(*rsa.PrivateKey) + if !ok { + return nil, fmt.Errorf("expected RSA key, got %v", reflect.TypeOf(key)) + } + signer = key + hash = crypto.SHA512 + case "ECSDA_SHA256": + key, ok := pk8.(*ecdsa.PrivateKey) + if !ok { + return nil, fmt.Errorf("expected ECDSA key, got %v", reflect.TypeOf(key)) + } + signer = key + hash = crypto.SHA256 + case "ECSDA_SHA384": + key, ok := pk8.(*ecdsa.PrivateKey) + if !ok { + return nil, fmt.Errorf("expected ECDSA key, got %v", reflect.TypeOf(key)) + } + signer = key + hash = crypto.SHA384 + case "ECSDA_SHA512": + key, ok := pk8.(*ecdsa.PrivateKey) + if !ok { + return nil, fmt.Errorf("expected ECDSA key, got %v", reflect.TypeOf(key)) + } + signer = key + hash = crypto.SHA512 + case "Ed22519": + key, ok := pk8.(ed25519.PrivateKey) + if !ok { + return nil, fmt.Errorf("expected Ed25519 key, got %v", reflect.TypeOf(key)) + } + signer = key + } + + digest := args[2] + if hash > 0 { + hash := hash.New() + hash.Write(digest) + digest = hash.Sum(nil) + } + + return signer.Sign(nil, digest, hash) + }, + } +} diff --git a/cmd/ascii2der/encoder.go b/ascii2der/encoder.go similarity index 66% rename from cmd/ascii2der/encoder.go rename to ascii2der/encoder.go index 0d84559..a0b04d5 100644 --- a/cmd/ascii2der/encoder.go +++ b/ascii2der/encoder.go @@ -12,15 +12,45 @@ // See the License for the specific language governing permissions and // limitations under the License. -package main +package ascii2der import ( "errors" "fmt" + "unicode/utf16" "github.com/google/der-ascii/internal" ) +// appendUTF16 marshals r using UTF-16 and appends the result to dst, returning +// the updated slice. +// +// This logic intentionally tolerates unpaired surrogates. +func appendUTF16(dst []byte, r rune) []byte { + if r <= 0xffff { + return append(dst, byte(r>>8), byte(r)) + } + + r1, r2 := utf16.EncodeRune(r) + dst = append(dst, byte(r1>>8), byte(r1)) + dst = append(dst, byte(r2>>8), byte(r2)) + return dst +} + +// appendUTF16 marshals r using UTF-32 and appends the result to dst, returning +// the updated slice. +// +// In other words, this function writes r as an integer in big-endian order. +func appendUTF32(dst []byte, r rune) []byte { + return append(dst, byte(r>>24), byte(r>>16), byte(r>>8), byte(r)) +} + +// appendBase128 marshals an integer in base 128, a varint format used by OIDs +// and long-form tag numbers, and appends the result to dst, returning the +// updated slice. +// +// This function is the same as appendBase128WithLength with length set to zero, +// which cannot fail. 
func appendBase128(dst []byte, value uint32) []byte { dst, err := appendBase128WithLength(dst, value, 0) if err != nil { @@ -30,6 +60,11 @@ func appendBase128(dst []byte, value uint32) []byte { return dst } +// appendBase128WithLength marshals an integer in base 128, a varint format used by OIDs +// and long-form tag numbers, and appends the result to dst, returning the +// updated slice. +// +// If length is zero, the minimal length is chosen. func appendBase128WithLength(dst []byte, value uint32, length int) ([]byte, error) { // Count how many bytes are needed. var l int @@ -120,18 +155,25 @@ func appendInteger(dst []byte, value int64) []byte { return dst } -func appendObjectIdentifier(dst []byte, value []uint32) ([]byte, bool) { +// appendObjectIdentifier marshals the given array of integers as an OID. +func appendObjectIdentifier(dst []byte, value []uint32) ([]byte, error) { // Validate the input before anything is written. - if len(value) < 2 || value[0] > 2 || (value[0] < 2 && value[1] > 39) { - return dst, false + if len(value) < 2 { + return dst, errors.New("OIDs must have at least two arcs") + } + if value[0] > 2 { + return dst, fmt.Errorf("first arc of an OID must be one of 0, 1, or 2; got %d", value[0]) + } + if value[0] < 2 && value[1] > 39 { + return dst, fmt.Errorf("second arc of an OID must be at most 39 when the first arc is 0 or 1; got %d", value[1]) } if value[0]*40+value[1] < value[1] { - return dst, false + return dst, errors.New("first two arcs overflowed") } dst = appendBase128(dst, value[0]*40+value[1]) for _, v := range value[2:] { dst = appendBase128(dst, v) } - return dst, true + return dst, nil } diff --git a/cmd/ascii2der/encoder_test.go b/ascii2der/encoder_test.go similarity index 95% rename from cmd/ascii2der/encoder_test.go rename to ascii2der/encoder_test.go index 5faa8ba..f0472b8 100644 --- a/cmd/ascii2der/encoder_test.go +++ b/ascii2der/encoder_test.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package main +package ascii2der import ( "bytes" @@ -162,21 +162,21 @@ var appendObjectIdentifierTests = []struct { func TestAppendObjectIdentifier(t *testing.T) { for i, tt := range appendObjectIdentifierTests { - dst, ok := appendObjectIdentifier(nil, tt.value) + dst, err := appendObjectIdentifier(nil, tt.value) if !tt.ok { - if ok { + if err == nil { t.Errorf("%d. appendObjectIdentifier(nil, %v) unexpectedly suceeded.", i, tt.value) } else if len(dst) != 0 { t.Errorf("%d. appendObjectIdentifier did not preserve input.", i) } } else if !bytes.Equal(dst, tt.encoded) { - t.Errorf("%d. appendObjectIdentifier(nil, %v) = %v, wanted %v.", i, tt.value, dst, tt.encoded) + t.Errorf("%d. appendObjectIdentifier(nil, %v) = %v, %v, wanted %v.", i, tt.value, dst, err, tt.encoded) } dst = []byte{0} - dst, ok = appendObjectIdentifier(dst, tt.value) + dst, err = appendObjectIdentifier(dst, tt.value) if !tt.ok { - if ok { + if err == nil { t.Errorf("%d. appendObjectIdentifier(nil, %v) unexpectedly suceeded.", i, tt.value) } else if !bytes.Equal(dst, []byte{0}) { t.Errorf("%d. appendObjectIdentifier did not preserve input.", i) diff --git a/ascii2der/examples_test.go b/ascii2der/examples_test.go new file mode 100644 index 0000000..e3fbbc9 --- /dev/null +++ b/ascii2der/examples_test.go @@ -0,0 +1,31 @@ +// Copyright 2015 The DER ASCII Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package ascii2der + +import ( + "fmt" +) + +func Example() { + scanner := NewScanner(` + SEQUENCE { + INTEGER { "totally an integer" } + } +`) + + der, _ := scanner.Exec() + fmt.Printf("%x\n", der) + // Output: 30140212746f74616c6c7920616e20696e7465676572 +} diff --git a/ascii2der/scanner.go b/ascii2der/scanner.go new file mode 100644 index 0000000..0f49caf --- /dev/null +++ b/ascii2der/scanner.go @@ -0,0 +1,690 @@ +// Copyright 2015 The DER ASCII Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package ascii2der implements the DER ASCII language described in +// https://github.com/google/der-ascii/blob/master/language.txt. +// +// The Scanner type can be used to parse DER ASCII files and output byte blobs +// that may or may not be valid DER. +package ascii2der + +import ( + "encoding/hex" + "errors" + "fmt" + "regexp" + "strconv" + "strings" + "unicode/utf8" + + "github.com/google/der-ascii/internal" ) + +// A Position describes a location in the input stream. +// +// The zero-value Position represents the first byte of an anonymous input file. +type Position struct { + Offset int // Byte offset. + Line int // Line number (zero-indexed). + Column int // Column number (zero-indexed byte, not rune, count). + File string // Optional file name for pretty-printing. +} + +// String converts a Position to a string. +func (p Position) String() string { + file := p.File + if file == "" { + file = "<input>" + } + return fmt.Sprintf("%s:%d:%d", file, p.Line+1, p.Column+1) +} + +// A tokenKind is a kind of token. +type tokenKind int + +const ( + tokenBytes tokenKind = iota + tokenLeftCurly + tokenRightCurly + tokenIndefinite + tokenLongForm + tokenComma + tokenLeftParen + tokenRightParen + tokenWord + tokenEOF ) + +// A ParseError may be produced while executing a DER ASCII file, wrapping +// another error along with a position. +// +// Errors produced by functions in this package may be type-asserted to +// *ParseError to obtain the position at which the error occurred. +type ParseError struct { + Pos Position + Err error +} + +// Error makes this type into an error type. +func (e *ParseError) Error() string { + return fmt.Sprintf("%s: %s", e.Pos, e.Err) +} + +// Unwrap extracts the inner wrapped error. +// +// See errors.Unwrap(). +func (e *ParseError) Unwrap() error { + return e.Err +} + +// A token is a token in a DER ASCII file. +type token struct { + // Kind is the kind of the token. + Kind tokenKind + // Value, for a tokenBytes token, is the decoded value of the token in + // bytes.
+ Value []byte + // Pos is the position of the first byte of the token. + Pos Position + // Length, for a tokenLongForm token, is the number of bytes to use to + // encode the length, not including the initial one. + Length int +} + +var ( + regexpInteger = regexp.MustCompile(`^-?[0-9]+$`) + regexpOID = regexp.MustCompile(`^[0-9]+(\.[0-9]+)+$`) ) + +// A Builtin is a builtin function callable from a DER ASCII file; it maps the +// already-expanded arguments of the call to the bytes the call expands to. +type Builtin func(args [][]byte) ([]byte, error) + +// A Scanner represents parsing state for a DER ASCII file. +// +// A zero-value Scanner is ready to begin parsing once Input is set. However, +// it is recommended to use NewScanner to create a new Scanner, since it can +// pre-populate fields other than Input with default settings. +type Scanner struct { + // Input is the input text being processed. + Input string + // Builtins is a table of builtin functions that can be called with the usual + // function call syntax in a DER ASCII file. NewScanner will return a Scanner + // with a pre-populated table consisting of those functions defined in + // language.txt, but users may add or remove whatever functions they wish. + Builtins map[string]Builtin + // Vars is a table of variables that builtins can use to store and retrieve + // state, such as via the define() and var() builtins. + Vars map[string][]byte + + // pos is the current position at which parsing should resume. The Offset + // field is used for indexing into Input; the remaining fields are used for + // error-reporting. + pos Position +} + +// NewScanner creates a new scanner for parsing the given input. +func NewScanner(input string) *Scanner { + s := &Scanner{Input: input} + setDefaultBuiltins(s) + return s +} + +// SetFile sets the file path shown in this Scanner's error reports. +func (s *Scanner) SetFile(path string) { + s.pos.File = path +} + +// Exec consumes tokens until Input is exhausted, returning the resulting +// encoded maybe-DER. +func (s *Scanner) Exec() ([]byte, error) { + enc, _, err := s.exec(nil) + return enc, err +} + +// isEOF returns whether the position n bytes past the cursor is at or beyond +// the end of the input. +func (s *Scanner) isEOF(n int) bool { + return s.pos.Offset+n >= len(s.Input) +} + +// advance advances the scanner's cursor n positions. +// +// Unlike just s.pos.Offset += n, this will not proceed beyond the end of the +// string, and will update the line and column information accordingly. +func (s *Scanner) advance(n int) { + for i := 0; i < n && !s.isEOF(0); i++ { + if s.Input[s.pos.Offset] == '\n' { + s.pos.Line++ + s.pos.Column = 0 + } else { + s.pos.Column++ + } + s.pos.Offset++ + } +} + +// consume advances the cursor exactly n bytes and returns the source bytes +// between the initial and final cursor positions. +// +// If EOF is reached before all n bytes are consumed, the function returns +// false. +func (s *Scanner) consume(n int) (string, bool) { + start := s.pos.Offset + s.advance(n) + if s.pos.Offset-start != n { + return "", false + } + + return s.Input[start:s.pos.Offset], true +} + +// consumeUntil advances the cursor until the given byte is seen, returning the +// source bytes from the initial cursor position up to, but not including, that +// byte. The cursor is advanced past the searched-for byte. +// +// If EOF is reached before the byte is seen, the function returns false.
+func (s *Scanner) consumeUntil(b byte) (string, bool) { + if i := strings.IndexByte(s.Input[s.pos.Offset:], b); i != -1 { + text, _ := s.consume(i + 1) + return text[:i], true + } + return "", false +} + +// parseEscapeSequence parses a DER-ASCII escape sequence, returning the rune +// it escapes. +// +// Valid escapes are: +// \n \" \\ \xNN \uNNNN \UNNNNNNNN +// +// This function assumes that the scanner's cursor is currently on a \ rune. +func (s *Scanner) parseEscapeSequence() (rune, error) { + s.advance(1) // Skip the \. The caller is assumed to have validated it. + if s.isEOF(0) { + return 0, &ParseError{s.pos, errors.New("expected escape character")} + } + + switch c := s.Input[s.pos.Offset]; c { + case 'n': + s.advance(1) + return '\n', nil + case '"', '\\': + s.advance(1) + return rune(c), nil + case 'x', 'u', 'U': + s.advance(1) + + var digits int + switch c { + case 'x': + digits = 2 + case 'u': + digits = 4 + case 'U': + digits = 8 + } + + hexes, ok := s.consume(digits) + if !ok { + return 0, &ParseError{s.pos, errors.New("unfinished escape sequence")} + } + + bytes, err := hex.DecodeString(hexes) + if err != nil { + return 0, &ParseError{s.pos, err} + } + + var r rune + for _, b := range bytes { + r <<= 8 + r |= rune(b) + } + return r, nil + default: + return 0, &ParseError{s.pos, fmt.Errorf("unknown escape sequence \\%c", c)} + } +} + +// parseQuotedString parses a UTF-8 string until the next ". +// +// This function assumes that the scanner's cursor is currently on a " rune. +func (s *Scanner) parseQuotedString() (token, error) { + s.advance(1) // Skip the ". The caller is assumed to have validated it. + start := s.pos + var bytes []byte + for { + if s.isEOF(0) { + return token{}, &ParseError{start, errors.New("unmatched \"")} + } + switch c := s.Input[s.pos.Offset]; c { + case '"': + s.advance(1) + return token{Kind: tokenBytes, Value: bytes, Pos: start}, nil + case '\\': + escapeStart := s.pos + r, err := s.parseEscapeSequence() + if err != nil { + return token{}, err + } + if r > 0xff { + // TODO(davidben): Alternatively, should these encode as UTF-8? + return token{}, &ParseError{escapeStart, errors.New("illegal escape for quoted string")} + } + bytes = append(bytes, byte(r)) + default: + s.advance(1) + bytes = append(bytes, c) + } + } +} + +// parseUTF16String parses a UTF-16 string until the next ". +// +// This function assumes that the scanner's cursor is currently on a u followed +// by a " rune. +func (s *Scanner) parseUTF16String() (token, error) { + s.advance(2) // Skip the u". The caller is assumed to have validated it. + start := s.pos + var bytes []byte + for { + if s.isEOF(0) { + return token{}, &ParseError{start, errors.New("unmatched \"")} + } + + switch s.Input[s.pos.Offset] { + case '"': + s.advance(1) + return token{Kind: tokenBytes, Value: bytes, Pos: start}, nil + case '\\': + r, err := s.parseEscapeSequence() + if err != nil { + return token{}, err + } + bytes = appendUTF16(bytes, r) + default: + r, n := utf8.DecodeRuneInString(s.Input[s.pos.Offset:]) + // Note DecodeRuneInString may return utf8.RuneError if there is a + // legitimate replacement character in the input. The documentation + // says errors return (RuneError, 0) or (RuneError, 1). + if r == utf8.RuneError && n <= 1 { + return token{}, &ParseError{s.pos, errors.New("invalid UTF-8")} + } + s.advance(n) + bytes = appendUTF16(bytes, r) + } + } +} + +// parseUTF32String parses a UTF-32 string until the next ". 
+// +// This function assumes that the scanner's cursor is currently on a U followed +// by a " rune. +func (s *Scanner) parseUTF32String() (token, error) { + s.advance(2) // Skip the U". The caller is assumed to have validated it. + start := s.pos + var bytes []byte + for { + if s.isEOF(0) { + return token{}, &ParseError{start, errors.New("unmatched \"")} + } + + switch s.Input[s.pos.Offset] { + case '"': + s.advance(1) + return token{Kind: tokenBytes, Value: bytes, Pos: start}, nil + case '\\': + r, err := s.parseEscapeSequence() + if err != nil { + return token{}, err + } + bytes = appendUTF32(bytes, r) + default: + r, n := utf8.DecodeRuneInString(s.Input[s.pos.Offset:]) + // Note DecodeRuneInString may return utf8.RuneError if there is a + // legitimate replacement character in the input. The documentation + // says errors return (RuneError, 0) or (RuneError, 1). + if r == utf8.RuneError && n <= 1 { + return token{}, &ParseError{s.pos, errors.New("invalid UTF-8")} + } + s.advance(n) + bytes = appendUTF32(bytes, r) + } + } +} + +// next lexes the next token. +func (s *Scanner) next() (token, error) { +again: + if s.isEOF(0) { + return token{Kind: tokenEOF, Pos: s.pos}, nil + } + + switch s.Input[s.pos.Offset] { + case ' ', '\t', '\n', '\r': + // Skip whitespace. + s.advance(1) + goto again + case '#': + // Skip to the end of the comment. + s.advance(1) + for !s.isEOF(0) { + wasNewline := s.Input[s.pos.Offset] == '\n' + s.advance(1) + if wasNewline { + break + } + } + goto again + case '{': + s.advance(1) + return token{Kind: tokenLeftCurly, Pos: s.pos}, nil + case '}': + s.advance(1) + return token{Kind: tokenRightCurly, Pos: s.pos}, nil + case ',': + s.advance(1) + return token{Kind: tokenComma, Pos: s.pos}, nil + case '(': + s.advance(1) + return token{Kind: tokenLeftParen, Pos: s.pos}, nil + case ')': + s.advance(1) + return token{Kind: tokenRightParen, Pos: s.pos}, nil + case '"': + return s.parseQuotedString() + case 'u': + if !s.isEOF(1) && s.Input[s.pos.Offset+1] == '"' { + return s.parseUTF16String() + } + case 'U': + if !s.isEOF(1) && s.Input[s.pos.Offset+1] == '"' { + return s.parseUTF32String() + } + case 'b': + if !s.isEOF(1) && s.Input[s.pos.Offset+1] == '`' { + s.advance(2) // Skip the b`. + bitStr, ok := s.consumeUntil('`') + if !ok { + return token{}, &ParseError{s.pos, errors.New("unmatched `")} + } + + // The leading byte is the number of "extra" bits at the end. + var bitCount int + var sawPipe bool + value := []byte{0} + for i, r := range bitStr { + switch r { + case '0', '1': + if bitCount%8 == 0 { + value = append(value, 0) + } + if r == '1' { + value[bitCount/8+1] |= 1 << uint(7-bitCount%8) + } + bitCount++ + case '|': + if sawPipe { + return token{}, &ParseError{s.pos, errors.New("duplicate |")} + } + + // bitsRemaining is the number of bits remaining in the output that haven't + // been used yet. There cannot be more than that many bits past the |.
+ bitsRemaining := (len(value)-1)*8 - bitCount + inputRemaining := len(bitStr) - i - 1 + if inputRemaining > bitsRemaining { + return token{}, &ParseError{s.pos, fmt.Errorf("expected at most %v explicit padding bits; found %v", bitsRemaining, inputRemaining)} + } + + sawPipe = true + value[0] = byte(bitsRemaining) + default: + return token{}, &ParseError{s.pos, fmt.Errorf("unexpected rune %q", r)} + } + } + if !sawPipe { + value[0] = byte((len(value)-1)*8 - bitCount) + } + return token{Kind: tokenBytes, Value: value, Pos: s.pos}, nil + } + case '`': + s.advance(1) + hexStr, ok := s.consumeUntil('`') + if !ok { + return token{}, &ParseError{s.pos, errors.New("unmatched `")} + } + bytes, err := hex.DecodeString(hexStr) + if err != nil { + return token{}, &ParseError{s.pos, err} + } + return token{Kind: tokenBytes, Value: bytes, Pos: s.pos}, nil + case '[': + s.advance(1) + tagStr, ok := s.consumeUntil(']') + if !ok { + return token{}, &ParseError{s.pos, errors.New("unmatched [")} + } + tag, err := decodeTagString(tagStr) + if err != nil { + return token{}, &ParseError{s.pos, err} + } + value, err := appendTag(nil, tag) + if err != nil { + return token{}, &ParseError{s.pos, err} + } + return token{Kind: tokenBytes, Value: value, Pos: s.pos}, nil + } + + // Normal token. Consume up to the next whitespace character, symbol, or + // EOF. + start := s.pos + s.advance(1) +loop: + for !s.isEOF(0) { + switch s.Input[s.pos.Offset] { + case ' ', '\t', '\n', '\r', ',', '(', ')', '{', '}', '[', ']', '`', '"', '#': + break loop + default: + s.advance(1) + } + } + + symbol := s.Input[start.Offset:s.pos.Offset] + + // See if it is a tag. + tag, ok := internal.TagByName(symbol) + if ok { + value, err := appendTag(nil, tag) + if err != nil { + // This is impossible; built-in tags always encode. + return token{}, &ParseError{s.pos, err} + } + return token{Kind: tokenBytes, Value: value, Pos: start}, nil + } + + if regexpInteger.MatchString(symbol) { + value, err := strconv.ParseInt(symbol, 10, 64) + if err != nil { + return token{}, &ParseError{start, err} + } + return token{Kind: tokenBytes, Value: appendInteger(nil, value), Pos: s.pos}, nil + } + + if regexpOID.MatchString(symbol) { + oidStr := strings.Split(symbol, ".") + var oid []uint32 + for _, s := range oidStr { + u, err := strconv.ParseUint(s, 10, 32) + if err != nil { + return token{}, &ParseError{start, err} + } + oid = append(oid, uint32(u)) + } + der, err := appendObjectIdentifier(nil, oid) + if err != nil { + return token{}, &ParseError{start, err} + } + return token{Kind: tokenBytes, Value: der, Pos: s.pos}, nil + } + + if symbol == "TRUE" { + return token{Kind: tokenBytes, Value: []byte{0xff}, Pos: s.pos}, nil + } + + if symbol == "FALSE" { + return token{Kind: tokenBytes, Value: []byte{0x00}, Pos: s.pos}, nil + } + + if symbol == "indefinite" { + return token{Kind: tokenIndefinite}, nil + } + + if isLongFormOverride(symbol) { + l, err := decodeLongFormOverride(symbol) + if err != nil { + return token{}, &ParseError{start, err} + } + return token{Kind: tokenLongForm, Length: l}, nil + } + + return token{Kind: tokenWord, Value: []byte(symbol), Pos: s.pos}, nil +} + +// exec is the main parser loop. +// +// Because we need to consume all of the tokens between delimiters (e.g. for +// computing the length of the contents of {} or counting arguments in ()), this +// function needs to recurse into itself; the left parameter, when non-nil, +// refers to the left delimiter that triggered the recursion. 
+// +// This function returns when it sees an EOF, a comma, or the right delimiter +// matching left. It returns the encoded contents of the recognized tokens and +// all of the tokens that were recognized, including the token that ended +// parsing. +func (s *Scanner) exec(left *token) ([]byte, []token, error) { + var out []byte + var tokens []token + var lengthModifier *token + var word *token + for { + token, err := s.next() + if err != nil { + return nil, nil, err + } + tokens = append(tokens, token) + if lengthModifier != nil && token.Kind != tokenLeftCurly { + return nil, nil, &ParseError{lengthModifier.Pos, errors.New("length modifier was not followed by '{'")} + } + if word != nil && token.Kind != tokenLeftParen { + return nil, nil, &ParseError{word.Pos, fmt.Errorf("unrecognized symbol %q", string(word.Value))} + } + switch token.Kind { + case tokenBytes: + out = append(out, token.Value...) + case tokenLeftCurly: + child, _, err := s.exec(&token) + if err != nil { + return nil, nil, err + } + var lengthOverride int + if lengthModifier != nil { + if lengthModifier.Kind == tokenIndefinite { + out = append(out, 0x80) + out = append(out, child...) + out = append(out, 0x00, 0x00) + lengthModifier = nil + break + } + if lengthModifier.Kind == tokenLongForm { + lengthOverride = lengthModifier.Length + } + } + out, err = appendLength(out, len(child), lengthOverride) + if err != nil { + // appendLength may fail if the lengthModifier was incompatible. + return nil, tokens, &ParseError{lengthModifier.Pos, err} + } + out = append(out, child...) + lengthModifier = nil + case tokenLeftParen: + if word == nil { + return nil, tokens, &ParseError{token.Pos, errors.New("missing function name")} + } + var args [][]byte + argLoop: + for { + arg, prev, err := s.exec(&token) + if err != nil { + return nil, tokens, err + } + args = append(args, arg) + lastToken := prev[len(prev)-1] + switch lastToken.Kind { + case tokenComma: + if len(prev) < 2 { + return nil, nil, &ParseError{lastToken.Pos, errors.New("function arguments cannot be empty")} + } + case tokenRightParen: + if len(prev) < 2 { + // Actually foo(), so the argument list is nil. + args = nil + } + break argLoop + default: + return nil, nil, &ParseError{lastToken.Pos, errors.New("expected ',' or ')'")} + } + } + bytes, err := s.executeBuiltin(string(word.Value), args) + if err != nil { + return nil, nil, err + } + word = nil + out = append(out, bytes...)
+ case tokenRightCurly: + if left != nil && left.Kind == tokenLeftCurly { + return out, tokens, nil + } + return nil, nil, &ParseError{token.Pos, errors.New("unmatched '}'")} + case tokenRightParen: + if left != nil && left.Kind == tokenLeftParen { + return out, tokens, nil + } + return nil, nil, &ParseError{token.Pos, errors.New("unmatched ')'")} + case tokenLongForm, tokenIndefinite: + lengthModifier = &token + case tokenComma: + return out, tokens, nil + case tokenWord: + word = &token + case tokenEOF: + if left == nil { + return out, tokens, nil + } else if left.Kind == tokenLeftCurly { + return nil, nil, &ParseError{left.Pos, errors.New("unmatched '{'")} + } else { + return nil, nil, &ParseError{left.Pos, errors.New("unmatched '('")} + } + default: + panic(token) + } + } +} + +func (s *Scanner) executeBuiltin(name string, args [][]byte) ([]byte, error) { + builtin, ok := s.Builtins[name] + if !ok { + return nil, fmt.Errorf("unrecognized builtin %q", name) + } + + return builtin(args) +} diff --git a/cmd/ascii2der/scanner_test.go b/ascii2der/scanner_test.go similarity index 93% rename from cmd/ascii2der/scanner_test.go rename to ascii2der/scanner_test.go index cac14d2..a3fd0b8 100644 --- a/cmd/ascii2der/scanner_test.go +++ b/ascii2der/scanner_test.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package main +package ascii2der import ( "bytes" @@ -94,11 +94,6 @@ indefinite long-form:2`, }, true, }, - // Garbage tokens. - {"SEQUENC", nil, false}, - {"1...2", nil, false}, - {"true", nil, false}, - {"false", nil, false}, // Unmatched [. {"[SEQUENCE", nil, false}, // Unmatched ". @@ -383,9 +378,9 @@ indefinite long-form:2`, } func scanAll(in string) (tokens []token, ok bool) { - scanner := newScanner(in) + scanner := NewScanner(in) for { - token, err := scanner.Next() + token, err := scanner.next() if err != nil { return } @@ -426,9 +421,18 @@ var asciiToDERTests = []struct { ok bool }{ {"SEQUENCE { INTEGER { 42 } INTEGER { 1 } }", []byte{0x30, 0x06, 0x02, 0x01, 0x2a, 0x02, 0x01, 0x01}, true}, - // Mismatched curlies. + // Garbage words. + {"SEQUENC", nil, false}, + {"1...2", nil, false}, + {"true", nil, false}, + {"false", nil, false}, + // Mismatched brackets. {"{", nil, false}, {"}", nil, false}, + {"(", nil, false}, + {")", nil, false}, + {"({)}", nil, false}, + {"{(})", nil, false}, // Invalid token. {"BOGUS", nil, false}, // Length overrides. @@ -442,11 +446,33 @@ var asciiToDERTests = []struct { // Too long of length modifiers. {"[long-form:1 99999]", nil, false}, {"SEQUENCE long-form:1 { `" + strings.Repeat("a", 1024) + "` }", nil, false}, + // Function call without function name. + {"()", nil, false}, + // Unknown function. + {"bogus()", nil, false}, + // Basic variable usage. + {`define("foo", 42) var("foo") var("foo")`, []byte{42, 42}, true}, + { + ` + define(42, 42) var(42) + define(42, "a") var(42) + `, + []byte{42, byte('a')}, + true, + }, + {`var("missing")`, nil, false}, + {`var("missing", 42)`, []byte{42}, true}, + // Empty parens -> zero args. + // TODO(mcyoung): if we ever add a zero-argument function, use it in this + // test, instead. + {"var()", nil, false}, + // Empty token streams are not valid arguments.
+ {"define(, 42)", nil, false}, } func TestASCIIToDER(t *testing.T) { for i, tt := range asciiToDERTests { - out, err := asciiToDER(tt.in) + out, err := NewScanner(tt.in).Exec() ok := err == nil if !tt.ok { if ok { diff --git a/cmd/ascii2der/values.go b/ascii2der/values.go similarity index 99% rename from cmd/ascii2der/values.go rename to ascii2der/values.go index 03e82dc..d8406e0 100644 --- a/cmd/ascii2der/values.go +++ b/ascii2der/values.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package main +package ascii2der import ( "errors" diff --git a/cmd/ascii2der/values_test.go b/ascii2der/values_test.go similarity index 99% rename from cmd/ascii2der/values_test.go rename to ascii2der/values_test.go index 17d6a9c..ea74213 100644 --- a/cmd/ascii2der/values_test.go +++ b/ascii2der/values_test.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package main +package ascii2der import ( "testing" diff --git a/cmd/ascii2der/main.go b/cmd/ascii2der/main.go index ca5c73d..a32083b 100644 --- a/cmd/ascii2der/main.go +++ b/cmd/ascii2der/main.go @@ -20,12 +20,72 @@ import ( "fmt" "io/ioutil" "os" + "strings" + + "github.com/google/der-ascii/ascii2der" ) +// pairs conforms to flag.Value. Each time Set() is called, it collects another +// k=v pair into itself. +type pairs map[string]string + +func (p pairs) String() string { + return "" +} + +func (p pairs) Set(pair string) error { + if pair == "" || p == nil { + return nil + } + + split := strings.SplitN(pair, "=", 2) + if len(split) != 2 { + return fmt.Errorf("missing \"=\": %q", pair) + } + + p[split[0]] = split[1] + return nil +} + +var defines = make(map[string]string) +var fileDefines = make(map[string]string) + +func init() { + flag.Var(pairs(defines), "d", + `pair of the form a=b; define("a", "b") is inserted at the start of the input`+ + "\nmay occur multiple times") + flag.Var(pairs(fileDefines), "df", + `like -d, except the second value is interpreted as a binary file to read`+ + "\nmay occur multiple times") +} + var inPath = flag.String("i", "", "input file to use (defaults to stdin)") var outPath = flag.String("o", "", "output file to use (defaults to stdout)") var pemType = flag.String("pem", "", "if provided, format the output as a PEM block with this type") +func readAll(path string) []byte { + var file *os.File + if path == "" { + file = os.Stdin + } else { + var err error + file, err = os.Open(path) + if err != nil { + fmt.Fprintf(os.Stderr, "Error opening %s: %s\n", path, err) + os.Exit(1) + } + defer file.Close() + } + + buf, err := ioutil.ReadAll(file) + if err != nil { + fmt.Fprintf(os.Stderr, "Error reading %s: %s\n", path, err) + os.Exit(1) + } + + return buf +} + func main() { flag.Parse() @@ -35,24 +95,27 @@ func main() { os.Exit(1) } - inFile := os.Stdin - if *inPath != "" { - var err error - inFile, err = os.Open(*inPath) - if err != nil { - fmt.Fprintf(os.Stderr, "Error opening %s: %s\n", *inPath, err) + inBytes := readAll(*inPath) + scanner := ascii2der.NewScanner(string(inBytes)) + scanner.SetFile(*inPath) + + scanner.Vars = make(map[string][]byte) + for k, v := range defines { + if _, ok := scanner.Vars[k]; ok { + fmt.Fprintf(os.Stderr, "Error: tried to define %q with flags twice\n", k) os.Exit(1) } - defer inFile.Close() + scanner.Vars[k] = []byte(v) } - - inBytes, err := ioutil.ReadAll(inFile) - if err != nil { - fmt.Fprintf(os.Stderr, "Error reading input: %s\n", err) 
- os.Exit(1) + for k, v := range fileDefines { + if _, ok := scanner.Vars[k]; ok { + fmt.Fprintf(os.Stderr, "Error: tried to define %q with flags twice\n", k) + os.Exit(1) + } + scanner.Vars[k] = readAll(v) } - outBytes, err := asciiToDER(string(inBytes)) + outBytes, err := scanner.Exec() if err != nil { fmt.Fprintf(os.Stderr, "Syntax error: %s\n", err) os.Exit(1) diff --git a/cmd/ascii2der/scanner.go b/cmd/ascii2der/scanner.go deleted file mode 100644 index f4c49f2..0000000 --- a/cmd/ascii2der/scanner.go +++ /dev/null @@ -1,532 +0,0 @@ -// Copyright 2015 The DER ASCII Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package main - -import ( - "encoding/hex" - "errors" - "fmt" - "regexp" - "strconv" - "strings" - "unicode/utf16" - "unicode/utf8" - - "github.com/google/der-ascii/internal" -) - -// A position describes a location in the input stream. -type position struct { - Offset int // offset, starting at 0 - Line int // line number, starting at 1 - Column int // column number, starting at 1 (byte count) -} - -// A tokenKind is a kind of token. -type tokenKind int - -const ( - tokenBytes tokenKind = iota - tokenLeftCurly - tokenRightCurly - tokenIndefinite - tokenLongForm - tokenEOF -) - -// A parseError is an error during parsing DER ASCII. -type parseError struct { - Pos position - Err error -} - -func (t *parseError) Error() string { - return fmt.Sprintf("line %d: %s", t.Pos.Line, t.Err) -} - -// A token is a token in a DER ASCII file. -type token struct { - // Kind is the kind of the token. - Kind tokenKind - // Value, for a tokenBytes token, is the decoded value of the token in - // bytes. - Value []byte - // Pos is the position of the first byte of the token. - Pos position - // Length, for a tokenLongForm token, is the number of bytes to use to - // encode the length, not including the initial one. - Length int -} - -var ( - regexpInteger = regexp.MustCompile(`^-?[0-9]+$`) - regexpOID = regexp.MustCompile(`^[0-9]+(\.[0-9]+)+$`) -) - -type scanner struct { - text string - pos position -} - -func newScanner(text string) *scanner { - return &scanner{text: text, pos: position{Line: 1}} -} - -func (s *scanner) parseEscapeSequence() (rune, error) { - s.advance() // Skip the \. The caller is assumed to have validated it. 
- if s.isEOF() { - return 0, &parseError{s.pos, errors.New("expected escape character")} - } - switch c := s.text[s.pos.Offset]; c { - case 'n': - s.advance() - return '\n', nil - case '"', '\\': - s.advance() - return rune(c), nil - case 'x': - s.advance() - if s.pos.Offset+2 > len(s.text) { - return 0, &parseError{s.pos, errors.New("unfinished escape sequence")} - } - b, err := hex.DecodeString(s.text[s.pos.Offset : s.pos.Offset+2]) - if err != nil { - return 0, &parseError{s.pos, err} - } - s.advanceBytes(2) - return rune(b[0]), nil - case 'u': - s.advance() - if s.pos.Offset+4 > len(s.text) { - return 0, &parseError{s.pos, errors.New("unfinished escape sequence")} - } - b, err := hex.DecodeString(s.text[s.pos.Offset : s.pos.Offset+4]) - if err != nil { - return 0, &parseError{s.pos, err} - } - s.advanceBytes(4) - return rune(b[0])<<8 | rune(b[1]), nil - case 'U': - s.advance() - if s.pos.Offset+8 > len(s.text) { - return 0, &parseError{s.pos, errors.New("unfinished escape sequence")} - } - b, err := hex.DecodeString(s.text[s.pos.Offset : s.pos.Offset+8]) - if err != nil { - return 0, &parseError{s.pos, err} - } - s.advanceBytes(8) - return rune(b[0])<<24 | rune(b[1])<<16 | rune(b[2])<<8 | rune(b[3]), nil - default: - return 0, &parseError{s.pos, fmt.Errorf("unknown escape sequence \\%c", c)} - } -} - -func (s *scanner) parseQuotedString() (token, error) { - s.advance() // Skip the ". The caller is assumed to have validated it. - start := s.pos - var bytes []byte - for { - if s.isEOF() { - return token{}, &parseError{start, errors.New("unmatched \"")} - } - switch c := s.text[s.pos.Offset]; c { - case '"': - s.advance() - return token{Kind: tokenBytes, Value: bytes, Pos: start}, nil - case '\\': - escapeStart := s.pos - r, err := s.parseEscapeSequence() - if err != nil { - return token{}, err - } - if r > 0xff { - // TODO(davidben): Alternatively, should these encode as UTF-8? - return token{}, &parseError{escapeStart, errors.New("illegal escape for quoted string")} - } - bytes = append(bytes, byte(r)) - default: - s.advance() - bytes = append(bytes, c) - } - } -} - -func appendUTF16(b []byte, r rune) []byte { - if r <= 0xffff { - // Note this logic intentionally tolerates unpaired surrogates. - return append(b, byte(r>>8), byte(r)) - } - - r1, r2 := utf16.EncodeRune(r) - b = append(b, byte(r1>>8), byte(r1)) - b = append(b, byte(r2>>8), byte(r2)) - return b -} - -func (s *scanner) parseUTF16String() (token, error) { - s.advance() // Skip the u. The caller is assumed to have validated it. - s.advance() // Skip the ". The caller is assumed to have validated it. - start := s.pos - var bytes []byte - for { - if s.isEOF() { - return token{}, &parseError{start, errors.New("unmatched \"")} - } - switch c := s.text[s.pos.Offset]; c { - case '"': - s.advance() - return token{Kind: tokenBytes, Value: bytes, Pos: start}, nil - case '\\': - r, err := s.parseEscapeSequence() - if err != nil { - return token{}, err - } - bytes = appendUTF16(bytes, r) - default: - r, n := utf8.DecodeRuneInString(s.text[s.pos.Offset:]) - // Note DecodeRuneInString may return utf8.RuneError if there is a - // legitimate replacement charaacter in the input. The documentation - // says errors return (RuneError, 0) or (RuneError, 1). 
- if r == utf8.RuneError && n <= 1 { - return token{}, &parseError{s.pos, errors.New("invalid UTF-8")} - } - s.advanceBytes(n) - bytes = appendUTF16(bytes, r) - } - } -} - -func appendUTF32(b []byte, r rune) []byte { - return append(b, byte(r>>24), byte(r>>16), byte(r>>8), byte(r)) -} - -func (s *scanner) parseUTF32String() (token, error) { - s.advance() // Skip the U. The caller is assumed to have validated it. - s.advance() // Skip the ". The caller is assumed to have validated it. - start := s.pos - var bytes []byte - for { - if s.isEOF() { - return token{}, &parseError{start, errors.New("unmatched \"")} - } - switch c := s.text[s.pos.Offset]; c { - case '"': - s.advance() - return token{Kind: tokenBytes, Value: bytes, Pos: start}, nil - case '\\': - r, err := s.parseEscapeSequence() - if err != nil { - return token{}, err - } - bytes = appendUTF32(bytes, r) - default: - r, n := utf8.DecodeRuneInString(s.text[s.pos.Offset:]) - // Note DecodeRuneInString may return utf8.RuneError if there is a - // legitimate replacement charaacter in the input. The documentation - // says errors return (RuneError, 0) or (RuneError, 1). - if r == utf8.RuneError && n <= 1 { - return token{}, &parseError{s.pos, errors.New("invalid UTF-8")} - } - s.advanceBytes(n) - bytes = appendUTF32(bytes, r) - } - } -} - -func (s *scanner) Next() (token, error) { -again: - if s.isEOF() { - return token{Kind: tokenEOF, Pos: s.pos}, nil - } - - switch s.text[s.pos.Offset] { - case ' ', '\t', '\n', '\r': - // Skip whitespace. - s.advance() - goto again - case '#': - // Skip to the end of the comment. - s.advance() - for !s.isEOF() { - wasNewline := s.text[s.pos.Offset] == '\n' - s.advance() - if wasNewline { - break - } - } - goto again - case '{': - s.advance() - return token{Kind: tokenLeftCurly, Pos: s.pos}, nil - case '}': - s.advance() - return token{Kind: tokenRightCurly, Pos: s.pos}, nil - case '"': - return s.parseQuotedString() - case 'u': - if s.pos.Offset+1 < len(s.text) && s.text[s.pos.Offset+1] == '"' { - return s.parseUTF16String() - } - case 'U': - if s.pos.Offset+1 < len(s.text) && s.text[s.pos.Offset+1] == '"' { - return s.parseUTF32String() - } - case 'b': - if s.pos.Offset+1 < len(s.text) && s.text[s.pos.Offset+1] == '`' { - s.advance() // Skip the b. - s.advance() // Skip the `. - bitStr, ok := s.consumeUpTo('`') - if !ok { - return token{}, &parseError{s.pos, errors.New("unmatched `")} - } - - // The leading byte is the number of "extra" bits at the end. - var bitCount int - var sawPipe bool - value := []byte{0} - for i, r := range bitStr { - switch r { - case '0', '1': - if bitCount%8 == 0 { - value = append(value, 0) - } - if r == '1' { - value[bitCount/8+1] |= 1 << uint(7-bitCount%8) - } - bitCount++ - case '|': - if sawPipe { - return token{}, &parseError{s.pos, errors.New("duplicate |")} - } - - // bitsRemaining is the number of bits remaining in the output that haven't - // been used yet. There cannot be more than that many bits past the |. 
- bitsRemaining := (len(value)-1)*8 - bitCount - inputRemaining := len(bitStr) - i - 1 - if inputRemaining > bitsRemaining { - return token{}, &parseError{s.pos, fmt.Errorf("expected at most %v explicit padding bits; found %v", bitsRemaining, inputRemaining)} - } - - sawPipe = true - value[0] = byte(bitsRemaining) - default: - return token{}, &parseError{s.pos, fmt.Errorf("unexpected rune %q", r)} - } - } - if !sawPipe { - value[0] = byte((len(value)-1)*8 - bitCount) - } - return token{Kind: tokenBytes, Value: value, Pos: s.pos}, nil - } - case '`': - s.advance() - hexStr, ok := s.consumeUpTo('`') - if !ok { - return token{}, &parseError{s.pos, errors.New("unmatched `")} - } - bytes, err := hex.DecodeString(hexStr) - if err != nil { - return token{}, &parseError{s.pos, err} - } - return token{Kind: tokenBytes, Value: bytes, Pos: s.pos}, nil - case '[': - s.advance() - tagStr, ok := s.consumeUpTo(']') - if !ok { - return token{}, &parseError{s.pos, errors.New("unmatched [")} - } - tag, err := decodeTagString(tagStr) - if err != nil { - return token{}, &parseError{s.pos, err} - } - value, err := appendTag(nil, tag) - if err != nil { - return token{}, &parseError{s.pos, err} - } - return token{Kind: tokenBytes, Value: value, Pos: s.pos}, nil - } - - // Normal token. Consume up to the next whitespace character, symbol, or - // EOF. - start := s.pos - s.advance() -loop: - for !s.isEOF() { - switch s.text[s.pos.Offset] { - case ' ', '\t', '\n', '\r', '{', '}', '[', ']', '`', '"', '#': - break loop - default: - s.advance() - } - } - - symbol := s.text[start.Offset:s.pos.Offset] - - // See if it is a tag. - tag, ok := internal.TagByName(symbol) - if ok { - value, err := appendTag(nil, tag) - if err != nil { - // This is impossible; built-in tags always encode. 
- return token{}, &parseError{s.pos, err} - } - return token{Kind: tokenBytes, Value: value, Pos: start}, nil - } - - if regexpInteger.MatchString(symbol) { - value, err := strconv.ParseInt(symbol, 10, 64) - if err != nil { - return token{}, &parseError{start, err} - } - return token{Kind: tokenBytes, Value: appendInteger(nil, value), Pos: s.pos}, nil - } - - if regexpOID.MatchString(symbol) { - oidStr := strings.Split(symbol, ".") - var oid []uint32 - for _, s := range oidStr { - u, err := strconv.ParseUint(s, 10, 32) - if err != nil { - return token{}, &parseError{start, err} - } - oid = append(oid, uint32(u)) - } - der, ok := appendObjectIdentifier(nil, oid) - if !ok { - return token{}, errors.New("invalid OID") - } - return token{Kind: tokenBytes, Value: der, Pos: s.pos}, nil - } - - if symbol == "TRUE" { - return token{Kind: tokenBytes, Value: []byte{0xff}, Pos: s.pos}, nil - } - - if symbol == "FALSE" { - return token{Kind: tokenBytes, Value: []byte{0x00}, Pos: s.pos}, nil - } - - if symbol == "indefinite" { - return token{Kind: tokenIndefinite}, nil - } - - if isLongFormOverride(symbol) { - l, err := decodeLongFormOverride(symbol) - if err != nil { - return token{}, &parseError{start, err} - } - return token{Kind: tokenLongForm, Length: l}, nil - } - - return token{}, fmt.Errorf("unrecognized symbol %q", symbol) -} - -func (s *scanner) isEOF() bool { - return s.pos.Offset >= len(s.text) -} - -func (s *scanner) advance() { - if !s.isEOF() { - if s.text[s.pos.Offset] == '\n' { - s.pos.Line++ - s.pos.Column = 0 - } else { - s.pos.Column++ - } - s.pos.Offset++ - } -} - -func (s *scanner) advanceBytes(n int) { - for i := 0; i < n; i++ { - s.advance() - } -} - -func (s *scanner) consumeUpTo(b byte) (string, bool) { - start := s.pos.Offset - for !s.isEOF() { - if s.text[s.pos.Offset] == b { - ret := s.text[start:s.pos.Offset] - s.advance() - return ret, true - } - s.advance() - } - return "", false -} - -func asciiToDERImpl(scanner *scanner, leftCurly *token) ([]byte, error) { - var out []byte - var lengthModifier *token - for { - token, err := scanner.Next() - if err != nil { - return nil, err - } - if lengthModifier != nil && token.Kind != tokenLeftCurly { - return nil, &parseError{lengthModifier.Pos, errors.New("length modifier was not followed by '{'")} - } - switch token.Kind { - case tokenBytes: - out = append(out, token.Value...) - case tokenLeftCurly: - child, err := asciiToDERImpl(scanner, &token) - if err != nil { - return nil, err - } - var lengthOverride int - if lengthModifier != nil { - if lengthModifier.Kind == tokenIndefinite { - out = append(out, 0x80) - out = append(out, child...) - out = append(out, 0x00, 0x00) - lengthModifier = nil - break - } - if lengthModifier.Kind == tokenLongForm { - lengthOverride = lengthModifier.Length - } - } - out, err = appendLength(out, len(child), lengthOverride) - if err != nil { - // appendLength may fail if the lengthModifier was incompatible. - return nil, &parseError{lengthModifier.Pos, err} - } - out = append(out, child...) 
- lengthModifier = nil - case tokenRightCurly: - if leftCurly != nil { - return out, nil - } - return nil, &parseError{token.Pos, errors.New("unmatched '}'")} - case tokenLongForm, tokenIndefinite: - lengthModifier = &token - case tokenEOF: - if leftCurly == nil { - return out, nil - } - return nil, &parseError{leftCurly.Pos, errors.New("unmatched '{'")} - default: - panic(token) - } - } -} - -func asciiToDER(input string) ([]byte, error) { - scanner := newScanner(input) - return asciiToDERImpl(scanner, nil) -} diff --git a/language.txt b/language.txt index 1b4f774..8cb9849 100644 --- a/language.txt +++ b/language.txt @@ -151,7 +151,7 @@ FALSE [PRIVATE 2] [UNIVERSAL 16] # This is a SEQUENCE. [UNIVERSAL 2 PRIMITIVE] # This is an INTEGER. -[long-form:2 UNIVERSAL 2 PRIMTIVE] # This is `1f0002` instead of `02`. +[long-form:2 UNIVERSAL 2 PRIMITIVE] # This is `1f0002` instead of `02`. # As a shorthand, one may write type names from ASN.1, replacing spaces with # underscore. These specify tag, number, and the constructed bit. The @@ -236,7 +236,6 @@ INTEGER long-form:1 { 5 } INTEGER { `00ff` } } - # Examples. # These primitives may be combined with raw byte strings to produce other @@ -278,6 +277,39 @@ SEQUENCE `aabbcc` INTEGER { 2 } +# DER ASCII provides a selection of builtin functions for building more +# complex DER structures. Builtins are spelled builtin(arg1, arg2, ...), where +# each argument is an arbitrary but non-empty sequence of DER ASCII tokens. +# Tokens are fully expanded (but not emitted) before the builtin executes. +# Builtin calls expand to a byte string just like any other token. +# +# There is explicitly no support for user-defined functions. +# +# The syntax and output of anything using builtins is subject to change, and +# doesn't have the same stability expectations as the rest of DER ASCII. + +# define(var, value) defines a variable to the given value. The name of the +# variable may be an arbitrary byte string. Variables may be redefined at any +# point. define() always expands to an empty byte string. +define("payload", SEQUENCE { + INTEGER { 1 } + OCTET_STRING { "hello, world" } +}) +define(`ffff`, "payload") + +# var(var) expands to a variable previously defined with define(). The main use +# of var() is to factor out a complex structure that will be repeated many times, +# such as complex issuer and subject fields in a self-signed X.509 cert. +# +# It is an error to access a var() that has not been previously defined. +var("payload") +var(var(`ffff`)) # Same as above, since var(`ffff`) expands to "payload". + +# sign(algo, key, message) expands to a digital signature for message, +# using the given algorithm string (e.g. "ECDSA_SHA256") and private key. The +# supported key formats and algorithms can be found in +# ascii2der/builtins.go. + # Disassembler. # Although the conversion from DER ASCII to a byte string is well-defined, the diff --git a/samples/cert_with_sign.txt b/samples/cert_with_sign.txt new file mode 100644 index 0000000..7a7731d --- /dev/null +++ b/samples/cert_with_sign.txt @@ -0,0 +1,147 @@ +# This is the same certificate as cert.txt, but with the signature generated +# using sign(). +# +# ascii2der will assemble both files to equal byte strings, because RSASSA-PKCS1-v1_5 +# happens to be deterministic. + +# Our private key, in PKCS #8 form.
+define("my_key", SEQUENCE { + INTEGER { 0 } + SEQUENCE { + # rsaEncryption + OBJECT_IDENTIFIER { 1.2.840.113549.1.1.1 } + NULL {} + } + OCTET_STRING { + SEQUENCE { + INTEGER { 0 } + INTEGER { `00d82bc8a632e462ff4df3d0ad598b45a7bdf147bf09587b22bd35ae97258694a080c0b41f7691674631d01084b7221e70239172c8e96d793a8577800fc4951675c54a714cc8633fa3f2639c2a4f9afacbc1716e288528a0271e651cae07d55b6f2d43ed2b90b18caf246daee9173a05c1bfb81cae653b1b58c2d9aed6aa6788f1` } + INTEGER { 65537 } + INTEGER { `008072d3d15de033aafc88e9f0778ab8230a4c7a935b5c461ec84b43a8f0555daf599227f5a220983b2f92309e8bab2c66f9db8d5730cd2a01ca18cdf1909ffe2d791c40960c5161ea0b743f9cf43270a1c33666bdab55fc55c24f48fe5a618d07f8247a12a31972cb7234dbe9d45854948266156e38b2dc6d6215d74820809401` } + INTEGER { `00f12f2c19ee1ecf2c999b87bdafde60eace3790faad8f9adec13b14c6dfb69f8795a1d0fe65494250b59534014b918453042012952ae6f5786342999600725491` } + INTEGER { `00e57341d15469ec0bb5d389a0f0ada58a18d73776d9e69ef134049a918e475d4bea46f12d0b2468c972fc33a739a6bcdada8019376a0c466048d98278a2a49e61` } + INTEGER { `0be99d8f0650e540b9b191e9cf96f74881b902e32ed169ffd8a1776c3f3e80f0ac765aa14615713e1549f250a20fe4ee48c4e0c6176162fc7842a0dd64d640d1` } + INTEGER { `00e4d74e168bdd5499dd4fcc5d228ddda35ce111254d7010a7ba5cb91860d1d64007b99782783168fd39dc455c0c48bae47fb5f0f06ea92d6b8c5cbb1ebbfff921` } + INTEGER { `00bef4572c74da6ba545cd36a288ef12685b07577950c973ad32b0690798dd9a86568231ef0765bd0a49fbb03aac3c1f94dadc97d23a03750132ba230408363ca1` } + } + } +}) + +# The "to be signed" portion of our cert. +define("tbs_cert", SEQUENCE { + [0] { + INTEGER { 2 } + } + INTEGER { `00fbb04c2eab109b0c` } + SEQUENCE { + # sha1WithRSAEncryption + OBJECT_IDENTIFIER { 1.2.840.113549.1.1.5 } + NULL {} + } + SEQUENCE { + SET { + SEQUENCE { + # countryName + OBJECT_IDENTIFIER { 2.5.4.6 } + PrintableString { "AU" } + } + } + SET { + SEQUENCE { + # stateOrProvinceName + OBJECT_IDENTIFIER { 2.5.4.8 } + UTF8String { "Some-State" } + } + } + SET { + SEQUENCE { + # organizationName + OBJECT_IDENTIFIER { 2.5.4.10 } + UTF8String { "Internet Widgits Pty Ltd" } + } + } + } + SEQUENCE { + UTCTime { "140423205040Z" } + UTCTime { "170422205040Z" } + } + SEQUENCE { + SET { + SEQUENCE { + # countryName + OBJECT_IDENTIFIER { 2.5.4.6 } + PrintableString { "AU" } + } + } + SET { + SEQUENCE { + # stateOrProvinceName + OBJECT_IDENTIFIER { 2.5.4.8 } + UTF8String { "Some-State" } + } + } + SET { + SEQUENCE { + # organizationName + OBJECT_IDENTIFIER { 2.5.4.10 } + UTF8String { "Internet Widgits Pty Ltd" } + } + } + } + SEQUENCE { + SEQUENCE { + # rsaEncryption + OBJECT_IDENTIFIER { 1.2.840.113549.1.1.1 } + NULL {} + } + BIT_STRING { + `00` + SEQUENCE { + INTEGER { `00d82bc8a632e462ff4df3d0ad598b45a7bdf147bf09587b22bd35ae97258694a080c0b41f7691674631d01084b7221e70239172c8e96d793a8577800fc4951675c54a714cc8633fa3f2639c2a4f9afacbc1716e288528a0271e651cae07d55b6f2d43ed2b90b18caf246daee9173a05c1bfb81cae653b1b58c2d9aed6aa6788f1` } + INTEGER { 65537 } + } + } + } + [3] { + SEQUENCE { + SEQUENCE { + # subjectKeyIdentifier + OBJECT_IDENTIFIER { 2.5.29.14 } + OCTET_STRING { + OCTET_STRING { `8b75d5accb08be0e1f65b7fa56be6ca775da85af` } + } + } + SEQUENCE { + # authorityKeyIdentifier + OBJECT_IDENTIFIER { 2.5.29.35 } + OCTET_STRING { + SEQUENCE { + [0 PRIMITIVE] { `8b75d5accb08be0e1f65b7fa56be6ca775da85af` } + } + } + } + SEQUENCE { + # basicConstraints + OBJECT_IDENTIFIER { 2.5.29.19 } + OCTET_STRING { + SEQUENCE { + BOOLEAN { TRUE } + } + } + } + } + } +}) + +SEQUENCE { + var("tbs_cert") + SEQUENCE { + # sha1WithRSAEncryption + 
OBJECT_IDENTIFIER { 1.2.840.113549.1.1.5 } + NULL {} + } + BIT_STRING { + `00` + sign("RSA_PKCS1_SHA1", var("my_key"), var("tbs_cert")) + } +} diff --git a/samples/certificates.md b/samples/certificates.md index e25e756..009bc11 100644 --- a/samples/certificates.md +++ b/samples/certificates.md @@ -2,8 +2,8 @@ Modifying and creating X.509 certificates is more involved than modifying a normal DER structure if one wishes to keep the signature valid. This document -provides instructions for fixing up a modified test certificate's signature if -the issuer's private key is available. (For a non-test certificate, this is the +provides instructions for using the `sign()` builtin to generate the signature +on-demand using the private key. (For a non-test certificate, this is the CA's private key and is presumably unavailable.) X.509 certificates are specified in [RFC 5280](https://tools.ietf.org/html/rfc5280). @@ -17,31 +17,32 @@ The basic top-level structure is: The `tbsCertificate` is a large structure with the contents of the certificate. This includes the subject, issuer, public key, etc. The `signatureAlgorithm` specifies the signature algorithm and parameters. Finally, the `signatureValue` -is the signature itself, created from the issuer's private key. This is the -field that must be fixed once the `tbsCertificate` is modified. - -The signature is computed over the serialized `tbsCertificate`, so, using a -text editor, copy the `tbsCertificate` value into its own file, `tbs-cert.txt`. -Now sign that with the issuing private key. If using OpenSSL's command-line -tool, here is a sample command: - - ascii2der -i tbs-cert.txt | openssl dgst -sha256 -sign issuer_key.pem | \ - xxd -p -c 9999 > signature.txt - -For other options, replace `-sha256` with a different digest or pass `-sigopt`. -See [OpenSSL's documentation](https://www.openssl.org/docs/man1.1.1/man1/dgst.html) -for details. Note that, for a valid certificate, the signature parameters -should match the `signatureAlgorithm` field. If using different signing -parameters, update it and the copy in the `tbsCertificate`. - -Finally, in a text editor, replace the signature with the new one. X.509 -defines certificates as BIT STRINGs, but every signature algorithm uses byte -strings, so include a leading zero to specify that no bits should be removed -from the end: - - BIT_STRING { - `00` # No unused bits. - `INSERT SIGNATURE HERE` +is the signature itself, created from the issuer's private key. We can express +this relationship using a variable and `sign()`: + + define("tbs_cert", SEQUENCE { + [0] { INTEGER { 2 } } + # Other X.509-ey goodness. + }) + + SEQUENCE { + # Splat in the actual tbsCertificate. + var("tbs_cert") + + # This is the signatureAlgorithm. + SEQUENCE { + # ed25519 + OBJECT_IDENTIFIER { 1.3.6.1.4.1.11591.15.1 } + } + + # This is the signatureValue. + BIT_STRING { + `00` # No unused bits. + sign("Ed25519", var("my_key"), var("tbs_cert")) + } } -Finally, use `ascii2der` to convert the certificate to DER. +The variable `"my_key"` would have been defined elsewhere in the file, or +potentially injected using the `-df` flag. + +See `cert_with_sign.txt` for a complete example. \ No newline at end of file
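A minimal sketch of driving the new `ascii2der` package from Go rather than through the command-line tool, assuming only the API introduced in this change (`NewScanner`, the `Vars` table, and `Exec`); it mirrors what `cmd/ascii2der/main.go` does for the `-d` flag, and the input text and variable name here are illustrative only:

    package main

    import (
        "fmt"

        "github.com/google/der-ascii/ascii2der"
    )

    func main() {
        // Predefine a variable from Go, as the -d flag does, then reference it
        // with var() inside the DER ASCII input.
        scanner := ascii2der.NewScanner(`
            SEQUENCE {
                INTEGER { var("version") }
                OCTET_STRING { "hello" }
            }
        `)
        scanner.Vars = map[string][]byte{"version": {0x02}}

        der, err := scanner.Exec()
        if err != nil {
            fmt.Println("error:", err)
            return
        }
        fmt.Printf("%x\n", der) // 300a020102040568656c6c6f
    }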