From 4c0bb8d7c4b8329d2c17755f50d400086fa0c10f Mon Sep 17 00:00:00 2001 From: Matthew Briggs Date: Thu, 31 Jul 2025 13:05:52 -0400 Subject: [PATCH] Support exporting string literals for some languages This builds upon PR #218 and partially resolves issue #228. Adds support for exporting string literals to Typescript, Python, and Go. --- core/data/tests/can_generate_const/input.rs | 39 +++++++- core/data/tests/can_generate_const/output.go | 13 ++- core/data/tests/can_generate_const/output.py | 16 ++- core/data/tests/can_generate_const/output.ts | 16 ++- core/src/language/go.rs | 64 +++++++++++- core/src/language/python.rs | 93 ++++++++++++++++- core/src/language/typescript.rs | 100 ++++++++++++++++++- core/src/parser.rs | 8 ++ core/src/rust_types.rs | 4 + 9 files changed, 346 insertions(+), 7 deletions(-) diff --git a/core/data/tests/can_generate_const/input.rs b/core/data/tests/can_generate_const/input.rs index f08d4043..4d4d4752 100644 --- a/core/data/tests/can_generate_const/input.rs +++ b/core/data/tests/can_generate_const/input.rs @@ -1,2 +1,39 @@ #[typeshare] -pub const MY_VAR: u32 = 12; +pub const MY_INT_VAR: u32 = 12; + +// String literal-related consts below: + +#[typeshare] +pub const EMPTY: &'static str = ""; + +#[typeshare] +pub const SIMPLE_ASCII: &'static str = "Hello, world!"; + +#[typeshare] +pub const MULTILINE: &'static str = "Line1 +Line2 +Line3"; + +#[typeshare] +pub const ESCAPED_CHARACTERS: &'static str = "First\\line.\nSecond \"quoted\" line.\tEnd."; + +#[typeshare] +pub const UNICODE: &'static str = "Emoji: 😄, Accented: café, Chinese: 世界"; + +#[typeshare] +pub const RAW_STRING: &'static str = r#"Raw \n, "quotes" are okay, and single \ is fine too"#; + +#[typeshare] +pub const CONTAINS_BACKTICK: &'static str = "Backtick: ` inside"; + +#[typeshare] +pub const CONTAINS_DOLLAR_CURLY: &'static str = "${not_interpolation}"; + +#[typeshare] +pub const ENDS_WITH_ODD_BACKSLASH: &'static str = r"Odd number of backslashes: \\\"; + +#[typeshare] +pub const NULL_BYTE: &'static str = "Null:\0End"; + +#[typeshare] +pub const COMBINING: &'static str = "e\u{301} vs é"; // normalization check diff --git a/core/data/tests/can_generate_const/output.go b/core/data/tests/can_generate_const/output.go index e8201346..7f96b150 100644 --- a/core/data/tests/can_generate_const/output.go +++ b/core/data/tests/can_generate_const/output.go @@ -2,4 +2,15 @@ package proto import "encoding/json" -const MyVar uint32 = 12 +const MyIntVar uint32 = 12 +const Empty string = "" +const SimpleAscii string = "Hello, world!" +const Multiline string = "Line1\nLine2\nLine3" +const EscapedCharacters string = "First\\line.\nSecond \"quoted\" line.\tEnd." +const Unicode string = "Emoji: 😄, Accented: café, Chinese: 世界" +const RawString string = `Raw \n, "quotes" are okay, and single \ is fine too` +const ContainsBacktick string = "Backtick: ` inside" +const ContainsDollarCurly string = "${not_interpolation}" +const EndsWithOddBackslash string = `Odd number of backslashes: \\\` +const NullByte string = "Null:\x00End" +const Combining string = "é vs é" diff --git a/core/data/tests/can_generate_const/output.py b/core/data/tests/can_generate_const/output.py index 58be6bec..47812be8 100644 --- a/core/data/tests/can_generate_const/output.py +++ b/core/data/tests/can_generate_const/output.py @@ -3,4 +3,18 @@ -MY_VAR: int = 12 +MY_INT_VAR: int = 12 +EMPTY: str = """""" +SIMPLE_ASCII: str = """Hello, world!""" +MULTILINE: str = """Line1 +Line2 +Line3""" +ESCAPED_CHARACTERS: str = """First\\line. +Second "quoted" line.\tEnd.""" +UNICODE: str = """Emoji: 😄, Accented: café, Chinese: 世界""" +RAW_STRING: str = r"""Raw \n, "quotes" are okay, and single \ is fine too""" +CONTAINS_BACKTICK: str = """Backtick: ` inside""" +CONTAINS_DOLLAR_CURLY: str = """${not_interpolation}""" +ENDS_WITH_ODD_BACKSLASH: str = r"""Odd number of backslashes: \\""" + '\\' +NULL_BYTE: str = """Null:\x00End""" +COMBINING: str = """é vs é""" diff --git a/core/data/tests/can_generate_const/output.ts b/core/data/tests/can_generate_const/output.ts index e742d471..dd80d550 100644 --- a/core/data/tests/can_generate_const/output.ts +++ b/core/data/tests/can_generate_const/output.ts @@ -1 +1,15 @@ -export const MY_VAR: number = 12; +export const MY_INT_VAR: number = 12; +export const EMPTY: string = ``; +export const SIMPLE_ASCII: string = `Hello, world!`; +export const MULTILINE: string = `Line1 +Line2 +Line3`; +export const ESCAPED_CHARACTERS: string = `First\\line. +Second "quoted" line. End.`; +export const UNICODE: string = `Emoji: 😄, Accented: café, Chinese: 世界`; +export const RAW_STRING: string = String.raw`Raw \n, "quotes" are okay, and single \ is fine too`; +export const CONTAINS_BACKTICK: string = `Backtick: \` inside`; +export const CONTAINS_DOLLAR_CURLY: string = `\${not_interpolation}`; +export const ENDS_WITH_ODD_BACKSLASH: string = String.raw`Odd number of backslashes: \\` + '\\'; +export const NULL_BYTE: string = `Null:\u0000End`; +export const COMBINING: string = `é vs é`; diff --git a/core/src/language/go.rs b/core/src/language/go.rs index f7950e30..37b9b8c5 100644 --- a/core/src/language/go.rs +++ b/core/src/language/go.rs @@ -203,7 +203,7 @@ impl Language for Go { } fn write_const(&mut self, w: &mut dyn Write, c: &RustConst) -> std::io::Result<()> { - match c.expr { + match &c.expr { RustConstExpr::Int(val) => { let const_type = self .format_type(&c.r#type, &[]) @@ -216,6 +216,19 @@ impl Language for Go { val ) } + RustConstExpr::String { value, is_raw } => { + let const_type = self + .format_type(&c.r#type, &[]) + .map_err(std::io::Error::other)?; + let literal = make_go_string_literal(value, *is_raw); + writeln!( + w, + "const {} {} = {}", + c.id.renamed.to_pascal_case(), + const_type, + literal, + ) + } } } @@ -579,6 +592,55 @@ fn convert_acronyms_to_uppercase(uppercase_acronyms: Vec, name: &str) -> res } +fn make_go_string_literal(value: &str, is_raw: bool) -> String { + /// Escape for Go interpreted string literal (double-quoted). + fn escape_interpreted(s: &str) -> String { + let mut out = String::with_capacity(s.len()); + for c in s.chars() { + match c { + // Replace supported (recognizable) escape sequences + escape double quotes + '\\' => out.push_str(r"\\"), + '"' => out.push_str(r#"\""#), + '\n' => out.push_str(r"\n"), + '\r' => out.push_str(r"\r"), + '\t' => out.push_str(r"\t"), + '\x07' => out.push_str(r"\a"), + '\x08' => out.push_str(r"\b"), + '\x0c' => out.push_str(r"\f"), + '\x0b' => out.push_str(r"\v"), + c if (c as u32) < 0x20 => { + // Other control characters + out.push_str(&format!(r"\x{:02x}", c as u32)); + } + _ => out.push(c), + } + } + format!(r#""{out}""#) + } + + if is_raw { + // Raw string literal using backticks. Backticks inside the string literal are handled with concatenation. + let split: Vec<&str> = value.split('`').collect(); + let mut pieces: Vec = Vec::new(); + for (i, segment) in split.iter().enumerate() { + pieces.push(format!("`{}`", segment)); + if i != split.len() - 1 { + // Insert a literal backtick between raw pieces + pieces.push(r#""`""#.to_string()); + } + } + + if pieces.is_empty() { + "``".to_string() + } else { + pieces.join(" + ") + } + } else { + // Interpreted string: escape the input and return + escape_interpreted(value) + } +} + mod test { #[test] fn no_pointer_slice() { diff --git a/core/src/language/python.rs b/core/src/language/python.rs index c03bb302..c3bcc806 100644 --- a/core/src/language/python.rs +++ b/core/src/language/python.rs @@ -295,7 +295,7 @@ impl Language for Python { } fn write_const(&mut self, w: &mut dyn Write, c: &RustConst) -> std::io::Result<()> { - match c.expr { + match &c.expr { RustConstExpr::Int(val) => { let const_type = self .format_type(&c.r#type, &[]) @@ -308,6 +308,19 @@ impl Language for Python { val ) } + RustConstExpr::String { value, is_raw } => { + let const_type = self + .format_type(&c.r#type, &[]) + .map_err(std::io::Error::other)?; + let literal = make_python_string_literal(value, *is_raw); + writeln!( + w, + "{}: {} = {}", + c.id.renamed.to_snake_case().to_uppercase(), + const_type, + literal, + ) + } } } @@ -812,6 +825,84 @@ fn json_translation_for_type(python_type: &str) -> Option String { + /// Escape a non-raw segment for inclusion inside a Python triple-quoted string. + /// Leaves newlines and tabs as-is; other control characters (< 0x20) are escaped as \xHH. + fn escape_non_raw_segment(s: &str) -> String { + let mut out = String::with_capacity(s.len()); + for c in s.chars() { + match c { + // Replace supported (recognizable) escape sequences + '\\' => out.push_str(r"\\"), + '\r' => out.push_str(r"\r"), + '\t' => out.push_str(r"\t"), + '\x08' => out.push_str(r"\b"), + '\x0c' => out.push_str(r"\f"), + '\n' => out.push('\n'), + c if (c as u32) < 0x20 => { + // Other control characters + out.push_str(&format!(r"\x{:<02x}", c as u32)); + } + _ => out.push(c), + } + } + format!(r#""""{out}""""#) + } + + /// Given a raw segment, returns one or two Python literal pieces: + /// - The main raw triple-quoted segment, adjusted so it does not end with an odd number of backslashes. + /// - Optionally an extra piece to supply a trailing backslash if we had to strip one off. + fn format_raw_segment_parts(s: &str) -> Vec { + // Count trailing backslashes + let mut num_trailing_backslashes = 0; + for &byte in s.as_bytes().iter().rev() { + if byte == b'\\' { + num_trailing_backslashes += 1; + } else { + break; + } + } + + let mut parts = Vec::new(); + if num_trailing_backslashes % 2 == 1 { + // Strip the last backslash so the raw string doesn't end with an odd number of backslashes + let trimmed = &s[..s.len() - 1]; + parts.push(format!(r#"r"""{trimmed}""""#)); + // Append a normal single-quoted backslash literal to replace the stripped backslash + parts.push(r"'\\'".to_string()); + } else { + parts.push(format!(r#"r"""{s}""""#)); + } + parts + } + + // Split on triple quotes; we will re-insert them as separate literal `'"""'` + let split: Vec<&str> = value + .split(r#"""""#) // This is a literal triple-quote (`"""`) + .collect(); + let mut pieces: Vec = Vec::new(); + + for (i, segment) in split.iter().enumerate() { + if is_raw { + // Raw segment(s), possibly with an extra piece if it ended with odd backslashes. + let mut raw_parts = format_raw_segment_parts(segment); + pieces.append(&mut raw_parts); + } else { + // Non-raw: escape and append to `pieces` + let escaped = escape_non_raw_segment(segment); + pieces.push(escaped); + } + + // After every split except the last, insert the triple-quote literal itself. + if i != split.len() - 1 { + pieces.push(r#"'"""'"#.to_string()); + } + } + + // Return the pieces concatenated together with ` + ` (if there's only one piece, will return that unmodified) + pieces.join(" + ") +} + #[cfg(test)] mod test { use crate::rust_types::Id; diff --git a/core/src/language/typescript.rs b/core/src/language/typescript.rs index 3f5555ad..1a0e447a 100644 --- a/core/src/language/typescript.rs +++ b/core/src/language/typescript.rs @@ -175,7 +175,7 @@ export const ReplacerFunc = (key: string, value: unknown): unknown => {{ } fn write_const(&mut self, w: &mut dyn Write, c: &RustConst) -> io::Result<()> { - match c.expr { + match &c.expr { RustConstExpr::Int(val) => { let const_type = self .format_type(&c.r#type, &[]) @@ -188,6 +188,19 @@ export const ReplacerFunc = (key: string, value: unknown): unknown => {{ val ) } + RustConstExpr::String { value, is_raw } => { + let const_type = self + .format_type(&c.r#type, &[]) + .map_err(std::io::Error::other)?; + let literal = make_typescript_string_literal(value, *is_raw); + writeln!( + w, + "export const {}: {} = {};", + c.id.renamed.to_snake_case().to_uppercase(), + const_type, + literal, + ) + } } } @@ -458,3 +471,88 @@ fn typescript_property_aware_rename(name: &str) -> String { } name.to_string() } + +fn make_typescript_string_literal(value: &str, is_raw: bool) -> String { + /// Escape a non-raw segment so that when placed inside a backtick template literal + /// it produces exactly `value` (no interpolation, preserves backslashes, backticks, etc). + fn escape_non_raw_segment(s: &str) -> String { + let mut out = String::with_capacity(s.len()); + let mut chars = s.chars().peekable(); + while let Some(c) = chars.next() { + match c { + '\\' => out.push_str(r"\\"), + '\0' => out.push_str(r"\u0000"), + '`' => out.push_str(r"\`"), + '$' => { + // Lookahead for `{` to escape interpolation start + if matches!(chars.peek(), Some('{')) { + out.push_str(r"\${"); + chars.next(); // Consume '{' + } else { + out.push('$'); + } + } + _ => out.push(c), + } + } + out + } + + /// For raw segments: prevent interpolation (`${ ... }`) by escaping the `$`. + fn escape_raw_segment(s: &str) -> String { + s.replace("${", r"\${") + } + + /// Handle the odd-number-of-trailing-backslashes issue for a raw segment. + /// Returns one or two pieces: the main String.raw`` piece, and optionally a separate '\\' literal. + fn format_raw_segment_parts(s: &str) -> Vec { + let escaped = escape_raw_segment(s); + + // Count trailing backslashes in the escaped segment + let mut num_trailing_backslashes = 0; + for &byte in escaped.as_bytes().iter().rev() { + if byte == b'\\' { + num_trailing_backslashes += 1; + } else { + break; + } + } + + let mut parts = Vec::new(); + if num_trailing_backslashes % 2 == 1 { + // Strip one backslash so the template literal doesn't end with an unescaped backslash. + let trimmed = &escaped[..escaped.len() - 1]; + parts.push(format!("String.raw`{trimmed}`")); + // Append the stripped backslash as a normal string literal. + parts.push(r"'\\'".to_string()); + } else { + parts.push(format!("String.raw`{escaped}`")); + } + parts + } + + if is_raw { + // Split on backtick to avoid embedding unescaped backticks. + let split: Vec<&str> = value.split('`').collect(); + let mut pieces: Vec = Vec::new(); + + for (i, segment) in split.iter().enumerate() { + let mut seg_parts = format_raw_segment_parts(segment); + pieces.append(&mut seg_parts); + if i != split.len() - 1 { + // Insert a literal backtick between raw pieces + pieces.push("'`'".to_string()); + } + } + + if pieces.is_empty() { + "String.raw``".to_string() + } else { + pieces.join(" + ") + } + } else { + // Non-raw: single template literal, escape backticks, backslashes, and interpolation. + let escaped = escape_non_raw_segment(value); + format!("`{}`", escaped) + } +} diff --git a/core/src/parser.rs b/core/src/parser.rs index 53b9ed5d..2df6f069 100644 --- a/core/src/parser.rs +++ b/core/src/parser.rs @@ -557,6 +557,14 @@ fn parse_const_expr(e: &Expr) -> Result { .map_err(|_| ParseError::RustConstTypeInvalid)?; RustConstExpr::Int(int) } + Lit::Str(lit_str) => { + let repr = lit_str.token().to_string(); + let is_raw = repr.starts_with('r') || repr.starts_with("br"); + RustConstExpr::String { + value: lit_str.value(), + is_raw, + } + } _ => return Err(ParseError::RustConstTypeInvalid), }) }; diff --git a/core/src/rust_types.rs b/core/src/rust_types.rs index fab294a9..02086741 100644 --- a/core/src/rust_types.rs +++ b/core/src/rust_types.rs @@ -103,6 +103,10 @@ impl PartialEq for RustConst { pub enum RustConstExpr { /// Expression represents an integer. Int(i128), + String { + value: String, + is_raw: bool, + }, } /// Rust type alias.