Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 38 additions & 1 deletion core/data/tests/can_generate_const/input.rs
Original file line number Diff line number Diff line change
@@ -1,2 +1,39 @@
#[typeshare]
pub const MY_VAR: u32 = 12;
pub const MY_INT_VAR: u32 = 12;

// String literal-related consts below:

#[typeshare]
pub const EMPTY: &'static str = "";

#[typeshare]
pub const SIMPLE_ASCII: &'static str = "Hello, world!";

#[typeshare]
pub const MULTILINE: &'static str = "Line1
Line2
Line3";

#[typeshare]
pub const ESCAPED_CHARACTERS: &'static str = "First\\line.\nSecond \"quoted\" line.\tEnd.";

#[typeshare]
pub const UNICODE: &'static str = "Emoji: 😄, Accented: café, Chinese: 世界";

#[typeshare]
pub const RAW_STRING: &'static str = r#"Raw \n, "quotes" are okay, and single \ is fine too"#;

#[typeshare]
pub const CONTAINS_BACKTICK: &'static str = "Backtick: ` inside";

#[typeshare]
pub const CONTAINS_DOLLAR_CURLY: &'static str = "${not_interpolation}";

#[typeshare]
pub const ENDS_WITH_ODD_BACKSLASH: &'static str = r"Odd number of backslashes: \\\";

#[typeshare]
pub const NULL_BYTE: &'static str = "Null:\0End";

#[typeshare]
pub const COMBINING: &'static str = "e\u{301} vs é"; // normalization check
13 changes: 12 additions & 1 deletion core/data/tests/can_generate_const/output.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,15 @@ package proto

import "encoding/json"

const MyVar uint32 = 12
const MyIntVar uint32 = 12
const Empty string = ""
const SimpleAscii string = "Hello, world!"
const Multiline string = "Line1\nLine2\nLine3"
const EscapedCharacters string = "First\\line.\nSecond \"quoted\" line.\tEnd."
const Unicode string = "Emoji: 😄, Accented: café, Chinese: 世界"
const RawString string = `Raw \n, "quotes" are okay, and single \ is fine too`
const ContainsBacktick string = "Backtick: ` inside"
const ContainsDollarCurly string = "${not_interpolation}"
const EndsWithOddBackslash string = `Odd number of backslashes: \\\`
const NullByte string = "Null:\x00End"
const Combining string = "é vs é"
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In the original Rust string, these two accented characters are encoded differently (one as a combining accent). That's still the case here, but we've output the raw characters instead of escaped unicode, which makes them look the same here.

I think ideally, we'd export escaped unicode as-is (e.g. have const Combining string = "e\u0301 vs é"), but I wasn't sure how to achieve that.

16 changes: 15 additions & 1 deletion core/data/tests/can_generate_const/output.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,18 @@



MY_VAR: int = 12
MY_INT_VAR: int = 12
EMPTY: str = """"""
SIMPLE_ASCII: str = """Hello, world!"""
MULTILINE: str = """Line1
Line2
Line3"""
ESCAPED_CHARACTERS: str = """First\\line.
Second "quoted" line.\tEnd."""
UNICODE: str = """Emoji: 😄, Accented: café, Chinese: 世界"""
RAW_STRING: str = r"""Raw \n, "quotes" are okay, and single \ is fine too"""
CONTAINS_BACKTICK: str = """Backtick: ` inside"""
CONTAINS_DOLLAR_CURLY: str = """${not_interpolation}"""
ENDS_WITH_ODD_BACKSLASH: str = r"""Odd number of backslashes: \\""" + '\\'
NULL_BYTE: str = """Null:\x00End"""
COMBINING: str = """é vs é"""
16 changes: 15 additions & 1 deletion core/data/tests/can_generate_const/output.ts
Original file line number Diff line number Diff line change
@@ -1 +1,15 @@
export const MY_VAR: number = 12;
export const MY_INT_VAR: number = 12;
export const EMPTY: string = ``;
export const SIMPLE_ASCII: string = `Hello, world!`;
export const MULTILINE: string = `Line1
Line2
Line3`;
export const ESCAPED_CHARACTERS: string = `First\\line.
Second "quoted" line. End.`;
export const UNICODE: string = `Emoji: 😄, Accented: café, Chinese: 世界`;
export const RAW_STRING: string = String.raw`Raw \n, "quotes" are okay, and single \ is fine too`;
export const CONTAINS_BACKTICK: string = `Backtick: \` inside`;
export const CONTAINS_DOLLAR_CURLY: string = `\${not_interpolation}`;
export const ENDS_WITH_ODD_BACKSLASH: string = String.raw`Odd number of backslashes: \\` + '\\';
export const NULL_BYTE: string = `Null:\u0000End`;
export const COMBINING: string = `é vs é`;
64 changes: 63 additions & 1 deletion core/src/language/go.rs
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,7 @@ impl Language for Go {
}

fn write_const(&mut self, w: &mut dyn Write, c: &RustConst) -> std::io::Result<()> {
match c.expr {
match &c.expr {
RustConstExpr::Int(val) => {
let const_type = self
.format_type(&c.r#type, &[])
Expand All @@ -216,6 +216,19 @@ impl Language for Go {
val
)
}
RustConstExpr::String { value, is_raw } => {
let const_type = self
.format_type(&c.r#type, &[])
.map_err(std::io::Error::other)?;
let literal = make_go_string_literal(value, *is_raw);
writeln!(
w,
"const {} {} = {}",
c.id.renamed.to_pascal_case(),
const_type,
literal,
)
}
}
}

Expand Down Expand Up @@ -579,6 +592,55 @@ fn convert_acronyms_to_uppercase(uppercase_acronyms: Vec<String>, name: &str) ->
res
}

/// Renders `value` as Go source text for a string constant expression.
///
/// * `value` - the decoded string contents (escape sequences already resolved).
/// * `is_raw` - whether the Rust source used a raw string literal; when `true`
///   the output prefers Go raw (backtick) literals so the text stays readable.
///
/// Returns either a single literal or several literals joined with ` + `.
/// A raw request falls back to an interpreted (double-quoted) literal when the
/// value contains a character that a Go raw literal cannot represent faithfully.
fn make_go_string_literal(value: &str, is_raw: bool) -> String {
    /// Escape for Go interpreted string literal (double-quoted).
    fn escape_interpreted(s: &str) -> String {
        let mut out = String::with_capacity(s.len() + 2);
        out.push('"');
        for c in s.chars() {
            match c {
                // Replace supported (recognizable) escape sequences + escape double quotes
                '\\' => out.push_str(r"\\"),
                '"' => out.push_str(r#"\""#),
                '\n' => out.push_str(r"\n"),
                '\r' => out.push_str(r"\r"),
                '\t' => out.push_str(r"\t"),
                '\x07' => out.push_str(r"\a"),
                '\x08' => out.push_str(r"\b"),
                '\x0c' => out.push_str(r"\f"),
                '\x0b' => out.push_str(r"\v"),
                // The Go compiler rejects a byte order mark anywhere but the start of a file.
                '\u{feff}' => out.push_str(r"\ufeff"),
                c if (c as u32) < 0x20 => {
                    // Other control characters
                    out.push_str(&format!(r"\x{:02x}", c as u32));
                }
                _ => out.push(c),
            }
        }
        out.push('"');
        out
    }

    /// Whether `c` survives unchanged inside a Go raw string literal.
    /// Carriage returns are discarded from raw literals, and NUL / a mid-file
    /// byte order mark are rejected by the Go compiler.
    fn is_raw_safe(c: char) -> bool {
        !matches!(c, '\r' | '\0' | '\u{feff}')
    }

    if is_raw && value.chars().all(is_raw_safe) {
        // Raw string literal using backticks. A raw literal cannot contain a backtick,
        // so each one is spliced in as an interpreted "`" between the raw pieces.
        // `split` always yields at least one segment, so an empty value becomes "``".
        value
            .split('`')
            .map(|segment| format!("`{segment}`"))
            .collect::<Vec<_>>()
            .join(" + \"`\" + ")
    } else {
        // Interpreted string: escape the input and return
        escape_interpreted(value)
    }
}

mod test {
#[test]
fn no_pointer_slice() {
Expand Down
93 changes: 92 additions & 1 deletion core/src/language/python.rs
Original file line number Diff line number Diff line change
Expand Up @@ -295,7 +295,7 @@ impl Language for Python {
}

fn write_const(&mut self, w: &mut dyn Write, c: &RustConst) -> std::io::Result<()> {
match c.expr {
match &c.expr {
RustConstExpr::Int(val) => {
let const_type = self
.format_type(&c.r#type, &[])
Expand All @@ -308,6 +308,19 @@ impl Language for Python {
val
)
}
RustConstExpr::String { value, is_raw } => {
let const_type = self
.format_type(&c.r#type, &[])
.map_err(std::io::Error::other)?;
let literal = make_python_string_literal(value, *is_raw);
writeln!(
w,
"{}: {} = {}",
c.id.renamed.to_snake_case().to_uppercase(),
const_type,
literal,
)
}
}
}

Expand Down Expand Up @@ -812,6 +825,84 @@ fn json_translation_for_type(python_type: &str) -> Option<CustomJsonTranslationF
.map(|custom_translation| (*custom_translation).to_owned())
}

/// Renders `value` as Python source text for a string constant expression.
///
/// * `value` - the decoded string contents (escape sequences already resolved).
/// * `is_raw` - whether the Rust source used a raw string literal; when `true`
///   the output prefers Python `r"""..."""` literals.
///
/// Returns one triple-quoted literal, or several literals joined with ` + `
/// when the value contains text that a single literal cannot hold (an embedded
/// `"""`, or for raw strings a trailing odd backslash or trailing quotes).
fn make_python_string_literal(value: &str, is_raw: bool) -> String {
    const TRIPLE_QUOTE: &str = "\"\"\"";

    /// Escape a non-raw segment and wrap it in a Python triple-quoted string.
    /// Newlines are kept literal; backslashes, `\r`, `\t`, `\b`, `\f` use their named
    /// escapes; other control characters (< 0x20) are escaped as \xHH.
    /// Double quotes at the very end are escaped so they cannot merge with the
    /// closing `"""` (which would either break the literal or drop the quotes).
    fn escape_non_raw_segment(s: &str) -> String {
        let body = s.trim_end_matches('"');
        let trailing_quotes = s.len() - body.len();

        let mut out = String::with_capacity(s.len() + trailing_quotes + 2 * TRIPLE_QUOTE.len());
        out.push_str(TRIPLE_QUOTE);
        for c in body.chars() {
            match c {
                // Replace supported (recognizable) escape sequences
                '\\' => out.push_str(r"\\"),
                '\r' => out.push_str(r"\r"),
                '\t' => out.push_str(r"\t"),
                '\x08' => out.push_str(r"\b"),
                '\x0c' => out.push_str(r"\f"),
                '\n' => out.push('\n'),
                c if (c as u32) < 0x20 => {
                    // Other control characters
                    out.push_str(&format!(r"\x{:02x}", c as u32));
                }
                _ => out.push(c),
            }
        }
        for _ in 0..trailing_quotes {
            out.push_str("\\\"");
        }
        out.push_str(TRIPLE_QUOTE);
        out
    }

    /// Given a raw segment, returns one to three Python literal pieces:
    /// - The main raw triple-quoted segment, adjusted so it ends with neither a double
    ///   quote nor an odd number of backslashes.
    /// - Optionally a `'\\'` piece to supply a trailing backslash that had to be stripped.
    /// - Optionally a single-quoted piece holding the trailing double quotes that had to
    ///   be stripped (a raw string cannot escape them without keeping the backslash).
    fn format_raw_segment_parts(s: &str) -> Vec<String> {
        let unquoted = s.trim_end_matches('"');
        let trailing_quotes = &s[unquoted.len()..];

        let num_trailing_backslashes = unquoted.len() - unquoted.trim_end_matches('\\').len();
        let needs_extra_backslash = num_trailing_backslashes % 2 == 1;
        let body = if needs_extra_backslash {
            // Strip the last backslash so the raw string doesn't end with an odd number of backslashes
            &unquoted[..unquoted.len() - 1]
        } else {
            unquoted
        };

        let mut parts = vec![format!("r{TRIPLE_QUOTE}{body}{TRIPLE_QUOTE}")];
        if needs_extra_backslash {
            // A normal single-quoted backslash literal replaces the stripped backslash
            parts.push(r"'\\'".to_string());
        }
        if !trailing_quotes.is_empty() {
            parts.push(format!("'{trailing_quotes}'"));
        }
        parts
    }

    // Python source cannot contain NUL bytes and its tokenizer normalizes `\r` to `\n`,
    // so a raw literal would not round-trip such values; use the escaped form instead.
    let use_raw = is_raw && !value.contains(['\r', '\0']);

    // Split on triple quotes; they are re-inserted as the separate literal `'"""'`.
    let mut pieces: Vec<String> = Vec::new();
    for (i, segment) in value.split(TRIPLE_QUOTE).enumerate() {
        if i > 0 {
            pieces.push(format!("'{TRIPLE_QUOTE}'"));
        }
        if use_raw {
            pieces.extend(format_raw_segment_parts(segment));
        } else {
            pieces.push(escape_non_raw_segment(segment));
        }
    }

    // Concatenate with ` + ` (a single piece is returned unmodified)
    pieces.join(" + ")
}

#[cfg(test)]
mod test {
use crate::rust_types::Id;
Expand Down
Loading