Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions src/language/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,31 @@ impl Language {
&self.word_list()[first..first + count]
}

/// Returns the string written between consecutive words when a mnemonic in
/// this language is rendered.
///
/// Japanese uses the ideographic space (U+3000); every other language uses a
/// single ASCII space (U+0020).
#[inline]
pub fn separator(self) -> &'static str {
    match self {
        // Spelled as an escape on purpose: a raw U+3000 in the source is
        // visually indistinguishable from an ASCII space and is silently
        // folded into one by any tool that applies NFKC/NFKD normalization.
        #[cfg(feature = "japanese")]
        Language::Japanese => "\u{3000}",
        _ => " ",
    }
}

/// Split a mnemonic into its individual words.
///
/// Words are delimited by runs of Unicode whitespace. That set contains both
/// the ASCII space and the ideographic space (U+3000), so a Japanese mnemonic
/// is split correctly whether it still carries its ideographic separators or
/// has already been NFKD-normalized (which rewrites U+3000 to U+0020).
/// Empty segments are never yielded.
///
/// The returned iterator borrows from `mnemonic`.
pub fn split_mnemonic<'a>(self, mnemonic: &'a str) -> Box<dyn Iterator<Item = &'a str> + 'a> {
    // No per-language branch is needed: `split_whitespace` follows the Unicode
    // `White_Space` property, which includes U+3000. Splitting Japanese on a
    // single separator character would reject the other, equally valid, form.
    Box::new(mnemonic.split_whitespace())
}

/// Get the index of the word in the word list.
#[inline]
pub fn find_word(self, word: &str) -> Option<u16> {
Expand Down
29 changes: 23 additions & 6 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -430,12 +430,20 @@ impl Mnemonic {
/// interpreted in multiple languages, an [Error::AmbiguousLanguages] is
/// returned, containing the possible languages.
pub fn language_of<S: AsRef<str>>(mnemonic: S) -> Result<Language, Error> {
    // `split_whitespace` splits on every Unicode `White_Space` character,
    // which includes the ideographic space U+3000 used by Japanese mnemonics,
    // and never yields empty words. No extra separator test or intermediate
    // buffer is required.
    Mnemonic::language_of_iter(mnemonic.as_ref().split_whitespace())
}

/// Parse a mnemonic in normalized UTF8 in the given language.
pub fn parse_in_normalized(language: Language, s: &str) -> Result<Mnemonic, Error> {
let nb_words = s.split_whitespace().count();
let word_iter = language.split_mnemonic(s);
let words_vec: Vec<&str> = word_iter.collect();
let nb_words = words_vec.len();

if is_invalid_word_count(nb_words) {
return Err(Error::BadWordCount(nb_words));
}
Expand All @@ -447,7 +455,7 @@ impl Mnemonic {
// We only use `nb_words * 11` elements in this array.
let mut bits = [false; MAX_NB_WORDS * 11];

for (i, word) in s.split_whitespace().enumerate() {
for (i, word) in words_vec.iter().enumerate() {
let idx = language.find_word(word).ok_or(Error::UnknownWord(i))?;

words[i] = idx;
Expand Down Expand Up @@ -490,15 +498,18 @@ impl Mnemonic {
language: Language,
s: &str,
) -> Result<Mnemonic, Error> {
let nb_words = s.split_whitespace().count();
let word_iter = language.split_mnemonic(s);
let words_vec: Vec<&str> = word_iter.collect();
let nb_words = words_vec.len();

if is_invalid_word_count(nb_words) {
return Err(Error::BadWordCount(nb_words));
}

// Here we will store the eventual words.
let mut words = [EOF; MAX_NB_WORDS];

for (i, word) in s.split_whitespace().enumerate() {
for (i, word) in words_vec.iter().enumerate() {
let idx = language.find_word(word).ok_or(Error::UnknownWord(i))?;

words[i] = idx;
Expand All @@ -524,6 +535,11 @@ impl Mnemonic {
) -> Result<Mnemonic, Error> {
let mut cow = s.into();
Mnemonic::normalize_utf8_cow(&mut cow);
// For Japanese, convert ASCII spaces back to ideographic spaces after normalization
#[cfg(feature = "japanese")]
if language == Language::Japanese && cow.contains(' ') {
cow = Cow::Owned(cow.replace(' ', " "));
}
Ok(Mnemonic::parse_in_normalized(language, cow.as_ref())?)
}

Expand Down Expand Up @@ -645,9 +661,10 @@ impl Mnemonic {

impl fmt::Display for Mnemonic {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
let separator = self.lang.separator();
for (i, word) in self.words().enumerate() {
if i > 0 {
f.write_str(" ")?;
f.write_str(separator)?;
}
f.write_str(word)?;
}
Expand Down
58 changes: 58 additions & 0 deletions tests/japanese_test.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
// Integration tests for Japanese mnemonics: word separator handling, parsing
// of reference mnemonics and seed derivation.
#[cfg(feature = "japanese")]
mod japanese_tests {
    use bip39::{Mnemonic, Language};
    use bitcoin_hashes::hex::FromHex;

    /// For each vector (entropy, mnemonic, passphrase, seed) checks that:
    /// * a mnemonic generated from the entropy is rendered with ideographic
    ///   spaces and parses back to the same entropy;
    /// * the reference mnemonic parses back to the entropy;
    /// * the seed derived from the reference mnemonic and passphrase matches.
    #[test]
    fn test_japanese_vectors() {
        // (entropy hex, reference mnemonic, passphrase, expected seed hex)
        let test_vectors = [
            (
                "00000000000000000000000000000000",
                "あいこくしん あいこくしん あいこくしん あいこくしん あいこくしん あいこくしん あいこくしん あいこくしん あいこくしん あいこくしん あいこくしん あおぞら",
                "メートルガバヴァぱばぐゞちぢ十人十色",
                "a262d6fb6122ecf45be09c50492b31f92e9beb7d9a845987a02cefda57a15f9c467a17872029a9e92299b5cbdf306e3a0ee620245cbd508959b6cb7ca637bd55",
            ),
            (
                "7f7f7f7f7f7f7f7f7f7f7f7f7f7f7f7f",
                "そつう れきだい ほんやく わかす りくつ ばいか ろせん やちん そつう れきだい ほんやく わかめ",
                "メートルガバヴァぱばぐゞちぢ十人十色",
                "aee025cbe6ca256862f889e48110a6a382365142f7d16f2b9545285b3af64e542143a577e9c144e101a6bdca18f8d97ec3366ebf5b088b1c1af9bc31346e60d9",
            ),
            (
                "80808080808080808080808080808080",
                "そとづら あまど おおう あこがれる いくぶん けいけん あたえる いよく そとづら あまど おおう あかちゃん",
                "メートルガバヴァぱばぐゞちぢ十人十色",
                "e51736736ebdf77eda23fa17e31475fa1d9509c78f1deb6b4aacfbd760a7e2ad769c714352c95143b5c1241985bcb407df36d64e75dd5a2b78ca5d2ba82a3544",
            ),
            (
                "ffffffffffffffffffffffffffffffff",
                "われる われる われる われる われる われる われる われる われる われる われる ろんぶん",
                "メートルガバヴァぱばぐゞちぢ十人十色",
                "4cd2ef49b479af5e1efbbd1e0bdc117f6a29b1010211df4f78e2ed40082865793e57949236c43b9fe591ec70e5bb4298b8b71dc4b267bb96ed4ed282c8f7761c",
            ),
            (
                "77c2b00716cec7213839159e404db50d",
                "せまい うちがわ あずき かろう めずらしい だんち ますく おさめる ていぼう あたる すあな えしゃく",
                "メートルガバヴァぱばぐゞちぢ十人十色",
                "344cef9efc37d0cb36d89def03d09144dd51167923487eec42c487f7428908546fa31a3c26b7391a2b3afe7db81b9f8c5007336b58e269ea0bd10749a87e0193",
            ),
        ];

        for &(entropy_hex, expected_mnemonic, passphrase, expected_seed_hex) in test_vectors.iter() {
            let entropy = Vec::from_hex(entropy_hex).unwrap();
            let mnemonic = Mnemonic::from_entropy_in(Language::Japanese, &entropy).unwrap();
            let generated_mnemonic = mnemonic.to_string();

            // The separator is written as an escape so the check cannot be
            // confused with (or degraded into) a test for an ASCII space.
            assert!(generated_mnemonic.contains('\u{3000}'), "Generated mnemonic should use ideographic spaces");

            // Whatever `Display` produces must be accepted by the parser again.
            let reparsed = Mnemonic::parse_in(Language::Japanese, generated_mnemonic.as_str()).unwrap();
            assert_eq!(reparsed.to_entropy(), entropy, "Generated mnemonic should parse back to original entropy");

            let parsed_expected = Mnemonic::parse_in(Language::Japanese, expected_mnemonic).unwrap();
            assert_eq!(parsed_expected.to_entropy(), entropy, "Expected mnemonic should parse back to original entropy");

            // Test seed generation with expected mnemonic (using to_seed to handle normalization)
            let seed = parsed_expected.to_seed(passphrase);
            let expected_seed = Vec::from_hex(expected_seed_hex).unwrap();
            assert_eq!(seed.to_vec(), expected_seed, "Seed should match expected for entropy {}", entropy_hex);
        }
    }
}