From 7e81e85f42053fe66f62176d0ae72f583508df81 Mon Sep 17 00:00:00 2001 From: higgs01 <6546697+higgs01@users.noreply.github.com> Date: Wed, 16 Jul 2025 20:27:56 +0200 Subject: [PATCH 1/2] feat: automatically sanitise usernames on create/update Signed-off-by: higgs01 <6546697+higgs01@users.noreply.github.com> --- .../core/database/src/models/users/model.rs | 80 +++++++++++-------- 1 file changed, 46 insertions(+), 34 deletions(-) diff --git a/crates/core/database/src/models/users/model.rs b/crates/core/database/src/models/users/model.rs index 0c2b6cee8..38bef50e0 100644 --- a/crates/core/database/src/models/users/model.rs +++ b/crates/core/database/src/models/users/model.rs @@ -7,6 +7,7 @@ use futures::future::join_all; use iso8601_timestamp::Timestamp; use once_cell::sync::Lazy; use rand::seq::SliceRandom; +use regex::{Regex, RegexBuilder}; use revolt_config::{config, FeaturesLimits}; use revolt_models::v0::{self, UserBadges, UserFlags}; use revolt_presence::filter_online; @@ -149,6 +150,12 @@ auto_derived!( } ); +static BLOCKED_USERNAME_PATTERNS: Lazy = Lazy::new(|| { + RegexBuilder::new("`{3}|((discord|rvlt|guilded)\\.gg|https?:\\/\\/)") + .case_insensitive(true) + .build().unwrap() +}); + pub static DISCRIMINATOR_SEARCH_SPACE: Lazy> = Lazy::new(|| { let mut set = (2..9999) .map(|v| format!("{:0>4}", v)) @@ -198,11 +205,15 @@ impl User { I: Into>, D: Into>, { - let username = User::validate_username(username)?; + let original_username = username; + let original_username_str = original_username.as_str(); + User::validate_username(original_username_str)?; + let (is_username_sanitised,username) = User::sanitise_username(original_username_str); let mut user = User { id: account_id.into().unwrap_or_else(|| Ulid::new().to_string()), discriminator: User::find_discriminator(db, &username, None).await?, username, + display_name: if is_username_sanitised { Some(original_username) } else { Default::default() }, last_acknowledged_policy_change: Timestamp::now_utc(), ..Default::default() }; @@ -278,16 +289,10 @@ impl User { } } - /// Sanitise and validate a username can be used - pub fn validate_username(username: String) -> Result { + /// validate a username for blocked names + pub fn validate_username(username: &str) -> Result<()> { // Copy the username for validation let username_lowercase = username.to_lowercase(); - - // Block homoglyphs - if decancer::cure(&username_lowercase).into_str() != username_lowercase { - return Err(create_error!(InvalidUsername)); - } - // Ensure the username itself isn't blocked const BLOCKED_USERNAMES: &[&str] = &["admin", "revolt"]; @@ -297,23 +302,26 @@ impl User { } } - // Ensure none of the following substrings show up in the username - const BLOCKED_SUBSTRINGS: &[&str] = &[ - "```", - "discord.gg", - "rvlt.gg", - "guilded.gg", - "https://", - "http://", - ]; - - for substr in BLOCKED_SUBSTRINGS { - if username_lowercase.contains(substr) { - return Err(create_error!(InvalidUsername)); - } + Ok(()) + } + + /// sanitise username + pub fn sanitise_username(username: &str) -> (bool, String) { + let original_username = username; + // sanitise homoglyphs + let mut username = decancer::cure(&username).into_str(); + + if BLOCKED_USERNAME_PATTERNS.is_match(&username) { + username = BLOCKED_USERNAME_PATTERNS.replace_all(&mut username, "").into_owned(); } - Ok(username) + const USERNAME_MIN_LEN: usize = 2; + let username_length_diff = USERNAME_MIN_LEN.saturating_sub(username.len()); + if username_length_diff > 0 { + username.push_str(&"_".repeat(username_length_diff)) + } + + (original_username != username, username) } /// Find a user and session ID from a given token and hint @@ -416,12 +424,24 @@ impl User { /// Update a user's username pub async fn update_username(&mut self, db: &Database, username: String) -> Result<()> { - let username = User::validate_username(username)?; - if self.username.to_lowercase() == username.to_lowercase() { + let original_username = username; + let original_username_str = original_username.as_str(); + User::validate_username(original_username_str)?; + let (is_username_sanitised, username) = User::sanitise_username(&original_username); + if is_username_sanitised { self.update( db, PartialUser { + discriminator: Some( + User::find_discriminator( + db, + &username, + Some((self.discriminator.to_string(), self.id.clone())), + ) + .await?, + ), username: Some(username), + display_name: if is_username_sanitised { Some(original_username) } else { Default::default() }, ..Default::default() }, vec![], @@ -431,14 +451,6 @@ impl User { self.update( db, PartialUser { - discriminator: Some( - User::find_discriminator( - db, - &username, - Some((self.discriminator.to_string(), self.id.clone())), - ) - .await?, - ), username: Some(username), ..Default::default() }, From 9b6ebf7c813c6dbe8741f6680b0a029e0bd69871 Mon Sep 17 00:00:00 2001 From: higgs01 <6546697+higgs01@users.noreply.github.com> Date: Wed, 16 Jul 2025 21:23:36 +0200 Subject: [PATCH 2/2] test: add tests for validation and sanitasion Signed-off-by: higgs01 <6546697+higgs01@users.noreply.github.com> --- Cargo.lock | 20 +-- crates/core/database/Cargo.toml | 2 +- .../core/database/src/models/users/model.rs | 128 ++++++++++++++++-- 3 files changed, 132 insertions(+), 18 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 05033c65a..9ab5b103f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2003,9 +2003,13 @@ dependencies = [ [[package]] name = "decancer" -version = "1.6.5" +version = "3.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "080b09f6adad25c23d8c47c54e52e59b0dc09d079c4b23e0f147dac440359d0d" +checksum = "a9244323129647178bf41ac861a2cdb9d9c81b9b09d3d0d1de9cd302b33b8a1d" +dependencies = [ + "lazy_static", + "regex", +] [[package]] name = "der" @@ -2384,7 +2388,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cea14ef9355e3beab063703aa9dab15afd25f0667c341310c1e5274bb1d0da18" dependencies = [ "libc", - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] @@ -3806,7 +3810,7 @@ checksum = "e04d7f318608d35d4b61ddd75cbdaee86b023ebe2bd5a66ee0915f0bf93095a9" dependencies = [ "hermit-abi 0.5.1", "libc", - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] @@ -6902,7 +6906,7 @@ dependencies = [ "errno", "libc", "linux-raw-sys 0.4.15", - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] @@ -6915,7 +6919,7 @@ dependencies = [ "errno", "libc", "linux-raw-sys 0.9.4", - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] @@ -8039,7 +8043,7 @@ dependencies = [ "getrandom 0.3.3", "once_cell", "rustix 1.0.7", - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] @@ -9236,7 +9240,7 @@ version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" dependencies = [ - "windows-sys 0.48.0", + "windows-sys 0.59.0", ] [[package]] diff --git a/crates/core/database/Cargo.toml b/crates/core/database/Cargo.toml index 7f2e09730..7dbe901ea 100644 --- a/crates/core/database/Cargo.toml +++ b/crates/core/database/Cargo.toml @@ -47,7 +47,7 @@ nanoid = "0.4.0" base64 = "0.21.3" once_cell = "1.17" indexmap = "1.9.1" -decancer = "1.6.2" +decancer = "3.3.3" deadqueue = "0.2.4" linkify = { optional = true, version = "0.8.1" } url-escape = { optional = true, version = "0.1.1" } diff --git a/crates/core/database/src/models/users/model.rs b/crates/core/database/src/models/users/model.rs index 38bef50e0..a73e17f1e 100644 --- a/crates/core/database/src/models/users/model.rs +++ b/crates/core/database/src/models/users/model.rs @@ -150,12 +150,6 @@ auto_derived!( } ); -static BLOCKED_USERNAME_PATTERNS: Lazy = Lazy::new(|| { - RegexBuilder::new("`{3}|((discord|rvlt|guilded)\\.gg|https?:\\/\\/)") - .case_insensitive(true) - .build().unwrap() -}); - pub static DISCRIMINATOR_SEARCH_SPACE: Lazy> = Lazy::new(|| { let mut set = (2..9999) .map(|v| format!("{:0>4}", v)) @@ -170,6 +164,12 @@ pub static DISCRIMINATOR_SEARCH_SPACE: Lazy> = Lazy::new(|| { set.into_iter().collect() }); +static BLOCKED_USERNAME_PATTERNS: Lazy = Lazy::new(|| { + RegexBuilder::new("`{3}|((discord|rvlt|guilded)\\.gg|https?:\\/\\/)") + .case_insensitive(true) + .build().unwrap() +}); + #[allow(clippy::derivable_impls)] impl Default for User { fn default() -> Self { @@ -290,7 +290,7 @@ impl User { } /// validate a username for blocked names - pub fn validate_username(username: &str) -> Result<()> { + fn validate_username(username: &str) -> Result<()> { // Copy the username for validation let username_lowercase = username.to_lowercase(); // Ensure the username itself isn't blocked @@ -306,10 +306,12 @@ impl User { } /// sanitise username - pub fn sanitise_username(username: &str) -> (bool, String) { + fn sanitise_username(username: &str) -> (bool, String) { let original_username = username; // sanitise homoglyphs - let mut username = decancer::cure(&username).into_str(); + let options = decancer::Options::default() + .retain_capitalization(); + let mut username = decancer::cure(&username, options).unwrap().to_string(); if BLOCKED_USERNAME_PATTERNS.is_match(&username) { username = BLOCKED_USERNAME_PATTERNS.replace_all(&mut username, "").into_owned(); @@ -837,3 +839,111 @@ impl User { badges } } + +#[cfg(test)] +mod tests { + use crate::{User}; + + #[test] + fn username_validation() { + let username_admin = "Admin"; + let username_revolt = "Revolt"; + let username_allowed = "Allowed"; + + assert!(User::validate_username(username_admin).is_err()); + assert!(User::validate_username(username_revolt).is_err()); + assert!(User::validate_username(username_allowed).is_ok()); + } + + #[test] + fn username_sanitisation_clean() { + let username_clean = "Test"; + + let (is_sanitised, username_clean_sanitised) = User::sanitise_username(username_clean); + + assert!(!is_sanitised); + assert_eq!(username_clean, username_clean_sanitised) + } + + #[test] + fn username_sanitisation_hemoglyphs() { + let username_hemoglyphs = "𝔽𝕌Ňℕy"; + + let (is_hemoglyphs_sanitised, username_hemoglyphs_sanitised) = User::sanitise_username(username_hemoglyphs); + + assert!(is_hemoglyphs_sanitised); + assert_eq!("funny", username_hemoglyphs_sanitised); + } + + #[test] + fn username_blocked_patterns() { + let username_grave = "```_test"; + let username_discord = "discord.gg_test"; + let username_rvlt = "rvlt.gg_test"; + let username_guilded = "guilded.gg_test"; + let username_http = "http://_test"; + let username_https = "https://_test"; + + let expected_username = "_test"; + + let (is_grave_sanitised, username_grave_sanitised) = User::sanitise_username(username_grave); + let (is_discord_sanitised, username_discord_sanitised) = User::sanitise_username(username_discord); + let (is_rvlt_sanitised, username_rvlt_sanitised) = User::sanitise_username(username_rvlt); + let (is_guilded_sanitised, username_guilded_sanitised) = User::sanitise_username(username_guilded); + let (is_http_sanitised, username_http_sanitised) = User::sanitise_username(username_http); + let (is_https_sanitised, username_https_sanitised) = User::sanitise_username(username_https); + + assert!(is_grave_sanitised); + assert_eq!(expected_username, username_grave_sanitised); + assert!(is_discord_sanitised); + assert_eq!(expected_username, username_discord_sanitised); + assert!(is_rvlt_sanitised); + assert_eq!(expected_username, username_rvlt_sanitised); + assert!(is_guilded_sanitised); + assert_eq!(expected_username, username_guilded_sanitised); + assert!(is_http_sanitised); + assert_eq!(expected_username, username_http_sanitised); + assert!(is_https_sanitised); + assert_eq!(expected_username, username_https_sanitised); + } + + #[test] + fn username_sanitisation_padding() { + let username_padding = "```a"; + + let (_, username) = User::sanitise_username(username_padding); + + assert_eq!("a_", username); + } + + #[async_std::test] + async fn create_user() { + database_test!(|db| async move { + let created_clean = User::create(&db, "Test".to_string(), None, None) + .await + .unwrap(); + + let mut updated_clean = User::create(&db, "Test".to_string(), None, None) + .await + .unwrap(); + updated_clean.update_username(&db, "Test2".to_string()).await.unwrap(); + + let created_sanitised = User::create(&db, "http://test".to_string(), None, None) + .await + .unwrap(); + + let mut updated_sanitised = User::create(&db, "Test".to_string(), None, None) + .await + .unwrap(); + updated_sanitised.update_username(&db, "http://test".to_string()).await.unwrap(); + + assert_eq!(None, created_clean.display_name); + assert_eq!(None, updated_clean.display_name); + assert_eq!("test", created_sanitised.username); + assert_eq!("http://test", created_sanitised.display_name.unwrap()); + assert_eq!("test", updated_sanitised.username); + assert_eq!("http://test", updated_sanitised.display_name.unwrap()); + }); + } + +} \ No newline at end of file