Skip to content

Commit 3006600

Browse files
authored
feat(parser): enhance user agent parsing logic and add tests for invalid user agents (#76)
1 parent ce92376 commit 3006600

File tree

4 files changed

+104
-40
lines changed

4 files changed

+104
-40
lines changed

README.md

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -122,21 +122,21 @@ AMD Ryzen 9 9950X, 1 CPU, 32 logical and 16 physical cores
122122
Job=ShortRun IterationCount=3 LaunchCount=1
123123
WarmupCount=3
124124

125-
| Method | Categories | Data | Mean | Error | StdDev | Ratio | RatioSD | Gen0 | Gen1 | Gen2 | Allocated | Alloc Ratio |
126-
|------------------- |----------- |------------- |----------------:|-----------------:|---------------:|----------:|--------:|---------:|---------:|---------:|-----------:|------------:|
127-
| MyCSharp | Basic | Chrome Win10 | 936.44 ns | 131.253 ns | 7.194 ns | 1.00 | 0.01 | 0.0029 | - | - | 48 B | 1.00 |
128-
| UAParser | Basic | Chrome Win10 | 9,512,347.40 ns | 3,961,045.109 ns | 217,118.249 ns | 10,158.42 | 211.89 | 656.2500 | 546.8750 | 109.3750 | 11523315 B | 240,069.06 |
129-
| DeviceDetector.NET | Basic | Chrome Win10 | 5,428,530.73 ns | 5,276,988.556 ns | 289,249.550 ns | 5,797.23 | 270.29 | 296.8750 | 125.0000 | 31.2500 | 5002239 B | 104,213.31 |
130-
| | | | | | | | | | | | | |
131-
| MyCSharp | Basic | Google-Bot | 165.66 ns | 21.926 ns | 1.202 ns | 1.00 | 0.01 | - | - | - | - | NA |
132-
| UAParser | Basic | Google-Bot | 9,737,403.12 ns | 2,336,698.462 ns | 128,082.328 ns | 58,781.92 | 764.74 | 671.8750 | 656.2500 | 109.3750 | 11877003 B | NA |
133-
| DeviceDetector.NET | Basic | Google-Bot | 6,331,960.42 ns | 1,602,716.199 ns | 87,850.283 ns | 38,224.23 | 518.30 | 500.0000 | 62.5000 | - | 8817013 B | NA |
134-
| | | | | | | | | | | | | |
135-
| MyCSharp | Cached | Chrome Win10 | 26.75 ns | 3.749 ns | 0.205 ns | 1.00 | 0.01 | - | - | - | - | NA |
136-
| UAParser | Cached | Chrome Win10 | 250,039.55 ns | 6,502.182 ns | 356.407 ns | 9,346.54 | 63.39 | 2.1973 | - | - | 37488 B | NA |
137-
| | | | | | | | | | | | | |
138-
| MyCSharp | Cached | Google-Bot | 19.66 ns | 4.312 ns | 0.236 ns | 1.00 | 0.01 | - | - | - | - | NA |
139-
| UAParser | Cached | Google-Bot | 184,991.85 ns | 46,235.986 ns | 2,534.350 ns | 9,408.77 | 148.82 | 2.6855 | - | - | 45857 B | NA |
125+
| Method | Categories | Data | Mean | Error | StdDev | Ratio | RatioSD | Gen0 | Gen1 | Gen2 | Allocated | Alloc Ratio |
126+
|------------------- |----------- |------------- |----------------:|-----------------:|---------------:|----------:|---------:|---------:|---------:|---------:|-----------:|------------:|
127+
| MyCSharp | Basic | Chrome Win10 | 871.85 ns | 132.008 ns | 7.236 ns | 1.00 | 0.01 | 0.0029 | - | - | 48 B | 1.00 |
128+
| UAParser | Basic | Chrome Win10 | 8,901,909.90 ns | 3,411,259.484 ns | 186,982.644 ns | 10,210.80 | 199.60 | 656.2500 | 578.1250 | 109.3750 | 11523310 B | 240,068.96 |
129+
| DeviceDetector.NET | Basic | Chrome Win10 | 5,391,412.50 ns | 8,253,446.769 ns | 452,399.269 ns | 6,184.14 | 451.58 | 296.8750 | 125.0000 | 31.2500 | 5002239 B | 104,213.31 |
130+
| | | | | | | | | | | | | |
131+
| MyCSharp | Basic | Google-Bot | 158.80 ns | 19.584 ns | 1.073 ns | 1.00 | 0.01 | - | - | - | - | NA |
132+
| UAParser | Basic | Google-Bot | 9,666,739.32 ns | 7,566,085.041 ns | 414,722.653 ns | 60,873.62 | 2,289.43 | 671.8750 | 656.2500 | 109.3750 | 11876998 B | NA |
133+
| DeviceDetector.NET | Basic | Google-Bot | 6,106,666.41 ns | 593,634.990 ns | 32,539.137 ns | 38,455.05 | 285.97 | 539.0625 | 117.1875 | 23.4375 | 8817078 B | NA |
134+
| | | | | | | | | | | | | |
135+
| MyCSharp | Cached | Chrome Win10 | 26.43 ns | 0.132 ns | 0.007 ns | 1.00 | 0.00 | - | - | - | - | NA |
136+
| UAParser | Cached | Chrome Win10 | 177,417.99 ns | 24,390.139 ns | 1,336.906 ns | 6,713.66 | 43.84 | 2.1973 | - | - | 37488 B | NA |
137+
| | | | | | | | | | | | | |
138+
| MyCSharp | Cached | Google-Bot | 17.03 ns | 1.835 ns | 0.101 ns | 1.00 | 0.01 | - | - | - | - | NA |
139+
| UAParser | Cached | Google-Bot | 129,445.13 ns | 21,319.059 ns | 1,168.570 ns | 7,599.76 | 70.93 | 2.6855 | - | - | 45857 B | NA |
140140
```
141141

142142
## Disclaimer

src/HttpUserAgentParser/HttpUserAgentParser.cs

Lines changed: 38 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@ namespace MyCSharp.HttpUserAgentParser;
1111
/// Parser logic for user agents
1212
/// </summary>
1313
public static class HttpUserAgentParser
14-
1514
{
1615
/// <summary>
1716
/// Parses the given <paramref name="userAgent"/>
@@ -48,7 +47,6 @@ public static HttpUserAgentInformation Parse(string userAgent)
4847
/// </summary>
4948
public static HttpUserAgentPlatformInformation? GetPlatform(string userAgent)
5049
{
51-
// Fast, allocation-free token scan (keeps public statics untouched)
5250
ReadOnlySpan<char> ua = userAgent.AsSpan();
5351
foreach ((string Token, string Name, HttpUserAgentPlatformType PlatformType) platform in HttpUserAgentStatics.s_platformRules)
5452
{
@@ -78,6 +76,7 @@ public static bool TryGetPlatform(string userAgent, [NotNullWhen(true)] out Http
7876
public static (string Name, string? Version)? GetBrowser(string userAgent)
7977
{
8078
ReadOnlySpan<char> ua = userAgent.AsSpan();
79+
8180
foreach ((string Name, string DetectToken, string? VersionToken) browserRule in HttpUserAgentStatics.s_browserRules)
8281
{
8382
if (!TryIndexOf(ua, browserRule.DetectToken, out int detectIndex))
@@ -86,7 +85,18 @@ public static (string Name, string? Version)? GetBrowser(string userAgent)
8685
}
8786

8887
// Version token may differ (e.g., Safari uses "Version/")
89-
int versionSearchStart = detectIndex;
88+
89+
int versionSearchStart;
90+
// For rules without a specific version token, require '/' directly after the detect token (version digits are extracted further below)
91+
if (string.IsNullOrEmpty(browserRule.VersionToken))
92+
{
93+
int afterDetect = detectIndex + browserRule.DetectToken.Length;
94+
if (afterDetect >= ua.Length || ua[afterDetect] != '/')
95+
{
96+
// Likely a misspelling or partial token (e.g., Edgg, Oprea, Chromee)
97+
continue;
98+
}
99+
}
90100
if (!string.IsNullOrEmpty(browserRule.VersionToken))
91101
{
92102
if (TryIndexOf(ua, browserRule.VersionToken!, out int vtIndex))
@@ -104,14 +114,14 @@ public static (string Name, string? Version)? GetBrowser(string userAgent)
104114
versionSearchStart = detectIndex + browserRule.DetectToken.Length;
105115
}
106116

107-
string? version = null;
108-
ua = ua.Slice(versionSearchStart);
109-
if (TryExtractVersion(ua, out Range range))
117+
ReadOnlySpan<char> search = ua.Slice(versionSearchStart);
118+
if (TryExtractVersion(search, out Range range))
110119
{
111-
version = ua[range].ToString();
120+
string? version = search[range].ToString();
121+
return (browserRule.Name, version);
112122
}
113123

114-
return (browserRule.Name, version);
124+
// If we didn't find a version for this rule, try next rule
115125
}
116126

117127
return null;
@@ -198,39 +208,43 @@ private static bool TryExtractVersion(ReadOnlySpan<char> haystack, out Range ran
198208

199209
// Limit search window to avoid scanning entire UA string unnecessarily
200210
const int Window = 128;
201-
if (haystack.Length >= Window)
211+
if (haystack.Length > Window)
202212
{
203213
haystack = haystack.Slice(0, Window);
204214
}
205215

206-
int i = 0;
207-
for (; i < haystack.Length; ++i)
216+
// Find first digit
217+
int start = -1;
218+
for (int i = 0; i < haystack.Length; i++)
208219
{
209220
char c = haystack[i];
210-
if (char.IsBetween(c, '0', '9'))
221+
if (c >= '0' && c <= '9')
211222
{
223+
start = i;
212224
break;
213225
}
214226
}
215227

216-
int s = i;
217-
haystack = haystack.Slice(i + 1);
218-
for (i = 0; i < haystack.Length; ++i)
228+
if (start < 0)
219229
{
220-
char c = haystack[i];
221-
if (!(char.IsBetween(c, '0', '9') || c == '.'))
222-
{
223-
break;
224-
}
230+
// No digit found => no version
231+
return false;
225232
}
226-
i += s + 1; // shift back the previous domain
227233

228-
if (i == s)
234+
// Consume digits and dots after first digit
235+
int end = start + 1;
236+
while (end < haystack.Length)
229237
{
230-
return false;
238+
char c = haystack[end];
239+
if (!((c >= '0' && c <= '9') || c == '.'))
240+
{
241+
break;
242+
}
243+
end++;
231244
}
232245

233-
range = new Range(s, i);
246+
// Create exclusive end range
247+
range = new Range(start, end);
234248
return true;
235249
}
236250
}

src/HttpUserAgentParser/HttpUserAgentStatics.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -187,6 +187,7 @@ internal static readonly (string Name, string DetectToken, string? VersionToken)
187187
("Opera", "OPR", null),
188188
("Flock", "Flock", null),
189189
("Edge", "Edge", null),
190+
("Edge", "EdgiOS", null),
190191
("Edge", "EdgA", null),
191192
("Edge", "Edg", null),
192193
("Vivaldi", "Vivaldi", null),
@@ -208,7 +209,6 @@ internal static readonly (string Name, string DetectToken, string? VersionToken)
208209
("Netscape", "Netscape", null),
209210
("OmniWeb", "OmniWeb", null),
210211
("Safari", "Version/", "Version/"),
211-
("Mozilla", "Mozilla", null),
212212
("Konqueror", "Konqueror", null),
213213
("iCab", "icab", null),
214214
("Lynx", "Lynx", null),

tests/HttpUserAgentParser.UnitTests/HttpUserAgentParserTests.cs

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -173,4 +173,54 @@ public void BotTests(string ua, string name)
173173
Assert.False(uaInfo.IsMobile());
174174
Assert.True(uaInfo.IsRobot());
175175
}
176+
177+
[Theory]
178+
[InlineData("")]
179+
[InlineData("???")]
180+
[InlineData("NotAUserAgent")]
181+
[InlineData("Mozilla")]
182+
[InlineData("Mozilla/")]
183+
[InlineData("()")]
184+
[InlineData("UserAgent/")]
185+
[InlineData("Bot/123 (")]
186+
[InlineData("123456")]
187+
[InlineData("curl")]
188+
[InlineData("invalid/useragent")]
189+
[InlineData("Mozilla (Windows)")]
190+
[InlineData("Chrome/ABC")]
191+
[InlineData(";;!!##")]
192+
[InlineData("Safari/ ")]
193+
[InlineData("Opera( )")]
194+
[InlineData("Mozilla/5.0 (X11; ) Gecko")]
195+
[InlineData("FakeUA/1.0 (Test)???")]
196+
[InlineData("Mozilla/ (iPhone; U; CPU iPhone OS like Mac OS X) AppleWebKit/ (KHTML, like Gecko) Version/ Mobile/ Safari/")]
197+
[InlineData("Mozzila/5.0 (Windows NT 10.0; Win64; x64)")]
198+
[InlineData("Chorme/91.0.4472.124 (Windows NT 10.0; Win64; x64)")]
199+
[InlineData("FireFoxx/89.0 (Macintosh; Intel Mac OS X 10_15_7)")]
200+
[InlineData("Safarii/14.1 (iPhone; CPU iPhone OS 14_6 like Mac OS X)")]
201+
[InlineData("InternetExploder/11.0 (Windows NT 6.1; WOW64)")]
202+
[InlineData("Bravee/1.25.72 (Windows NT 10.0; Win64; x64)")]
203+
[InlineData("Mozzila/5.0 (X11; Ubuntu; Linux x86_64; rv:89.0)")]
204+
[InlineData("Chromee/99.0.4758.102 (X11; Linux x86_64)")]
205+
[InlineData("FirreFox/100.0 (Windows NT 10.0; rv:100.0)")]
206+
[InlineData("Saffari/605.1.15 (iPad; CPU OS 14_6 like Mac OS X)")]
207+
[InlineData("Edgg/103.0.1264.37 (Macintosh; Intel Mac OS X 11_5_2)")]
208+
[InlineData("Chorome/91.0.4472.124 (Linux; Android 10; SM-G973F)")]
209+
[InlineData("Edgee/18.18363 (Windows 10 1909; Win64; x64)")]
210+
public void InvalidUserAgent(string userAgent)
211+
{
212+
HttpUserAgentInformation info = HttpUserAgentInformation.Parse(userAgent);
213+
214+
// Invalid or malformed UAs must be classified as Unknown
215+
Assert.Equal(HttpUserAgentType.Unknown, info.Type);
216+
Assert.Null(info.Name);
217+
Assert.Null(info.Version);
218+
219+
// Parser trims input via Cleanup, so compare to trimmed UA
220+
Assert.Equal(userAgent.Trim(), info.UserAgent);
221+
222+
// Should not be considered a browser or a robot
223+
Assert.False(info.IsBrowser());
224+
Assert.False(info.IsRobot());
225+
}
176226
}

0 commit comments

Comments
 (0)