Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## Version [v1.13.0] - 2025-06-19

### Changed

* Improved the search tokenizer and custom trimmer to improve search results. ([#2744])

### Added

* Added new type `RawHTMLHeadContent` to `HTML` format object, which allows adding raw HTML to the head of the HTML output by passing it as an element in the `assets` keyword argument. ([#2726])
Expand Down Expand Up @@ -2129,6 +2133,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
[#2726]: https://github.com/JuliaDocs/Documenter.jl/issues/2726
[#2729]: https://github.com/JuliaDocs/Documenter.jl/issues/2729
[#2737]: https://github.com/JuliaDocs/Documenter.jl/issues/2737
[#2744]: https://github.com/JuliaDocs/Documenter.jl/issues/2744
[#2748]: https://github.com/JuliaDocs/Documenter.jl/issues/2748
[#2750]: https://github.com/JuliaDocs/Documenter.jl/issues/2750
[JuliaLang/julia#36953]: https://github.com/JuliaLang/julia/issues/36953
Expand Down
92 changes: 88 additions & 4 deletions assets/html/js/search.js
Original file line number Diff line number Diff line change
Expand Up @@ -192,10 +192,10 @@ function worker_function(documenterSearchIndex, documenterBaseURL, filters) {
processTerm: (term) => {
let word = stopWords.has(term) ? null : term;
if (word) {
// custom trimmer that doesn't strip @ and !, which are used in julia macro and function names
// custom trimmer that doesn't strip (@,!,+, -, *,/,^,&, |, %,<, >, =, :, .) which are used in julia macro,function names and identifiers
word = word
.replace(/^[^a-zA-Z0-9@!]+/, "")
.replace(/[^a-zA-Z0-9@!]+$/, "");
.replace(/^[^a-zA-Z0-9@!+\-/*^&%|<>._=:]+/, "")
.replace(/[^a-zA-Z0-9@!+\-/*^&%|<>._=:]+$/, "");

word = word.toLowerCase();
}
Expand All @@ -204,7 +204,53 @@ function worker_function(documenterSearchIndex, documenterBaseURL, filters) {
},
// add . as a separator, because otherwise "title": "Documenter.Anchors.add!", would not
// find anything if searching for "add!", only for the entire qualification
tokenize: (string) => string.split(/[\s\-\.]+/),
//updated tokenizer
tokenize: (string) => {
const tokens = [];
let remaining = string;

// julia specific patterns
const patterns = [
// Module qualified names (e.g., Base.sort, Module.Submodule. function)
/\b[A-Za-z0-9_1*(?:\.[A-Z][A-Za-z0-9_1*)*\.[a-z_][A-Za-z0-9_!]*\b/g,
// Macro calls (e.g., @time, @async)
/@[A-Za-z0-9_]*/g,
// Type parameters (e.g., Array{T,N}, Vector{Int})
/\b[A-Za-z0-9_]*\{[^}]+\}/g,
// Function names with module qualification (e.g., Base.+, Base.:^)
/\b[A-Za-z0-9_]*\.:[A-Za-z0-9_!+\-*/^&|%<>=.]+/g,
// Operators as complete tokens (e.g., !=, aã, ||, ^, .=, →)
/[!<>=+\-*/^&|%:.]+/g,
// Function signatures with type annotations (e.g., f(x::Int))
/\b[A-Za-z0-9_!]*\([^)]*::[^)]*\)/g,
// Numbers (integers, floats,scientific notation)
/\b\d+(?:\.\d+)? (?:[eE][+-]?\d+)?\b/g,
];

// apply patterns in order of specificity
for (const pattern of patterns) {
pattern.lastIndex = 0; //reset regex state
let match;
while ((match = pattern.exec(remaining)) != null) {
const token = match[0].trim();
if (token && !tokens.includes(token)) {
tokens.push(token);
}
}
}

// splitting the content if something remains
const basicTokens = remaining
.split(/[\s\-,;()[\]{}]+/)
.filter((t) => t.trim());
for (const token of basicTokens) {
if (token && !tokens.includes(token)) {
tokens.push(token);
}
}

return tokens.filter((token) => token.length > 0);
},
// options which will be applied during the search
searchOptions: {
prefix: true,
Expand Down Expand Up @@ -327,6 +373,35 @@ function worker_function(documenterSearchIndex, documenterBaseURL, filters) {
return result_div;
}

/**
 * Compute a ranking score that favours results whose title matches the query.
 *
 * The query is trimmed and both sides are lower-cased before comparison.
 * A bonus is then added on top of the engine-provided `result.score`:
 *   - +10000 when the title equals the query;
 *   - +5000 when the title contains the query, minus 10 per character of
 *     offset so that earlier matches rank higher;
 *   - +2000 when every whitespace-separated query word occurs inside some
 *     whitespace-separated title word;
 *   - no bonus otherwise, or when the query is empty after trimming.
 *
 * @param {{title?: string, score: number}} result - search hit to score.
 * @param {string} query - raw user query.
 * @returns {number} the adjusted score.
 */
function calculateCustomScore(result, query) {
  // A hit without a title is treated as having an empty title instead of
  // throwing, so it can still be ranked by its base score.
  const titleLower = String(result.title ?? "").toLowerCase();
  // Trim once up front so that stray leading/trailing whitespace in the query
  // cannot prevent an exact or substring title match.
  const queryLower = String(query ?? "")
    .trim()
    .toLowerCase();

  // Every string "contains" the empty string; without this guard an empty
  // query would hand the substring bonus to every result.
  if (queryLower === "") {
    return result.score;
  }

  // Tier 1 : Exact title match
  if (titleLower === queryLower) {
    return 10000 + result.score;
  }

  // Tier 2 : Title contains exact query (prefer matches at the beginning)
  const position = titleLower.indexOf(queryLower);
  if (position !== -1) {
    return 5000 + result.score - position * 10;
  }

  // Tier 3 : All query words in title
  const queryWords = queryLower.split(/\s+/);
  const titleWords = titleLower.trim().split(/\s+/);
  const allWordsInTitle = queryWords.every((qw) =>
    titleWords.some((tw) => tw.includes(qw)),
  );
  if (allWordsInTitle) {
    return 2000 + result.score;
  }

  return result.score;
}

self.onmessage = function (e) {
let query = e.data;
let results = index.search(query, {
Expand All @@ -337,6 +412,15 @@ function worker_function(documenterSearchIndex, documenterBaseURL, filters) {
combineWith: "AND",
});

//calculate custom scores for all results
results = results.map((result) => ({
...result,
customScore: calculateCustomScore(result, query),
}));

// sort by custom score in descending order
results.sort((a, b) => b.customScore - a.customScore);

// Pre-filter to deduplicate and limit to 200 per category to the extent
// possible without knowing what the filters are.
let filtered_results = [];
Expand Down
56 changes: 53 additions & 3 deletions test/search/wrapper.js
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,62 @@ const index = new MiniSearch({
processTerm: (term) => {
let word = stopWords.has(term) ? null : term;
if (word) {
word = word.replace(/^[^a-zA-Z0-9@!]+/, "").replace(/[^a-zA-Z0-9@!]+$/, "");
word = word.toLowerCase();
// custom trimmer that doesn't strip (@,!,+, -, *,/,^,&, |, %,<, >, =, :, .) which are used in julia macro,function names and identifiers
word = word
.replace(/^[^a-zA-Z0-9@!+\-/*^&%|<>._=:]+/, "")
.replace(/[^a-zA-Z0-9@!+\-/*^&%|<>._=:]+$/, "");

word = word.toLowerCase();
}

return word ?? null;
},
tokenize: (string) => string.split(/[\s\-\.]+/),
tokenize: (string) => {
const tokens = [];
let remaining = string;

// julia specific patterns
const patterns = [
// Module qualified names (e.g., Base.sort, Module.Submodule. function)
/\b[A-Za-z0-9_1*(?:\.[A-Z][A-Za-z0-9_1*)*\.[a-z_][A-Za-z0-9_!]*\b/g,
// Macro calls (e.g., @time, @async)
/@[A-Za-z0-9_]*/g,
// Type parameters (e.g., Array{T,N}, Vector{Int})
/\b[A-Za-z0-9_]*\{[^}]+\}/g,
// Function names with module qualification (e.g., Base.+, Base.:^)
/\b[A-Za-z0-9_]*\.:[A-Za-z0-9_!+\-*/^&|%<>=.]+/g,
// Operators as complete tokens (e.g., !=, aã, ||, ^, .=, →)
/[!<>=+\-*/^&|%:.]+/g,
// Function signatures with type annotations (e.g., f(x::Int))
/\b[A-Za-z0-9_!]*\([^)]*::[^)]*\)/g,
// Numbers (integers, floats,scientific notation)
/\b\d+(?:\.\d+)? (?:[eE][+-]?\d+)?\b/g,
];

// apply patterns in order of specificity
for (const pattern of patterns) {
pattern.lastIndex = 0; //reset regex state
let match;
while ((match = pattern.exec(remaining)) != null) {
const token = match[0].trim();
if (token && !tokens.includes(token)) {
tokens.push(token);
}
}
}

// splitting the content if something remains
const basicTokens = remaining
.split(/[\s\-,;()[\]{}]+/)
.filter((t) => t.trim());
for (const token of basicTokens) {
if (token && !tokens.includes(token)) {
tokens.push(token);
}
}

return tokens.filter((token) => token.length > 0);
},
searchOptions: { prefix: true, boost: { title: 100 }, fuzzy: 2 }
});

Expand Down
Loading