Skip to content

Commit 1148f32

Browse files
committed
fix up tutorial links
1 parent f4e0e4a commit 1148f32

File tree

2 files changed

+123
-19
lines changed

2 files changed

+123
-19
lines changed

orama/indexing/MarkdownIndexer.ts

Lines changed: 42 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -6,18 +6,8 @@ import type {
66

77
const H1_REGEX = /^# (.+)$/m;
88
const FRONTMATTER_TITLE_REGEX = /title: ["'](.+)["']/;
9-
const DESCRIPTION_REGEX private buildPath(relativePath: string): string {
10-
let p = relativePath.replace(/\.(md|mdx)$/, "");
11-
// Normalize path separators to forward slashes for web paths
12-
p = p.replace(/\\/g, "/");
13-
if (p.endsWith("/index")) {
14-
p = p.replace(/\/index$/, "/");
15-
}
16-
if (!p.startsWith("/")) {
17-
p = "/" + p;
18-
}
19-
return p;
20-
}ption: ["'](.+)["']/;
9+
const DESCRIPTION_REGEX = /description: ["'](.+)["']/;
10+
// Matches a `url:` frontmatter key and captures its raw (possibly quoted) value.
// - Anchored to the start of a line (`m` flag) so keys that merely end in
//   "url" (e.g. `image_url:`) are not mistaken for the page URL.
// - Only spaces/tabs are allowed after the colon, so an empty `url:` does not
//   swallow the following frontmatter line as its value.
const FRONTMATTER_URL_REGEX = /^url:[ \t]*(.+)$/m;
2111
const TAGS_REGEX = /tags: \[(.*?)\]/;
2212
const FRONTMATTER_COMMAND_REGEX = /\bcommand:\s*(["']?)([a-zA-Z0-9_-]+)\1/;
2313

@@ -121,14 +111,14 @@ export class MarkdownIndexer implements IIndexDocuments {
121111
// Get category and section
122112
const { category, section, subsection } = this.getCategoryAndSection(relPath);
123113

124-
const url = this.buildUrl(relPath);
114+
const url = this.buildUrl(relPath, frontmatter);
125115

126116
return {
127117
id: this.generateId(relPath),
128118
title: title,
129119
content: prefixedContent,
130120
url,
131-
path: this.buildPath(relPath),
121+
path: this.buildPath(relPath, frontmatter),
132122
category: category,
133123
section: section,
134124
subsection: subsection || undefined,
@@ -212,10 +202,25 @@ export class MarkdownIndexer implements IIndexDocuments {
212202
.toLowerCase();
213203
}
214204

215-
private buildUrl(relativePath: string): string {
205+
private buildUrl(relativePath: string, frontmatter?: string): string {
206+
// Check for frontmatter URL first
207+
if (frontmatter) {
208+
const urlMatch = frontmatter.match(FRONTMATTER_URL_REGEX);
209+
if (urlMatch) {
210+
let frontmatterUrl = urlMatch[1].trim();
211+
// Remove quotes if present
212+
frontmatterUrl = frontmatterUrl.replace(/^['"`]|['"`]$/g, '');
213+
// Ensure it starts with /
214+
if (!frontmatterUrl.startsWith("/")) {
215+
frontmatterUrl = "/" + frontmatterUrl;
216+
}
217+
const BASE_URL = "https://docs.deno.com";
218+
return `${BASE_URL}${frontmatterUrl}`;
219+
}
220+
}
221+
222+
// Fall back to path-based URL
216223
let url = relativePath.replace(/\.(md|mdx)$/, "");
217-
// Normalize path separators to forward slashes for web URLs
218-
url = url.replace(/\\/g, "/");
219224
if (url.endsWith("/index")) {
220225
url = url.replace(/\/index$/, "/");
221226
}
@@ -226,8 +231,26 @@ export class MarkdownIndexer implements IIndexDocuments {
226231
const BASE_URL = "https://docs.deno.com"; // Replace with your actual base URL
227232

228233
return `${BASE_URL}${url}`;
229-
} private buildPath(relativePath: string): string {
230-
let p = relativePath.replace(/\.(md|mdx)$/, "");
234+
}
235+
236+
private buildPath(relativePath: string, frontmatter?: string): string {
237+
// Check for frontmatter URL first
238+
if (frontmatter) {
239+
const urlMatch = frontmatter.match(FRONTMATTER_URL_REGEX);
240+
if (urlMatch) {
241+
let frontmatterUrl = urlMatch[1].trim();
242+
// Remove quotes if present
243+
frontmatterUrl = frontmatterUrl.replace(/^['"`]|['"`]$/g, '');
244+
// Ensure it starts with /
245+
if (!frontmatterUrl.startsWith("/")) {
246+
frontmatterUrl = "/" + frontmatterUrl;
247+
}
248+
return frontmatterUrl;
249+
}
250+
}
251+
252+
// Fall back to path-based URL
253+
let p = relativePath.replace(/\.(md|mdx)$/, "");
231254
if (p.endsWith("/index")) {
232255
p = p.replace(/\/index$/, "/");
233256
}

search.client.ts

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -567,6 +567,11 @@ class OramaSearch {
567567
cleanTitle(title: string): string {
568568
if (!title) return title;
569569

570+
// Handle breadcrumb-style titles with intelligent processing
571+
if (title.includes("\\")) {
572+
return this.processBreadcrumbTitle(title);
573+
}
574+
570575
// Remove "jump to heading" and similar text patterns from titles
571576
let cleaned = title
572577
.replace(/\s*Jump\s+to\s+heading\s*/gi, "")
@@ -588,6 +593,82 @@ class OramaSearch {
588593
return cleaned;
589594
}
590595

596+
/**
 * Reduce a breadcrumb-style title to a single display title.
 *
 * The title is split on runs of the separators `\`, `/`, `|` and `>`; the
 * last non-empty segment is used, unless that segment is "index"
 * (case-insensitive), in which case the segment before it is used instead.
 *
 * @param title Raw title, possibly containing breadcrumb separators.
 * @returns The input unchanged when it is empty or has at most one segment;
 *   otherwise a fixed replacement for known segments, or the chosen segment
 *   formatted by `enhanceBreadcrumbPart`.
 */
processBreadcrumbTitle(title: string): string {
  if (!title) return title;

  // Split by common path separators and drop empty segments.
  const parts = title
    .split(/[\\\/\|>]+/)
    .map((part) => part.trim())
    .filter((part) => part);

  if (parts.length <= 1) return title;

  // Get the last meaningful part (not "Index"). At least two parts exist
  // here, so falling back to the previous segment is always in range.
  let lastPart = parts[parts.length - 1];
  if (lastPart.toLowerCase() === "index") {
    lastPart = parts[parts.length - 2];
  }

  // Known segments that should be expanded to a fuller title.
  // A Map is used instead of a plain object so that segments which happen to
  // be named like inherited object members ("constructor", "toString", ...)
  // are not treated as mappings and returned as non-string values.
  const contextualMappings = new Map<string, string>([
    ["Support", "Support and Feedback"],
  ]);

  const mapped = contextualMappings.get(lastPart);
  if (mapped) {
    return mapped;
  }

  // For other cases, format the chosen segment.
  return this.enhanceBreadcrumbPart(lastPart, parts);
}
626+
627+
// Enhance a breadcrumb part with context from the full path
628+
enhanceBreadcrumbPart(part: string, _fullPath: string[]): string {
629+
if (!part) return part;
630+
631+
// Handle common acronyms that shouldn't be split
632+
const acronyms = [
633+
"API",
634+
"JWT",
635+
"HTTP",
636+
"HTTPS",
637+
"URL",
638+
"UUID",
639+
"JSON",
640+
"XML",
641+
"HTML",
642+
"CSS",
643+
"JS",
644+
"TS",
645+
];
646+
if (acronyms.includes(part.toUpperCase())) {
647+
return part.toUpperCase();
648+
}
649+
650+
// Convert from various formats to readable text
651+
let enhanced = part
652+
.replace(/[_-]/g, " ") // Replace underscores and hyphens with spaces
653+
.replace(/([a-z])([A-Z])/g, "$1 $2") // Add spaces between camelCase words
654+
.replace(/\s+/g, " ") // Normalize whitespace
655+
.trim();
656+
657+
// Capitalize properly
658+
enhanced = enhanced.split(" ")
659+
.map((word) => {
660+
// Keep acronyms uppercase
661+
if (acronyms.includes(word.toUpperCase())) {
662+
return word.toUpperCase();
663+
}
664+
// Normal word capitalization
665+
return word.charAt(0).toUpperCase() + word.slice(1).toLowerCase();
666+
})
667+
.join(" ");
668+
669+
return enhanced;
670+
}
671+
591672
// Helper method to detect navigation/menu content
592673
isNavigationContent(hit: Hit<OramaDocument>): boolean {
593674
if (!hit.document) return false;

0 commit comments

Comments
 (0)