fix up tutorial links

thisisjofrank · thisisjofrank · commit 1148f320ebdc · 2025-08-22T17:05:58.000+01:00
diff --git a/orama/indexing/MarkdownIndexer.ts b/orama/indexing/MarkdownIndexer.ts
@@ -6,18 +6,8 @@ import type {
 
 const H1_REGEX = /^# (.+)$/m;
 const FRONTMATTER_TITLE_REGEX = /title: ["'](.+)["']/;
-const DESCRIPTION_REGEX    private buildPath(relativePath: string): string {
-        let p = relativePath.replace(/\.(md|mdx)$/, "");
-        // Normalize path separators to forward slashes for web paths
-        p = p.replace(/\\/g, "/");
-        if (p.endsWith("/index")) {
-            p = p.replace(/\/index$/, "/");
-        }
-        if (!p.startsWith("/")) {
-            p = "/" + p;
-        }
-        return p;
-    }ption: ["'](.+)["']/;
+const DESCRIPTION_REGEX = /description: ["'](.+)["']/;
+const FRONTMATTER_URL_REGEX = /^[ \t]*url:\s*(.+)$/m; // whole `url` key only
 const TAGS_REGEX = /tags: \[(.*?)\]/;
 const FRONTMATTER_COMMAND_REGEX = /\bcommand:\s*(["']?)([a-zA-Z0-9_-]+)\1/;
 
@@ -121,14 +111,14 @@ export class MarkdownIndexer implements IIndexDocuments {
         // Get category and section
         const { category, section, subsection } = this.getCategoryAndSection(relPath);
 
-        const url = this.buildUrl(relPath);
+        const url = this.buildUrl(relPath, frontmatter);
 
         return {
             id: this.generateId(relPath),
             title: title,
             content: prefixedContent,
             url,
-            path: this.buildPath(relPath),
+            path: this.buildPath(relPath, frontmatter),
             category: category,
             section: section,
             subsection: subsection || undefined,
@@ -212,10 +202,25 @@ export class MarkdownIndexer implements IIndexDocuments {
             .toLowerCase();
     }
 
-    private buildUrl(relativePath: string): string {
+    private buildUrl(relativePath: string, frontmatter?: string): string {
+        // Check for frontmatter URL first
+        if (frontmatter) {
+            const urlMatch = frontmatter.match(FRONTMATTER_URL_REGEX);
+            if (urlMatch) {
+                let frontmatterUrl = urlMatch[1].trim();
+                // Remove quotes if present
+                frontmatterUrl = frontmatterUrl.replace(/^['"`]|['"`]$/g, '');
+                // Ensure it starts with /
+                if (!frontmatterUrl.startsWith("/")) {
+                    frontmatterUrl = "/" + frontmatterUrl;
+                }
+                const BASE_URL = "https://docs.deno.com";
+                return `${BASE_URL}${frontmatterUrl}`;
+            }
+        }
+
+        // Fall back to path-based URL
         let url = relativePath.replace(/\.(md|mdx)$/, "");
-        // Normalize path separators to forward slashes for web URLs
-        url = url.replace(/\\/g, "/");
         if (url.endsWith("/index")) {
             url = url.replace(/\/index$/, "/");
         }
@@ -226,8 +231,26 @@ export class MarkdownIndexer implements IIndexDocuments {
         const BASE_URL = "https://docs.deno.com"; // Replace with your actual base URL
 
         return `${BASE_URL}${url}`;
-    }    private buildPath(relativePath: string): string {
-    let p = relativePath.replace(/\.(md|mdx)$/, "");
+    }
+
+    private buildPath(relativePath: string, frontmatter?: string): string {
+        // Check for frontmatter URL first
+        if (frontmatter) {
+            const urlMatch = frontmatter.match(FRONTMATTER_URL_REGEX);
+            if (urlMatch) {
+                let frontmatterUrl = urlMatch[1].trim();
+                // Remove quotes if present
+                frontmatterUrl = frontmatterUrl.replace(/^['"`]|['"`]$/g, '');
+                // Ensure it starts with /
+                if (!frontmatterUrl.startsWith("/")) {
+                    frontmatterUrl = "/" + frontmatterUrl;
+                }
+                return frontmatterUrl;
+            }
+        }
+
+        // Fall back to path-based URL
+        let p = relativePath.replace(/\.(md|mdx)$/, "");
         if (p.endsWith("/index")) {
             p = p.replace(/\/index$/, "/");
         }
diff --git a/search.client.ts b/search.client.ts
@@ -567,6 +567,11 @@ class OramaSearch {
   cleanTitle(title: string): string {
     if (!title) return title;
 
+    // Handle breadcrumb-style titles with intelligent processing
+    if (title.includes("\\")) {
+      return this.processBreadcrumbTitle(title);
+    }
+
     // Remove "jump to heading" and similar text patterns from titles
     let cleaned = title
       .replace(/\s*Jump\s+to\s+heading\s*/gi, "")
@@ -588,6 +593,82 @@ class OramaSearch {
     return cleaned;
   }
 
+  /**
+   * Reduces a breadcrumb-style title (e.g. "Deploy \ Support") to one label.
+   * Splits on `\`, `/`, `|` and `>`; the last segment wins unless it is
+   * "index" (any case), in which case the segment before it is used.
+   * Titles with fewer than two segments are returned unchanged.
+   */
+  processBreadcrumbTitle(title: string): string {
+    if (!title) return title;
+
+    const parts = title
+      .split(/[\\\/\|>]+/)
+      .map((part) => part.trim())
+      .filter((part) => part);
+
+    if (parts.length <= 1) return title;
+
+    // With two or more segments, the second-to-last one always exists.
+    const tail = parts[parts.length - 1];
+    const lastPart = tail.toLowerCase() === "index"
+      ? parts[parts.length - 2]
+      : tail;
+
+    // Exact-match overrides. A Map (not a plain object) so that segments like
+    // "constructor" or "toString" cannot hit inherited Object.prototype
+    // members and make this method return a non-string.
+    const contextualMappings = new Map([["Support", "Support and Feedback"]]);
+
+    return contextualMappings.get(lastPart) ??
+      this.enhanceBreadcrumbPart(lastPart, parts);
+  }
+
+  /**
+   * Formats one breadcrumb segment as a display label: a known acronym is
+   * upper-cased; otherwise `_`/`-` and camelCase boundaries become spaces and
+   * each word is title-cased (words that are known acronyms stay upper-case).
+   * `_fullPath` is accepted but not used.
+   */
+  enhanceBreadcrumbPart(part: string, _fullPath: string[]): string {
+    if (!part) return part;
+
+    const known = [
+      "API",
+      "JWT",
+      "HTTP",
+      "HTTPS",
+      "URL",
+      "UUID",
+      "JSON",
+      "XML",
+      "HTML",
+      "CSS",
+      "JS",
+      "TS",
+    ];
+    const isAcronym = (text: string) => known.includes(text.toUpperCase());
+    if (isAcronym(part)) return part.toUpperCase();
+
+    const words = part
+      .replace(/[_-]/g, " ") // separators become spaces
+      .replace(/([a-z])([A-Z])/g, "$1 $2") // split camelCase boundaries
+      .replace(/\s+/g, " ") // collapse whitespace runs
+      .trim()
+      .split(" ");
+
+    const labelled: string[] = [];
+    for (const word of words) {
+      labelled.push(
+        isAcronym(word)
+          ? word.toUpperCase()
+          : word.charAt(0).toUpperCase() + word.slice(1).toLowerCase(),
+      );
+    }
+
+    return labelled.join(" ");
+  }
+
   // Helper method to detect navigation/menu content
   isNavigationContent(hit: Hit<OramaDocument>): boolean {
     if (!hit.document) return false;