Use edit-distance to fix typos in cabal fields (#4722)

crtschin · web-flow · commit 91d2b4897d79 · 2025-09-24T13:44:39.000Z
* Add levenshtein scoring function

* Split off parallel matching from filtering

* Allow varying the matcher used in completions

* Add golden test for fixing field typos

* Use `edit-distannce` instead of handrolling

* Remove outdated comment from test files

* Move `Matcher` type to cabal completion module
diff --git a/ghcide/ghcide.cabal b/ghcide/ghcide.cabal
@@ -60,6 +60,7 @@ library
     , Diff                         ^>=0.5 || ^>=1.0.0
     , directory
     , dlist
+    , edit-distance
     , enummapset
     , exceptions
     , extra                        >=1.7.14
@@ -196,6 +197,7 @@ library
     Development.IDE.Types.Shake
     Generics.SYB.GHC
     Text.Fuzzy.Parallel
+    Text.Fuzzy.Levenshtein
 
   other-modules:
     Development.IDE.Core.FileExists
diff --git a/ghcide/src/Text/Fuzzy/Levenshtein.hs b/ghcide/src/Text/Fuzzy/Levenshtein.hs
@@ -0,0 +1,16 @@
+module Text.Fuzzy.Levenshtein where
+
+import           Data.List           (sortOn)
+import           Data.Text           (Text)
+import qualified Data.Text           as T
+import           Text.EditDistance
+import           Text.Fuzzy.Parallel
+
+-- | Sort the given list according to it's levenshtein distance relative to the
+-- given string.
+levenshteinScored :: Int -> Text -> [Text] -> [Scored Text]
+levenshteinScored chunkSize needle haystack = do
+  let levenshtein = levenshteinDistance $ defaultEditCosts {substitutionCosts=ConstantCost 2}
+  sortOn score $
+    matchPar chunkSize needle haystack id $
+      \a b -> Just $ levenshtein (T.unpack a) (T.unpack b)
diff --git a/ghcide/src/Text/Fuzzy/Parallel.hs b/ghcide/src/Text/Fuzzy/Parallel.hs
@@ -1,7 +1,7 @@
 -- | Parallel versions of 'filter' and 'simpleFilter'
 
 module Text.Fuzzy.Parallel
-(   filter, filter',
+(   filter, filter', matchPar,
     simpleFilter, simpleFilter',
     match, defChunkSize, defMaxResults,
     Scored(..)
@@ -103,15 +103,29 @@ filter' :: Int           -- ^ Chunk size. 1000 works well.
        -- ^ Custom scoring function to use for calculating how close words are
        -- When the function returns Nothing, this means the values are incomparable.
        -> [Scored t]    -- ^ The list of results, sorted, highest score first.
-filter' chunkSize maxRes pat ts extract match' = partialSortByAscScore maxRes perfectScore (concat vss)
+filter' chunkSize maxRes pat ts extract match' = partialSortByAscScore maxRes perfectScore $
+    matchPar chunkSize pat' ts extract match'
   where
-      -- Preserve case for the first character, make all others lowercase
-      pat' = case T.uncons pat of
+    perfectScore = fromMaybe (error $ T.unpack pat) $ match' pat pat
+    -- Preserve case for the first character, make all others lowercase
+    pat' = case T.uncons pat of
         Just (c, rest) -> T.cons c (T.toLower rest)
         _              -> pat
-      vss = map (mapMaybe (\t -> flip Scored t <$> match' pat' (extract t))) (chunkList chunkSize ts)
+
+matchPar
+    :: Int           -- ^ Chunk size. 1000 works well.
+    -> T.Text        -- ^ Pattern.
+    -> [t]           -- ^ The list of values containing the text to search in.
+    -> (t -> T.Text) -- ^ The function to extract the text from the container.
+    -> (T.Text -> T.Text -> Maybe Int)
+    -- ^ Custom scoring function to use for calculating how close words are
+    -- When the function returns Nothing, this means the values are incomparable.
+    -> [Scored t]    -- ^ The list of results, sorted, highest score first.
+{-# INLINABLE matchPar #-}
+matchPar chunkSize pat ts extract match' = concat vss
+  where
+    vss = map (mapMaybe (\t -> flip Scored t <$> match' pat (extract t))) (chunkList chunkSize ts)
         `using` parList (evalList rseq)
-      perfectScore = fromMaybe (error $ T.unpack pat) $ match' pat' pat'
 
 -- | The function to filter a list of values by fuzzy search on the text extracted from them,
 -- using a custom matching function which determines how close words are.
diff --git a/plugins/hls-cabal-plugin/src/Ide/Plugin/Cabal.hs b/plugins/hls-cabal-plugin/src/Ide/Plugin/Cabal.hs
@@ -2,7 +2,6 @@
 {-# LANGUAGE DuplicateRecordFields #-}
 {-# LANGUAGE LambdaCase            #-}
 {-# LANGUAGE OverloadedStrings     #-}
-{-# LANGUAGE PatternSynonyms       #-}
 {-# LANGUAGE TypeFamilies          #-}
 
 module Ide.Plugin.Cabal (descriptor, haskellInteractionDescriptor, Log (..)) where
@@ -55,6 +54,8 @@ import qualified Language.LSP.Protocol.Lens                    as JL
 import qualified Language.LSP.Protocol.Message                 as LSP
 import           Language.LSP.Protocol.Types
 import qualified Language.LSP.VFS                              as VFS
+import qualified Text.Fuzzy.Levenshtein                        as Fuzzy
+import qualified Text.Fuzzy.Parallel                           as Fuzzy
 import           Text.Regex.TDFA
 
 data Log
@@ -234,7 +235,9 @@ fieldSuggestCodeAction recorder ide _ (CodeActionParams _ _ (TextDocumentIdentif
       fakeLspCursorPosition = Position lineNr (col + fromIntegral (T.length fieldName))
       lspPrefixInfo = Ghcide.getCompletionPrefixFromRope fakeLspCursorPosition fileContents
       cabalPrefixInfo = Completions.getCabalPrefixInfo fp lspPrefixInfo
-    completions <- liftIO $ computeCompletionsAt recorder ide cabalPrefixInfo fp cabalFields
+    completions <- liftIO $ computeCompletionsAt recorder ide cabalPrefixInfo fp cabalFields $
+      CompleterTypes.Matcher $
+        Fuzzy.levenshteinScored Fuzzy.defChunkSize
     let completionTexts = fmap (^. JL.label) completions
     pure $ FieldSuggest.fieldErrorAction uri fieldName completionTexts _range
 
@@ -365,12 +368,21 @@ completion recorder ide _ complParams = do
         Just (fields, _) -> do
           let lspPrefInfo = Ghcide.getCompletionPrefixFromRope position cnts
               cabalPrefInfo = Completions.getCabalPrefixInfo path lspPrefInfo
-          let res = computeCompletionsAt recorder ide cabalPrefInfo path fields
+              res = computeCompletionsAt recorder ide cabalPrefInfo path fields $
+                CompleterTypes.Matcher $
+                  Fuzzy.simpleFilter Fuzzy.defChunkSize Fuzzy.defMaxResults
           liftIO $ fmap InL res
     Nothing -> pure . InR $ InR Null
 
-computeCompletionsAt :: Recorder (WithPriority Log) -> IdeState -> Types.CabalPrefixInfo -> FilePath -> [Syntax.Field Syntax.Position] -> IO [CompletionItem]
-computeCompletionsAt recorder ide prefInfo fp fields = do
+computeCompletionsAt
+  :: Recorder (WithPriority Log)
+  -> IdeState
+  -> Types.CabalPrefixInfo
+  -> FilePath
+  -> [Syntax.Field Syntax.Position]
+  -> CompleterTypes.Matcher T.Text
+  -> IO [CompletionItem]
+computeCompletionsAt recorder ide prefInfo fp fields matcher = do
   runMaybeT (context fields) >>= \case
     Nothing -> pure []
     Just ctx -> do
@@ -390,6 +402,7 @@ computeCompletionsAt recorder ide prefInfo fp fields = do
                   case fst ctx of
                     Types.Stanza _ name -> name
                     _                   -> Nothing
+              , matcher = matcher
               }
       completions <- completer completerRecorder completerData
       pure completions
diff --git a/plugins/hls-cabal-plugin/src/Ide/Plugin/Cabal/Completion/Completer/Module.hs b/plugins/hls-cabal-plugin/src/Ide/Plugin/Cabal/Completion/Completer/Module.hs
@@ -33,8 +33,7 @@ modulesCompleter extractionFunction recorder cData = do
   case mGPD of
     Just gpd -> do
       let sourceDirs = extractionFunction sName gpd
-      filePathCompletions <-
-        filePathsForExposedModules recorder sourceDirs prefInfo
+      filePathCompletions <- filePathsForExposedModules recorder sourceDirs prefInfo (matcher cData)
       pure $ map (\compl -> mkSimpleCompletionItem (completionRange prefInfo) compl) filePathCompletions
     Nothing -> do
       logWith recorder Debug LogUseWithStaleFastNoResult
@@ -45,8 +44,13 @@ modulesCompleter extractionFunction recorder cData = do
 
 -- | Takes a list of source directories and returns a list of path completions
 --  relative to any of the passed source directories which fit the passed prefix info.
-filePathsForExposedModules :: Recorder (WithPriority Log) -> [FilePath] -> CabalPrefixInfo -> IO [T.Text]
-filePathsForExposedModules recorder srcDirs prefInfo = do
+filePathsForExposedModules
+  :: Recorder (WithPriority Log)
+  -> [FilePath]
+  -> CabalPrefixInfo
+  -> Matcher T.Text
+  -> IO [T.Text]
+filePathsForExposedModules recorder srcDirs prefInfo matcher = do
   concatForM
     srcDirs
     ( \dir' -> do
@@ -55,9 +59,8 @@ filePathsForExposedModules recorder srcDirs prefInfo = do
         completions <- listFileCompletions recorder pathInfo
         validExposedCompletions <- filterM (isValidExposedModulePath pathInfo) completions
         let toMatch = pathSegment pathInfo
-            scored = Fuzzy.simpleFilter
-              Fuzzy.defChunkSize
-              Fuzzy.defMaxResults
+            scored = runMatcher
+              matcher
               toMatch
               (map T.pack validExposedCompletions)
         forM
diff --git a/plugins/hls-cabal-plugin/src/Ide/Plugin/Cabal/Completion/Completer/Simple.hs b/plugins/hls-cabal-plugin/src/Ide/Plugin/Cabal/Completion/Completer/Simple.hs
@@ -41,7 +41,7 @@ errorNoopCompleter l recorder _ = do
 constantCompleter :: [T.Text] -> Completer
 constantCompleter completions _ cData = do
   let prefInfo = cabalPrefixInfo cData
-      scored = Fuzzy.simpleFilter Fuzzy.defChunkSize Fuzzy.defMaxResults (completionPrefix prefInfo) completions
+      scored = runMatcher (matcher cData) (completionPrefix prefInfo) completions
       range = completionRange prefInfo
   pure $ map (mkSimpleCompletionItem range . Fuzzy.original) scored
 
@@ -68,7 +68,7 @@ importCompleter l cData = do
 -- it is just forbidden on hackage.
 nameCompleter :: Completer
 nameCompleter _ cData = do
-  let scored = Fuzzy.simpleFilter Fuzzy.defChunkSize Fuzzy.defMaxResults (completionPrefix prefInfo) [completionFileName prefInfo]
+  let scored = runMatcher (matcher cData) (completionPrefix prefInfo) [completionFileName prefInfo]
       prefInfo = cabalPrefixInfo cData
       range = completionRange prefInfo
   pure $ map (mkSimpleCompletionItem range . Fuzzy.original) scored
@@ -85,6 +85,7 @@ weightedConstantCompleter completions weights _ cData = do
   let scored =
         if perfectScore > 0
           then
+            -- TODO: Would be nice to use to be able to use the matcher in `cData`
             fmap Fuzzy.original $
               Fuzzy.simpleFilter' Fuzzy.defChunkSize Fuzzy.defMaxResults prefix completions customMatch
           else topTenByWeight
diff --git a/plugins/hls-cabal-plugin/src/Ide/Plugin/Cabal/Completion/Completer/Types.hs b/plugins/hls-cabal-plugin/src/Ide/Plugin/Cabal/Completion/Completer/Types.hs
@@ -2,17 +2,23 @@
 
 module Ide.Plugin.Cabal.Completion.Completer.Types where
 
+import           Data.Text                         (Text)
+import qualified Data.Text                         as T
 import           Development.IDE                   as D
 import qualified Distribution.Fields               as Syntax
 import           Distribution.PackageDescription   (GenericPackageDescription)
 import qualified Distribution.Parsec.Position      as Syntax
 import           Ide.Plugin.Cabal.Completion.Types
 import           Language.LSP.Protocol.Types       (CompletionItem)
+import           Text.Fuzzy.Parallel
 
 -- | Takes information needed to build possible completion items
 -- and returns the list of possible completion items
 type Completer = Recorder (WithPriority Log) -> CompleterData -> IO [CompletionItem]
 
+-- | Type signature of completion functions ranking texts against a pattern.
+newtype Matcher a = Matcher { runMatcher :: T.Text -> [T.Text] -> [Scored a] }
+
 -- | Contains information to be used by completers.
 data CompleterData = CompleterData
   { -- | Access to the latest available generic package description for the handled cabal file,
@@ -24,5 +30,7 @@ data CompleterData = CompleterData
     -- | Prefix info to be used for constructing completion items
     cabalPrefixInfo        :: CabalPrefixInfo,
     -- | The name of the stanza in which the completer is applied
-    stanzaName             :: Maybe StanzaName
+    stanzaName             :: Maybe StanzaName,
+    -- | The matcher that'll be used to rank the texts against the pattern.
+    matcher                :: Matcher Text
   }
diff --git a/plugins/hls-cabal-plugin/test/Completer.hs b/plugins/hls-cabal-plugin/test/Completer.hs
@@ -21,13 +21,15 @@ import           Ide.Plugin.Cabal.Completion.Completer.FilePath
 import           Ide.Plugin.Cabal.Completion.Completer.Module
 import           Ide.Plugin.Cabal.Completion.Completer.Paths
 import           Ide.Plugin.Cabal.Completion.Completer.Simple   (importCompleter)
-import           Ide.Plugin.Cabal.Completion.Completer.Types    (CompleterData (..))
+import           Ide.Plugin.Cabal.Completion.Completer.Types    (CompleterData (..),
+                                                                 Matcher (..))
 import           Ide.Plugin.Cabal.Completion.Completions
 import           Ide.Plugin.Cabal.Completion.Types              (CabalPrefixInfo (..),
                                                                  StanzaName)
 import qualified Language.LSP.Protocol.Lens                     as L
 import           System.FilePath
 import           Test.Hls
+import qualified Text.Fuzzy.Parallel                            as Fuzzy
 import           Utils
 
 completerTests :: TestTree
@@ -270,7 +272,7 @@ filePathExposedModulesTests =
     callFilePathsForExposedModules :: [FilePath] -> IO [T.Text]
     callFilePathsForExposedModules srcDirs = do
       let prefInfo = simpleCabalPrefixInfoFromFp "" exposedTestDir
-      filePathsForExposedModules mempty srcDirs prefInfo
+      filePathsForExposedModules mempty srcDirs prefInfo $ Matcher $ Fuzzy.simpleFilter Fuzzy.defChunkSize Fuzzy.defMaxResults
 
 exposedModuleCompleterTests :: TestTree
 exposedModuleCompleterTests =
@@ -366,11 +368,19 @@ simpleCompleterData sName dir pref = do
         cabalContents <- ByteString.readFile $ testDataDir </> "exposed.cabal"
         pure $ parseGenericPackageDescriptionMaybe cabalContents,
       getCabalCommonSections = undefined,
-      stanzaName = sName
+      stanzaName = sName,
+      matcher = Matcher $ Fuzzy.simpleFilter Fuzzy.defChunkSize Fuzzy.defMaxResults
     }
 
 mkCompleterData :: CabalPrefixInfo -> CompleterData
-mkCompleterData prefInfo = CompleterData {getLatestGPD = undefined, getCabalCommonSections = undefined, cabalPrefixInfo = prefInfo, stanzaName = Nothing}
+mkCompleterData prefInfo =
+  CompleterData
+    { getLatestGPD = undefined,
+      getCabalCommonSections = undefined,
+      cabalPrefixInfo = prefInfo,
+      stanzaName = Nothing,
+      matcher = Matcher $ Fuzzy.simpleFilter Fuzzy.defChunkSize Fuzzy.defMaxResults
+    }
 
 exposedTestDir :: FilePath
 exposedTestDir = addTrailingPathSeparator $ testDataDir </> "src-modules"
diff --git a/plugins/hls-cabal-plugin/test/Main.hs b/plugins/hls-cabal-plugin/test/Main.hs
@@ -14,10 +14,9 @@ import           Completer                       (completerTests)
 import           Context                         (contextTests)
 import           Control.Lens                    ((^.))
 import           Control.Lens.Fold               ((^?))
-import           Control.Monad                   (guard)
+import           Control.Monad                   (forM_, guard)
 import qualified Data.ByteString                 as BS
 import           Data.Either                     (isRight)
-import           Data.List.Extra                 (nubOrdOn)
 import qualified Data.Maybe                      as Maybe
 import           Data.Text                       (Text)
 import qualified Data.Text                       as T
@@ -26,6 +25,7 @@ import           Definition                      (gotoDefinitionTests)
 import           Development.IDE.Test
 import           Ide.Plugin.Cabal.LicenseSuggest (licenseErrorSuggestion)
 import qualified Ide.Plugin.Cabal.Parse          as Lib
+import           Language.LSP.Protocol.Lens      (HasRange (..))
 import qualified Language.LSP.Protocol.Lens      as L
 import qualified Language.LSP.Protocol.Message   as L
 import           Outline                         (outlineTests)
@@ -191,32 +191,29 @@ codeActionTests = testGroup "Code Actions"
                     , "    build-depends:    base"
                     , "    default-language: Haskell2010"
                     ]
-    , runCabalGoldenSession "Code Actions - Can fix field names" "code-actions" "FieldSuggestions" $ \doc -> do
-        _ <- waitForDiagnosticsFrom doc
-        cas <- Maybe.mapMaybe (^? _R) <$> getAllCodeActions doc
-        -- Filter out the code actions we want to invoke.
-        -- We only want to invoke Code Actions with certain titles, and
-        -- we want to invoke them only once, not once for each cursor request.
-        -- 'getAllCodeActions' iterates over each cursor position and requests code actions.
-        let selectedCas = nubOrdOn (^. L.title) $ filter
-                (\ca -> (ca ^. L.title) `elem`
-                    [ "Replace with license"
-                    , "Replace with build-type"
-                    , "Replace with extra-doc-files"
-                    , "Replace with ghc-options"
-                    , "Replace with location"
-                    , "Replace with default-language"
-                    , "Replace with import"
-                    , "Replace with build-depends"
-                    , "Replace with main-is"
-                    , "Replace with hs-source-dirs"
-                    ]) cas
-        mapM_ executeCodeAction selectedCas
-        pure ()
+    , runCabalGoldenSession
+        "Code Actions - Can complete field names"
+        "code-actions"
+        "FieldSuggestions"
+        executeFirstActionPerDiagnostic
+    , runCabalGoldenSession
+        "Code Actions - Can fix field typos"
+        "code-actions"
+        "FieldSuggestionsTypos"
+        executeFirstActionPerDiagnostic
     , cabalAddDependencyTests
     , cabalAddModuleTests
     ]
   where
+    executeFirstActionPerDiagnostic doc = do
+      _ <- waitForDiagnosticsFrom doc
+      diagnotics <- getCurrentDiagnostics doc
+      -- Execute the first code action at each diagnostic point
+      forM_ diagnotics $ \diagnostic -> do
+        codeActions <- getCodeActions doc (diagnostic ^. range)
+        case codeActions of
+          []     -> pure ()
+          ca : _ -> mapM_ executeCodeAction (ca ^? _R)
     getLicenseAction :: T.Text -> [Command |? CodeAction] -> [CodeAction]
     getLicenseAction license codeActions = do
         InR action@CodeAction{_title} <- codeActions
diff --git a/plugins/hls-cabal-plugin/test/testdata/code-actions/FieldSuggestions.cabal b/plugins/hls-cabal-plugin/test/testdata/code-actions/FieldSuggestions.cabal
@@ -21,9 +21,8 @@ source-repository head
   loc: fake
 
 library
-    default-lang: Haskell2010
-    -- Import isn't supported right now.
     impor: warnings
+    default-lang: Haskell2010
     build-dep: base
 
 executable my-exe
diff --git a/plugins/hls-cabal-plugin/test/testdata/code-actions/FieldSuggestions.golden.cabal b/plugins/hls-cabal-plugin/test/testdata/code-actions/FieldSuggestions.golden.cabal
@@ -21,9 +21,8 @@ source-repository head
   location: fake
 
 library
-    default-language: Haskell2010
-    -- Import isn't supported right now.
     import: warnings
+    default-language: Haskell2010
     build-depends: base
 
 executable my-exe
diff --git a/plugins/hls-cabal-plugin/test/testdata/code-actions/FieldSuggestionsTypos.cabal b/plugins/hls-cabal-plugin/test/testdata/code-actions/FieldSuggestionsTypos.cabal
diff --git a/plugins/hls-cabal-plugin/test/testdata/code-actions/FieldSuggestionsTypos.golden.cabal b/plugins/hls-cabal-plugin/test/testdata/code-actions/FieldSuggestionsTypos.golden.cabal