diff --git a/README.md b/README.md index fb7fe1bb..2df5740e 100644 --- a/README.md +++ b/README.md @@ -20,6 +20,19 @@ Commands may be of the form { "cmd" : "example : f = 2 := rfl", "env" : 1 } ``` +```json +{ "cmd" : "example : f = 2 := rfl", "env" : 1, "record": false } +``` + +The input includes: +* A required `cmd` field containing the Lean command to execute +* An optional `env` field specifying a previous environment ID to build upon +* An optional `record` field (default: true) to record the environment after execution +* Optional fields for additional information: + * `allTactics`: when true, includes all tactic steps in the response + * `rootGoals`: when true, includes root goals as sorries + * `infotree`: specifies the level of info tree detail ("full", "tactics", "original", or "substantive") + The `env` field, if present, must contain a number received in the `env` field of a previous response, and causes the command to be run in the existing environment. @@ -33,6 +46,7 @@ You can backtrack simply by using earlier values for `env`. The response includes: * A numeric label for the `Environment` after your command, which you can use as the starting point for subsequent commands. + This will be `none` if `record` was set to false. * Any messages generated while processing your command. * A list of the `sorry`s in your command, including * their expected type, and diff --git a/REPL/JSON.lean b/REPL/JSON.lean index ebec4c56..ca500350 100644 --- a/REPL/JSON.lean +++ b/REPL/JSON.lean @@ -23,10 +23,13 @@ structure CommandOptions where /-- Run Lean commands. If `env = none`, starts a new session (in which you can use `import`). If `env = some n`, builds on the existing environment `n`. +Setting `record = false` will discard the environment after execution, useful for memory management. +When `record = false`, the response's `env` field will be `none`. -/ structure Command extends CommandOptions where env : Option Nat cmd : String + record : Option Bool := true deriving ToJson, FromJson /-- Process a Lean file in a fresh environment if `env` is not provided. -/ @@ -41,6 +44,7 @@ Run a tactic in a proof state. structure ProofStep where proofState : Nat tactic : String + record: Option Bool := true deriving ToJson, FromJson /-- Line and column information for error messages and sorries. -/ @@ -122,7 +126,7 @@ A response to a Lean command. `env` can be used in later calls, to build on the stored environment. -/ structure CommandResponse where - env : Nat + env : Option Nat := none messages : List Message := [] sorries : List Sorry := [] tactics : List Tactic := [] @@ -133,9 +137,13 @@ def Json.nonemptyList [ToJson α] (k : String) : List α → List (String × Jso | [] => [] | l => [⟨k, toJson l⟩] +def Json.optField [ToJson α] (k : String) : Option α → List (String × Json) + | none => [] + | some v => [(k, toJson v)] + instance : ToJson CommandResponse where toJson r := Json.mkObj <| .flatten [ - [("env", r.env)], + Json.optField "env" r.env, Json.nonemptyList "messages" r.messages, Json.nonemptyList "sorries" r.sorries, Json.nonemptyList "tactics" r.tactics, @@ -147,7 +155,7 @@ A response to a Lean tactic. `proofState` can be used in later calls, to run further tactics. -/ structure ProofStepResponse where - proofState : Nat + proofState : Option Nat goals : List String messages : List Message := [] sorries : List Sorry := [] @@ -157,7 +165,7 @@ deriving ToJson, FromJson instance : ToJson ProofStepResponse where toJson r := Json.mkObj <| .flatten [ - [("proofState", r.proofState)], + Json.optField "proofState" r.proofState, [("goals", toJson r.goals)], Json.nonemptyList "messages" r.messages, Json.nonemptyList "sorries" r.sorries, diff --git a/REPL/Main.lean b/REPL/Main.lean index d181080f..00844b25 100644 --- a/REPL/Main.lean +++ b/REPL/Main.lean @@ -231,8 +231,8 @@ def getProofStatus (proofState : ProofSnapshot) : M m String := do | _ => return "Incomplete: open goals remain" /-- Record a `ProofSnapshot` and generate a JSON response for it. -/ -def createProofStepReponse (proofState : ProofSnapshot) (old? : Option ProofSnapshot := none) : - M m ProofStepResponse := do +def createProofStepReponse (proofState : ProofSnapshot) (old? : Option ProofSnapshot := none) + (record : Option Bool := true) : M m ProofStepResponse := do let messages := proofState.newMessages old? let messages ← messages.mapM fun m => Message.of m let traces ← proofState.newTraces old? @@ -246,7 +246,9 @@ def createProofStepReponse (proofState : ProofSnapshot) (old? : Option ProofSnap -- For debugging purposes, sometimes we print out the trees here: -- trees.forM fun t => do IO.println (← t.format) let sorries ← sorries trees none (some proofState.rootGoals) - let id ← recordProofSnapshot proofState + let id ← match record with + | some false => pure none + | _ => some <$> recordProofSnapshot proofState return { proofState := id goals := (← proofState.ppGoals).map fun s => s!"{s}" @@ -323,7 +325,12 @@ def runCommand (s : Command) : M IO (CommandResponse ⊕ Error) := do fileMap := default, snap? := none, cancelTk? := none } } - let env ← recordCommandSnapshot cmdSnapshot + let env ← match s.record with + | some false => do pure none + | _ => + let id ← recordCommandSnapshot cmdSnapshot + pure (some id) + let jsonTrees := match s.infotree with | some "full" => trees | some "tactics" => trees.flatMap InfoTree.retainTacticInfo @@ -358,7 +365,7 @@ def runProofStep (s : ProofStep) : M IO (ProofStepResponse ⊕ Error) := do | some proofState => try let proofState' ← proofState.runString s.tactic - return .inl (← createProofStepReponse proofState' proofState) + return .inl (← createProofStepReponse proofState' proofState s.record) catch ex => return .inr ⟨"Lean error:\n" ++ ex.toString⟩ @@ -419,23 +426,28 @@ def printFlush [ToString α] (s : α) : IO Unit := do out.flush -- Flush the output /-- Read-eval-print loop for Lean. -/ -unsafe def repl : IO Unit := - StateT.run' loop {} -where loop : M IO Unit := do - let query ← getLines - if query = "" then - return () - if query.startsWith "#" || query.startsWith "--" then loop else - IO.println <| toString <| ← match ← parse query with - | .command r => return toJson (← runCommand r) - | .file r => return toJson (← processFile r) - | .proofStep r => return toJson (← runProofStep r) - | .pickleEnvironment r => return toJson (← pickleCommandSnapshot r) - | .unpickleEnvironment r => return toJson (← unpickleCommandSnapshot r) - | .pickleProofSnapshot r => return toJson (← pickleProofSnapshot r) - | .unpickleProofSnapshot r => return toJson (← unpickleProofSnapshot r) - printFlush "\n" -- easier to parse the output if there are blank lines - loop +unsafe def repl : IO Unit := do + let rec loop (s : State) : IO Unit := do + let query ← getLines + if query = "" then + return () + else if query.startsWith "#" || query.startsWith "--" then + loop s + else + let (result, s') ← StateT.run (do + match ← parse query with + | .command r => pure <| toJson (← runCommand r) + | .file r => pure <| toJson (← processFile r) + | .proofStep r => pure <| toJson (← runProofStep r) + | .pickleEnvironment r => pure <| toJson (← pickleCommandSnapshot r) + | .unpickleEnvironment r => pure <| toJson (← unpickleCommandSnapshot r) + | .pickleProofSnapshot r => pure <| toJson (← pickleProofSnapshot r) + | .unpickleProofSnapshot r => pure <| toJson (← unpickleProofSnapshot r) + ) s + IO.println (toString result) + printFlush "\n" -- easier to parse the output if there are blank lines + loop s' + loop {} /-- Main executable function, run as `lake exe repl`. -/ unsafe def main (_ : List String) : IO Unit := do diff --git a/test.sh b/test.sh index be8e1b3f..0af7e5e3 100755 --- a/test.sh +++ b/test.sh @@ -6,6 +6,10 @@ EXPECTED_DIR="test" lake build +# Initialize variables to track failures +failed_tests=() +failure_count=0 + # ignore locale to ensure test `bla` runs before `bla2` export LC_COLLATE=C @@ -36,11 +40,28 @@ for infile in $IN_DIR/*.in; do echo "$base: FAILED" # Rename the temporary file instead of removing it mv "$tmpfile" "${expectedfile/.expected.out/.produced.out}" - exit 1 + failed_tests+=("$base") + ((failure_count++)) fi - done +# Print summary of failures +if [ ${#failed_tests[@]} -ne 0 ]; then + echo -e "\n=== Test Summary ===" + echo "Failed tests:" + for test in "${failed_tests[@]}"; do + echo "✗ $test" + done + echo -e "\nTotal: $failure_count failed" + echo "==================" +fi + # Run the Mathlib tests cp lean-toolchain test/Mathlib/ cd test/Mathlib/ && ./test.sh +mathlib_exit_code=$? + +# Exit with error if any test failed or if Mathlib tests failed +if [ $failure_count -gt 0 ] || [ $mathlib_exit_code -ne 0 ]; then + exit 1 +fi diff --git a/test/Mathlib/test.sh b/test/Mathlib/test.sh index 0f9f89fe..b1f220df 100755 --- a/test/Mathlib/test.sh +++ b/test/Mathlib/test.sh @@ -4,6 +4,10 @@ IN_DIR="test" EXPECTED_DIR="test" +# Initialize variables to track failures +failed_tests=() +failure_count=0 + lake exe cache get > /dev/null lake build Mathlib @@ -34,8 +38,21 @@ for infile in $IN_DIR/*.in; do echo "$base: FAILED" # Rename the temporary file instead of removing it mv "$tmpfile" "${expectedfile/.expected.out/.produced.out}" - exit 1 + failed_tests+=("$base") + ((failure_count++)) fi - done +# Print summary of failures +if [ ${#failed_tests[@]} -ne 0 ]; then + echo -e "\n=== Mathlib Test Summary ===" + echo "Failed tests:" + for test in "${failed_tests[@]}"; do + echo "✗ $test" + done + echo -e "\nTotal: $failure_count failed" + echo "========================" + exit 1 +fi + +exit 0 diff --git a/test/Mathlib/test/record_cmd.expected.out b/test/Mathlib/test/record_cmd.expected.out new file mode 100644 index 00000000..7f30f1ce --- /dev/null +++ b/test/Mathlib/test/record_cmd.expected.out @@ -0,0 +1,29 @@ +{"env": 0} + +{"sorries": + [{"proofState": 0, + "pos": {"line": 1, "column": 93}, + "goal": "⊢ (2000 + 2001 + 2002 + 2003 + 2004 + 2005 + 2006) % 7 = 0", + "endPos": {"line": 1, "column": 98}}], + "messages": + [{"severity": "warning", + "pos": {"line": 1, "column": 8}, + "endPos": {"line": 1, "column": 30}, + "data": "declaration uses 'sorry'"}]} + +{"proofStatus": "Completed", "proofState": 1, "goals": []} + +{"sorries": + [{"proofState": 2, + "pos": {"line": 1, "column": 93}, + "goal": "⊢ (2000 + 2001 + 2002 + 2003 + 2004 + 2005 + 2006) % 7 = 0", + "endPos": {"line": 1, "column": 98}}], + "messages": + [{"severity": "warning", + "pos": {"line": 1, "column": 8}, + "endPos": {"line": 1, "column": 30}, + "data": "declaration uses 'sorry'"}], + "env": 1} + +{"proofStatus": "Completed", "proofState": 3, "goals": []} + diff --git a/test/Mathlib/test/record_cmd.in b/test/Mathlib/test/record_cmd.in new file mode 100644 index 00000000..22dcb8a4 --- /dev/null +++ b/test/Mathlib/test/record_cmd.in @@ -0,0 +1,9 @@ +{"cmd": "import Mathlib"} + +{"cmd": "theorem mathd_numbertheory_345 : (2000 + 2001 + 2002 + 2003 + 2004 + 2005 + 2006) % 7 = 0 := sorry", "env":0, "record": false} + +{"tactic": "norm_num", "proofState": 0} + +{"cmd": "theorem mathd_numbertheory_345 : (2000 + 2001 + 2002 + 2003 + 2004 + 2005 + 2006) % 7 = 0 := sorry", "env":0} + +{"tactic": "norm_num", "proofState": 2} diff --git a/test/record_cmd.expected.out b/test/record_cmd.expected.out new file mode 100644 index 00000000..5778387e --- /dev/null +++ b/test/record_cmd.expected.out @@ -0,0 +1,17 @@ +{"env": 0} + +{"messages": + [{"severity": "info", + "pos": {"line": 1, "column": 0}, + "endPos": {"line": 1, "column": 5}, + "data": "true"}]} + +{"message": "Unknown environment."} + +{"messages": + [{"severity": "info", + "pos": {"line": 1, "column": 0}, + "endPos": {"line": 1, "column": 5}, + "data": "37"}], + "env": 1} + diff --git a/test/record_cmd.in b/test/record_cmd.in new file mode 100644 index 00000000..8522b4c7 --- /dev/null +++ b/test/record_cmd.in @@ -0,0 +1,7 @@ +{"cmd": "def f := 37"} + +{"cmd": "#eval true", "record": false} + +{"cmd": "#eval false", "env":1} + +{"cmd": "#eval f", "env":0} diff --git a/test/record_cmd2.expected.out b/test/record_cmd2.expected.out new file mode 100644 index 00000000..f581d6cf --- /dev/null +++ b/test/record_cmd2.expected.out @@ -0,0 +1,35 @@ +{"messages": + [{"severity": "error", + "pos": {"line": 1, "column": 20}, + "endPos": null, + "data": "unexpected end of input; expected '{'"}, + {"severity": "error", + "pos": {"line": 1, "column": 18}, + "endPos": {"line": 1, "column": 20}, + "data": "unsolved goals\n⊢ 2 = 2"}], + "env": 0} + +{"env": 1} + +{"messages": + [{"severity": "error", + "pos": {"line": 1, "column": 66}, + "endPos": null, + "data": "unexpected end of input; expected '{'"}, + {"severity": "error", + "pos": {"line": 1, "column": 64}, + "endPos": {"line": 1, "column": 66}, + "data": "unsolved goals\nP Q R : Prop\n⊢ (P → Q) → (Q → R) → P → R"}]} + +{"messages": + [{"severity": "error", + "pos": {"line": 1, "column": 64}, + "endPos": {"line": 2, "column": 13}, + "data": "unsolved goals\nP Q R : Prop\nh1 : P → Q\nh2 : Q → R\np : P\n⊢ R"}]} + +{"messages": + [{"severity": "info", + "pos": {"line": 3, "column": 0}, + "endPos": {"line": 3, "column": 6}, + "data": "Try this: exact h2 (h1 p)"}]} + diff --git a/test/record_cmd2.in b/test/record_cmd2.in new file mode 100644 index 00000000..c71c23fb --- /dev/null +++ b/test/record_cmd2.in @@ -0,0 +1,9 @@ +{"cmd": "example : 2=2 := by"} + +{"cmd": "example : 2=2 := by\nrfl"} + +{"cmd": "theorem gc_demo (P Q R : Prop) : (P → Q) → (Q → R) → (P → R) := by", "record": false} + +{"cmd": "theorem gc_demo (P Q R : Prop) : (P → Q) → (Q → R) → (P → R) := by\nintro h1 h2 p", "record": false} + +{"cmd": "theorem gc_demo (P Q R : Prop) : (P → Q) → (Q → R) → (P → R) := by\nintro h1 h2 p\napply?", "record": false} diff --git a/test/record_tactic.expected.out b/test/record_tactic.expected.out new file mode 100644 index 00000000..2e5fa6d7 --- /dev/null +++ b/test/record_tactic.expected.out @@ -0,0 +1,41 @@ +{"sorries": + [{"proofState": 0, + "pos": {"line": 1, "column": 20}, + "goal": "⊢ 1 = 1", + "endPos": {"line": 1, "column": 25}}], + "messages": + [{"severity": "warning", + "pos": {"line": 1, "column": 0}, + "endPos": {"line": 1, "column": 7}, + "data": "declaration uses 'sorry'"}]} + +{"sorries": + [{"proofState": 1, + "pos": {"line": 1, "column": 20}, + "goal": "⊢ 1 = 1", + "endPos": {"line": 1, "column": 25}}], + "messages": + [{"severity": "warning", + "pos": {"line": 1, "column": 0}, + "endPos": {"line": 1, "column": 7}, + "data": "declaration uses 'sorry'"}], + "env": 0} + +{"sorries": + [{"proofState": 2, + "pos": {"line": 1, "column": 20}, + "goal": "⊢ 1 = 1", + "endPos": {"line": 1, "column": 25}}], + "messages": + [{"severity": "warning", + "pos": {"line": 1, "column": 0}, + "endPos": {"line": 1, "column": 7}, + "data": "declaration uses 'sorry'"}], + "env": 1} + +{"proofStatus": "Completed", "proofState": 3, "goals": []} + +{"proofStatus": "Completed", "proofState": 4, "goals": []} + +{"proofStatus": "Completed", "proofState": 5, "goals": []} + diff --git a/test/record_tactic.in b/test/record_tactic.in new file mode 100644 index 00000000..6305934a --- /dev/null +++ b/test/record_tactic.in @@ -0,0 +1,11 @@ +{"cmd": "example : 1=1 := by sorry", "record": false} + +{"cmd": "example : 1=1 := by sorry", "record": true} + +{"cmd": "example : 1=1 := by sorry"} + +{"tactic": "rfl", "proofState": 0} + +{"tactic": "rfl", "proofState": 1} + +{"tactic": "rfl", "proofState": 2}