leanprover-community · RexWzh · Apr 10, 2025 · Apr 10, 2025 · Apr 11, 2025 · Apr 14, 2025
diff --git a/README.md b/README.md
@@ -20,6 +20,19 @@ Commands may be of the form
 { "cmd" : "example : f = 2 := rfl", "env" : 1 }
 ```
 
+```json
+{ "cmd" : "example : f = 2 := rfl", "env" : 1, "record": false }
+```
+
+The input includes:
+* A required `cmd` field containing the Lean command to execute
+* An optional `env` field specifying a previous environment ID to build upon
+* An optional `record` field (default: true) to record the environment after execution
+* Optional fields for additional information:
+  * `allTactics`: when true, includes all tactic steps in the response
+  * `rootGoals`: when true, includes root goals as sorries
+  * `infotree`: specifies the level of info tree detail ("full", "tactics", "original", or "substantive")
+
 The `env` field, if present,
 must contain a number received in the `env` field of a previous response,
 and causes the command to be run in the existing environment.
@@ -33,6 +46,7 @@ You can backtrack simply by using earlier values for `env`.
 The response includes:
 * A numeric label for the `Environment` after your command,
   which you can use as the starting point for subsequent commands.
+  This will be `none` if `record` was set to false.
 * Any messages generated while processing your command.
 * A list of the `sorry`s in your command, including
   * their expected type, and

diff --git a/REPL/JSON.lean b/REPL/JSON.lean
@@ -23,10 +23,13 @@ structure CommandOptions where
 /-- Run Lean commands.
 If `env = none`, starts a new session (in which you can use `import`).
 If `env = some n`, builds on the existing environment `n`.
+Setting `record = false` will discard the environment after execution, useful for memory management.
+When `record = false`, the response's `env` field will be `none`.
 -/
 structure Command extends CommandOptions where
   env : Option Nat
   cmd : String
+  record : Option Bool := true
 deriving ToJson, FromJson
 
 /-- Process a Lean file in a fresh environment if `env` is not provided. -/
@@ -41,6 +44,7 @@ Run a tactic in a proof state.
 structure ProofStep where
   proofState : Nat
   tactic : String
+  record: Option Bool := true
 deriving ToJson, FromJson
 
 /-- Line and column information for error messages and sorries. -/
@@ -122,7 +126,7 @@ A response to a Lean command.
 `env` can be used in later calls, to build on the stored environment.
 -/
 structure CommandResponse where
-  env : Nat
+  env : Option Nat := none
   messages : List Message := []
   sorries : List Sorry := []
   tactics : List Tactic := []
@@ -133,9 +137,13 @@ def Json.nonemptyList [ToJson α] (k : String) : List α → List (String × Jso
   | [] => []
   | l  => [⟨k, toJson l⟩]
 
+def Json.optField [ToJson α] (k : String) : Option α → List (String × Json)
+  | none => []
+  | some v => [(k, toJson v)]
+
 instance : ToJson CommandResponse where
   toJson r := Json.mkObj <| .flatten [
-    [("env", r.env)],
+    Json.optField "env" r.env,
     Json.nonemptyList "messages" r.messages,
     Json.nonemptyList "sorries" r.sorries,
     Json.nonemptyList "tactics" r.tactics,
@@ -147,7 +155,7 @@ A response to a Lean tactic.
 `proofState` can be used in later calls, to run further tactics.
 -/
 structure ProofStepResponse where
-  proofState : Nat
+  proofState : Option Nat
   goals : List String
   messages : List Message := []
   sorries : List Sorry := []
@@ -157,7 +165,7 @@ deriving ToJson, FromJson
 
 instance : ToJson ProofStepResponse where
   toJson r := Json.mkObj <| .flatten [
-    [("proofState", r.proofState)],
+    Json.optField "proofState" r.proofState,
     [("goals", toJson r.goals)],
     Json.nonemptyList "messages" r.messages,
     Json.nonemptyList "sorries" r.sorries,

diff --git a/REPL/Main.lean b/REPL/Main.lean
@@ -231,8 +231,8 @@ def getProofStatus (proofState : ProofSnapshot) : M m String := do
     | _ => return "Incomplete: open goals remain"
 
 /-- Record a `ProofSnapshot` and generate a JSON response for it. -/
-def createProofStepReponse (proofState : ProofSnapshot) (old? : Option ProofSnapshot := none) :
-    M m ProofStepResponse := do
+def createProofStepReponse (proofState : ProofSnapshot) (old? : Option ProofSnapshot := none)
+  (record : Option Bool := true) : M m ProofStepResponse := do
   let messages := proofState.newMessages old?
   let messages ← messages.mapM fun m => Message.of m
   let traces ← proofState.newTraces old?
@@ -246,7 +246,9 @@ def createProofStepReponse (proofState : ProofSnapshot) (old? : Option ProofSnap
   -- For debugging purposes, sometimes we print out the trees here:
   -- trees.forM fun t => do IO.println (← t.format)
   let sorries ← sorries trees none (some proofState.rootGoals)
-  let id ← recordProofSnapshot proofState
+  let id ← match record with
+  | some false => pure none
+  | _ => some <$> recordProofSnapshot proofState
   return {
     proofState := id
     goals := (← proofState.ppGoals).map fun s => s!"{s}"
@@ -323,7 +325,12 @@ def runCommand (s : Command) : M IO (CommandResponse ⊕ Error) := do
         fileMap := default,
         snap? := none,
         cancelTk? := none } }
-  let env ← recordCommandSnapshot cmdSnapshot
+  let env ← match s.record with
+  | some false => do pure none
+  | _ =>
+    let id ← recordCommandSnapshot cmdSnapshot
+    pure (some id)
+
   let jsonTrees := match s.infotree with
   | some "full" => trees
   | some "tactics" => trees.flatMap InfoTree.retainTacticInfo
@@ -358,7 +365,7 @@ def runProofStep (s : ProofStep) : M IO (ProofStepResponse ⊕ Error) := do
   | some proofState =>
     try
       let proofState' ← proofState.runString s.tactic
-      return .inl (← createProofStepReponse proofState' proofState)
+      return .inl (← createProofStepReponse proofState' proofState s.record)
     catch ex =>
       return .inr ⟨"Lean error:\n" ++ ex.toString⟩
 
@@ -419,23 +426,28 @@ def printFlush [ToString α] (s : α) : IO Unit := do
   out.flush -- Flush the output
 
 /-- Read-eval-print loop for Lean. -/
-unsafe def repl : IO Unit :=
-  StateT.run' loop {}
-where loop : M IO Unit := do
-  let query ← getLines
-  if query = "" then
-    return ()
-  if query.startsWith "#" || query.startsWith "--" then loop else
-  IO.println <| toString <| ← match ← parse query with
-  | .command r => return toJson (← runCommand r)
-  | .file r => return toJson (← processFile r)
-  | .proofStep r => return toJson (← runProofStep r)
-  | .pickleEnvironment r => return toJson (← pickleCommandSnapshot r)
-  | .unpickleEnvironment r => return toJson (← unpickleCommandSnapshot r)
-  | .pickleProofSnapshot r => return toJson (← pickleProofSnapshot r)
-  | .unpickleProofSnapshot r => return toJson (← unpickleProofSnapshot r)
-  printFlush "\n" -- easier to parse the output if there are blank lines
-  loop
+unsafe def repl : IO Unit := do
+  let rec loop (s : State) : IO Unit := do
+    let query ← getLines
+    if query = "" then
+      return ()
+    else if query.startsWith "#" || query.startsWith "--" then
+      loop s
+    else
+      let (result, s') ← StateT.run (do
+        match ← parse query with
+        | .command r => pure <| toJson (← runCommand r)
+        | .file r => pure <| toJson (← processFile r)
+        | .proofStep r => pure <| toJson (← runProofStep r)
+        | .pickleEnvironment r => pure <| toJson (← pickleCommandSnapshot r)
+        | .unpickleEnvironment r => pure <| toJson (← unpickleCommandSnapshot r)
+        | .pickleProofSnapshot r => pure <| toJson (← pickleProofSnapshot r)
+        | .unpickleProofSnapshot r => pure <| toJson (← unpickleProofSnapshot r)
+      ) s
+      IO.println (toString result)
+      printFlush "\n" -- easier to parse the output if there are blank lines
+      loop s'
+  loop {}
 
 /-- Main executable function, run as `lake exe repl`. -/
 unsafe def main (_ : List String) : IO Unit := do

diff --git a/test.sh b/test.sh
@@ -6,6 +6,10 @@ EXPECTED_DIR="test"
 
 lake build
 
+# Initialize variables to track failures
+failed_tests=()
+failure_count=0
+
 # ignore locale to ensure test `bla` runs before `bla2`
 export LC_COLLATE=C
 
@@ -36,11 +40,28 @@ for infile in $IN_DIR/*.in; do
         echo "$base: FAILED"
         # Rename the temporary file instead of removing it
         mv "$tmpfile" "${expectedfile/.expected.out/.produced.out}"
-        exit 1
+        failed_tests+=("$base")
+        ((failure_count++))
     fi
-
 done
 
+# Print summary of failures
+if [ ${#failed_tests[@]} -ne 0 ]; then
+    echo -e "\n=== Test Summary ==="
+    echo "Failed tests:"
+    for test in "${failed_tests[@]}"; do
+        echo "✗ $test"
+    done
+    echo -e "\nTotal: $failure_count failed"
+    echo "=================="
+fi
+
 # Run the Mathlib tests
 cp lean-toolchain test/Mathlib/
 cd test/Mathlib/ && ./test.sh
+mathlib_exit_code=$?
+
+# Exit with error if any test failed or if Mathlib tests failed
+if [ $failure_count -gt 0 ] || [ $mathlib_exit_code -ne 0 ]; then
+    exit 1
+fi
diff --git a/test/Mathlib/test.sh b/test/Mathlib/test.sh
@@ -4,6 +4,10 @@
 IN_DIR="test"
 EXPECTED_DIR="test"
 
+# Initialize variables to track failures
+failed_tests=()
+failure_count=0
+
 lake exe cache get > /dev/null
 lake build Mathlib
 
@@ -34,8 +38,21 @@ for infile in $IN_DIR/*.in; do
         echo "$base: FAILED"
         # Rename the temporary file instead of removing it
         mv "$tmpfile" "${expectedfile/.expected.out/.produced.out}"
-        exit 1
+        failed_tests+=("$base")
+        ((failure_count++))
     fi
-
 done
 
+# Print summary of failures
+if [ ${#failed_tests[@]} -ne 0 ]; then
+    echo -e "\n=== Mathlib Test Summary ==="
+    echo "Failed tests:"
+    for test in "${failed_tests[@]}"; do
+        echo "✗ $test"
+    done
+    echo -e "\nTotal: $failure_count failed"
+    echo "========================"
+    exit 1
+fi
+
+exit 0
diff --git a/test/Mathlib/test/record_cmd.expected.out b/test/Mathlib/test/record_cmd.expected.out
@@ -0,0 +1,29 @@
+{"env": 0}
+
+{"sorries":
+ [{"proofState": 0,
+   "pos": {"line": 1, "column": 93},
+   "goal": "⊢ (2000 + 2001 + 2002 + 2003 + 2004 + 2005 + 2006) % 7 = 0",
+   "endPos": {"line": 1, "column": 98}}],
+ "messages":
+ [{"severity": "warning",
+   "pos": {"line": 1, "column": 8},
+   "endPos": {"line": 1, "column": 30},
+   "data": "declaration uses 'sorry'"}]}
+
+{"proofStatus": "Completed", "proofState": 1, "goals": []}
+
+{"sorries":
+ [{"proofState": 2,
+   "pos": {"line": 1, "column": 93},
+   "goal": "⊢ (2000 + 2001 + 2002 + 2003 + 2004 + 2005 + 2006) % 7 = 0",
+   "endPos": {"line": 1, "column": 98}}],
+ "messages":
+ [{"severity": "warning",
+   "pos": {"line": 1, "column": 8},
+   "endPos": {"line": 1, "column": 30},
+   "data": "declaration uses 'sorry'"}],
+ "env": 1}
+
+{"proofStatus": "Completed", "proofState": 3, "goals": []}
+
diff --git a/test/Mathlib/test/record_cmd.in b/test/Mathlib/test/record_cmd.in
@@ -0,0 +1,9 @@
+{"cmd": "import Mathlib"}
+
+{"cmd": "theorem mathd_numbertheory_345 : (2000 + 2001 + 2002 + 2003 + 2004 + 2005 + 2006) % 7 = 0 := sorry", "env":0, "record": false}
+
+{"tactic": "norm_num", "proofState": 0}
+
+{"cmd": "theorem mathd_numbertheory_345 : (2000 + 2001 + 2002 + 2003 + 2004 + 2005 + 2006) % 7 = 0 := sorry", "env":0}
+
+{"tactic": "norm_num", "proofState": 2}
diff --git a/test/record_cmd.expected.out b/test/record_cmd.expected.out
@@ -0,0 +1,17 @@
+{"env": 0}
+
+{"messages":
+ [{"severity": "info",
+   "pos": {"line": 1, "column": 0},
+   "endPos": {"line": 1, "column": 5},
+   "data": "true"}]}
+
+{"message": "Unknown environment."}
+
+{"messages":
+ [{"severity": "info",
+   "pos": {"line": 1, "column": 0},
+   "endPos": {"line": 1, "column": 5},
+   "data": "37"}],
+ "env": 1}
+
diff --git a/test/record_cmd.in b/test/record_cmd.in
@@ -0,0 +1,7 @@
+{"cmd": "def f := 37"}
+
+{"cmd": "#eval true", "record": false}
+
+{"cmd": "#eval false", "env":1}
+
+{"cmd": "#eval f", "env":0}
diff --git a/test/record_cmd2.expected.out b/test/record_cmd2.expected.out
@@ -0,0 +1,35 @@
+{"messages":
+ [{"severity": "error",
+   "pos": {"line": 1, "column": 20},
+   "endPos": null,
+   "data": "unexpected end of input; expected '{'"},
+  {"severity": "error",
+   "pos": {"line": 1, "column": 18},
+   "endPos": {"line": 1, "column": 20},
+   "data": "unsolved goals\n⊢ 2 = 2"}],
+ "env": 0}
+
+{"env": 1}
+
+{"messages":
+ [{"severity": "error",
+   "pos": {"line": 1, "column": 66},
+   "endPos": null,
+   "data": "unexpected end of input; expected '{'"},
+  {"severity": "error",
+   "pos": {"line": 1, "column": 64},
+   "endPos": {"line": 1, "column": 66},
+   "data": "unsolved goals\nP Q R : Prop\n⊢ (P → Q) → (Q → R) → P → R"}]}
+
+{"messages":
+ [{"severity": "error",
+   "pos": {"line": 1, "column": 64},
+   "endPos": {"line": 2, "column": 13},
+   "data": "unsolved goals\nP Q R : Prop\nh1 : P → Q\nh2 : Q → R\np : P\n⊢ R"}]}
+
+{"messages":
+ [{"severity": "info",
+   "pos": {"line": 3, "column": 0},
+   "endPos": {"line": 3, "column": 6},
+   "data": "Try this: exact h2 (h1 p)"}]}
+
diff --git a/test/record_cmd2.in b/test/record_cmd2.in
@@ -0,0 +1,9 @@
+{"cmd": "example  : 2=2 := by"}
+
+{"cmd": "example  : 2=2 := by\nrfl"}
+
+{"cmd": "theorem gc_demo (P Q R : Prop) : (P → Q) → (Q → R) → (P → R) := by", "record": false}
+
+{"cmd": "theorem gc_demo (P Q R : Prop) : (P → Q) → (Q → R) → (P → R) := by\nintro h1 h2 p", "record": false}
+
+{"cmd": "theorem gc_demo (P Q R : Prop) : (P → Q) → (Q → R) → (P → R) := by\nintro h1 h2 p\napply?", "record": false}