Skip to content
14 changes: 14 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,19 @@ Commands may be of the form
{ "cmd" : "example : f = 2 := rfl", "env" : 1 }
```

```json
{ "cmd" : "example : f = 2 := rfl", "env" : 1, "record": false }
```

The input includes:
* A required `cmd` field containing the Lean command to execute
* An optional `env` field specifying a previous environment ID to build upon
* An optional `record` field (default: true) to record the environment after execution
* Optional fields for additional information:
* `allTactics`: when true, includes all tactic steps in the response
* `rootGoals`: when true, includes root goals as sorries
* `infotree`: specifies the level of info tree detail ("full", "tactics", "original", or "substantive")

The `env` field, if present,
must contain a number received in the `env` field of a previous response,
and causes the command to be run in the existing environment.
Expand All @@ -33,6 +46,7 @@ You can backtrack simply by using earlier values for `env`.
The response includes:
* A numeric label for the `Environment` after your command,
which you can use as the starting point for subsequent commands.
This will be `none` if `record` was set to false.
* Any messages generated while processing your command.
* A list of the `sorry`s in your command, including
* their expected type, and
Expand Down
16 changes: 12 additions & 4 deletions REPL/JSON.lean
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,13 @@ structure CommandOptions where
/-- Run Lean commands.
If `env = none`, starts a new session (in which you can use `import`).
If `env = some n`, builds on the existing environment `n`.
Setting `record = false` will discard the environment after execution, useful for memory management.
When `record = false`, the response's `env` field will be `none`.
-/
structure Command extends CommandOptions where
env : Option Nat
cmd : String
record : Option Bool := true
deriving ToJson, FromJson

/-- Process a Lean file in a fresh environment if `env` is not provided. -/
Expand All @@ -41,6 +44,7 @@ Run a tactic in a proof state.
structure ProofStep where
proofState : Nat
tactic : String
record: Option Bool := true
deriving ToJson, FromJson

/-- Line and column information for error messages and sorries. -/
Expand Down Expand Up @@ -122,7 +126,7 @@ A response to a Lean command.
`env` can be used in later calls, to build on the stored environment.
-/
structure CommandResponse where
env : Nat
env : Option Nat := none
messages : List Message := []
sorries : List Sorry := []
tactics : List Tactic := []
Expand All @@ -133,9 +137,13 @@ def Json.nonemptyList [ToJson α] (k : String) : List α → List (String × Jso
| [] => []
| l => [⟨k, toJson l⟩]

def Json.optField [ToJson α] (k : String) : Option α → List (String × Json)
| none => []
| some v => [(k, toJson v)]

instance : ToJson CommandResponse where
toJson r := Json.mkObj <| .flatten [
[("env", r.env)],
Json.optField "env" r.env,
Json.nonemptyList "messages" r.messages,
Json.nonemptyList "sorries" r.sorries,
Json.nonemptyList "tactics" r.tactics,
Expand All @@ -147,7 +155,7 @@ A response to a Lean tactic.
`proofState` can be used in later calls, to run further tactics.
-/
structure ProofStepResponse where
proofState : Nat
proofState : Option Nat
goals : List String
messages : List Message := []
sorries : List Sorry := []
Expand All @@ -157,7 +165,7 @@ deriving ToJson, FromJson

instance : ToJson ProofStepResponse where
toJson r := Json.mkObj <| .flatten [
[("proofState", r.proofState)],
Json.optField "proofState" r.proofState,
[("goals", toJson r.goals)],
Json.nonemptyList "messages" r.messages,
Json.nonemptyList "sorries" r.sorries,
Expand Down
56 changes: 34 additions & 22 deletions REPL/Main.lean
Original file line number Diff line number Diff line change
Expand Up @@ -231,8 +231,8 @@ def getProofStatus (proofState : ProofSnapshot) : M m String := do
| _ => return "Incomplete: open goals remain"

/-- Record a `ProofSnapshot` and generate a JSON response for it. -/
def createProofStepReponse (proofState : ProofSnapshot) (old? : Option ProofSnapshot := none) :
M m ProofStepResponse := do
def createProofStepReponse (proofState : ProofSnapshot) (old? : Option ProofSnapshot := none)
(record : Option Bool := true) : M m ProofStepResponse := do
let messages := proofState.newMessages old?
let messages ← messages.mapM fun m => Message.of m
let traces ← proofState.newTraces old?
Expand All @@ -246,7 +246,9 @@ def createProofStepReponse (proofState : ProofSnapshot) (old? : Option ProofSnap
-- For debugging purposes, sometimes we print out the trees here:
-- trees.forM fun t => do IO.println (← t.format)
let sorries ← sorries trees none (some proofState.rootGoals)
let id ← recordProofSnapshot proofState
let id ← match record with
| some false => pure none
| _ => some <$> recordProofSnapshot proofState
return {
proofState := id
goals := (← proofState.ppGoals).map fun s => s!"{s}"
Expand Down Expand Up @@ -323,7 +325,12 @@ def runCommand (s : Command) : M IO (CommandResponse ⊕ Error) := do
fileMap := default,
snap? := none,
cancelTk? := none } }
let env ← recordCommandSnapshot cmdSnapshot
let env ← match s.record with
| some false => do pure none
| _ =>
let id ← recordCommandSnapshot cmdSnapshot
pure (some id)

let jsonTrees := match s.infotree with
| some "full" => trees
| some "tactics" => trees.flatMap InfoTree.retainTacticInfo
Expand Down Expand Up @@ -358,7 +365,7 @@ def runProofStep (s : ProofStep) : M IO (ProofStepResponse ⊕ Error) := do
| some proofState =>
try
let proofState' ← proofState.runString s.tactic
return .inl (← createProofStepReponse proofState' proofState)
return .inl (← createProofStepReponse proofState' proofState s.record)
catch ex =>
return .inr ⟨"Lean error:\n" ++ ex.toString⟩

Expand Down Expand Up @@ -419,23 +426,28 @@ def printFlush [ToString α] (s : α) : IO Unit := do
out.flush -- Flush the output

/-- Read-eval-print loop for Lean. -/
unsafe def repl : IO Unit :=
StateT.run' loop {}
where loop : M IO Unit := do
let query ← getLines
if query = "" then
return ()
if query.startsWith "#" || query.startsWith "--" then loop else
IO.println <| toString <| ← match ← parse query with
| .command r => return toJson (← runCommand r)
| .file r => return toJson (← processFile r)
| .proofStep r => return toJson (← runProofStep r)
| .pickleEnvironment r => return toJson (← pickleCommandSnapshot r)
| .unpickleEnvironment r => return toJson (← unpickleCommandSnapshot r)
| .pickleProofSnapshot r => return toJson (← pickleProofSnapshot r)
| .unpickleProofSnapshot r => return toJson (← unpickleProofSnapshot r)
printFlush "\n" -- easier to parse the output if there are blank lines
loop
unsafe def repl : IO Unit := do
let rec loop (s : State) : IO Unit := do
let query ← getLines
if query = "" then
return ()
else if query.startsWith "#" || query.startsWith "--" then
loop s
else
let (result, s') ← StateT.run (do
match ← parse query with
| .command r => pure <| toJson (← runCommand r)
| .file r => pure <| toJson (← processFile r)
| .proofStep r => pure <| toJson (← runProofStep r)
| .pickleEnvironment r => pure <| toJson (← pickleCommandSnapshot r)
| .unpickleEnvironment r => pure <| toJson (← unpickleCommandSnapshot r)
| .pickleProofSnapshot r => pure <| toJson (← pickleProofSnapshot r)
| .unpickleProofSnapshot r => pure <| toJson (← unpickleProofSnapshot r)
) s
IO.println (toString result)
printFlush "\n" -- easier to parse the output if there are blank lines
loop s'
loop {}

/-- Main executable function, run as `lake exe repl`. -/
unsafe def main (_ : List String) : IO Unit := do
Expand Down
25 changes: 23 additions & 2 deletions test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@ EXPECTED_DIR="test"

lake build

# Initialize variables to track failures
failed_tests=()
failure_count=0

# ignore locale to ensure test `bla` runs before `bla2`
export LC_COLLATE=C

Expand Down Expand Up @@ -36,11 +40,28 @@ for infile in $IN_DIR/*.in; do
echo "$base: FAILED"
# Rename the temporary file instead of removing it
mv "$tmpfile" "${expectedfile/.expected.out/.produced.out}"
exit 1
failed_tests+=("$base")
((failure_count++))
fi

done

# Print summary of failures
if [ ${#failed_tests[@]} -ne 0 ]; then
echo -e "\n=== Test Summary ==="
echo "Failed tests:"
for test in "${failed_tests[@]}"; do
echo "✗ $test"
done
echo -e "\nTotal: $failure_count failed"
echo "=================="
fi

# Run the Mathlib tests
cp lean-toolchain test/Mathlib/
cd test/Mathlib/ && ./test.sh
mathlib_exit_code=$?

# Exit with error if any test failed or if Mathlib tests failed
if [ $failure_count -gt 0 ] || [ $mathlib_exit_code -ne 0 ]; then
exit 1
fi
21 changes: 19 additions & 2 deletions test/Mathlib/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@
IN_DIR="test"
EXPECTED_DIR="test"

# Initialize variables to track failures
failed_tests=()
failure_count=0

lake exe cache get > /dev/null
lake build Mathlib

Expand Down Expand Up @@ -34,8 +38,21 @@ for infile in $IN_DIR/*.in; do
echo "$base: FAILED"
# Rename the temporary file instead of removing it
mv "$tmpfile" "${expectedfile/.expected.out/.produced.out}"
exit 1
failed_tests+=("$base")
((failure_count++))
fi

done

# Print summary of failures
if [ ${#failed_tests[@]} -ne 0 ]; then
echo -e "\n=== Mathlib Test Summary ==="
echo "Failed tests:"
for test in "${failed_tests[@]}"; do
echo "✗ $test"
done
echo -e "\nTotal: $failure_count failed"
echo "========================"
exit 1
fi

exit 0
29 changes: 29 additions & 0 deletions test/Mathlib/test/record_cmd.expected.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
{"env": 0}

{"sorries":
[{"proofState": 0,
"pos": {"line": 1, "column": 93},
"goal": "⊢ (2000 + 2001 + 2002 + 2003 + 2004 + 2005 + 2006) % 7 = 0",
"endPos": {"line": 1, "column": 98}}],
"messages":
[{"severity": "warning",
"pos": {"line": 1, "column": 8},
"endPos": {"line": 1, "column": 30},
"data": "declaration uses 'sorry'"}]}
Comment on lines +3 to +12
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't understand here: if we're generating proofState: 0 here, and it is actually usable, then the environment has been transitively captured, and the record: false wasn't really respected.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Rex said that you can garbage collect the command that created the proof snapshot without garbage collecting the proof snapshot itself. I'm not sure what the use case for this would be tho. Also I think having each proof snapshot connected to a command would make it easier to garbage collect all the snapshots after you are done with tactic mode for a particular problem.

Copy link
Contributor Author

@RexWzh RexWzh May 12, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You can check the green(garbage collected) and yellow lines in the diagram above. Here are the relevant commands:

# By default, this consumes about 83 MB
[{'cmd': 'import Mathlib'},
 {'cmd': 'theorem womp0 (a0 b c : Nat) : (a0 + b) + c = c + a0 + b := by sorry', 'env': 0},
 {'cmd': 'theorem womp1 (a1 b c : Nat) : (a1 + b) + c = c + a1 + b := by sorry', 'env': 0},
 # ...
]

# With record=False, this consumes about 21 MB
[{'cmd': 'import Mathlib'},
 {'cmd': 'theorem womp0 (a0 b c : Nat) : (a0 + b) + c = c + a0 + b := by sorry', 'env': 0, 'record': False},
 {'cmd': 'theorem womp1 (a1 b c : Nat) : (a1 + b) + c = c + a1 + b := by sorry', 'env': 0, 'record': False},
 # ...
]

The ProofSnapshot is independent of the cmdSnapshot once created. The structure provides sufficient information for interactions and pickles.

structure ProofSnapshot where
  coreState     : Core.State
  coreContext   : Core.Context
  metaState     : Meta.State
  metaContext   : Meta.Context
  termState     : Term.State
  termContext   : Term.Context
  tacticState   : Tactic.State
  tacticContext : Tactic.Context
  rootGoals     : List MVarId

Copy link
Contributor Author

@RexWzh RexWzh May 12, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

and the record: false wasn't really respected

We can view it this way: in Cmd mode, record is intended for cmdSnapshot-gc, while in Tactic mode, it's for proofSnapshot.
And it can be used as default config for tactic-based interactions, as the generated cmdSnapshots are rarely used since they contain sorry.

If someone wishes to obtain the state while discarding both the cmdSnapshot and proofSnapshot, they can use the File mode like:

{"cmd": "example : 1 = 1 := by", "env": 0, "record": false}

And then extract the states from errors.

btw, this feature is beneficial for File-mode-based interactions like itp-interfaces, where most generated cmdStates are wasted due to incomplete errors.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ahh so command snapshots are only necessary to create additional command states. Which rarely happens for a single proof. Like usually u would only want to snapshot import headers and stuff


{"proofStatus": "Completed", "proofState": 1, "goals": []}

{"sorries":
[{"proofState": 2,
"pos": {"line": 1, "column": 93},
"goal": "⊢ (2000 + 2001 + 2002 + 2003 + 2004 + 2005 + 2006) % 7 = 0",
"endPos": {"line": 1, "column": 98}}],
"messages":
[{"severity": "warning",
"pos": {"line": 1, "column": 8},
"endPos": {"line": 1, "column": 30},
"data": "declaration uses 'sorry'"}],
"env": 1}

{"proofStatus": "Completed", "proofState": 3, "goals": []}

9 changes: 9 additions & 0 deletions test/Mathlib/test/record_cmd.in
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
{"cmd": "import Mathlib"}

{"cmd": "theorem mathd_numbertheory_345 : (2000 + 2001 + 2002 + 2003 + 2004 + 2005 + 2006) % 7 = 0 := sorry", "env":0, "record": false}

{"tactic": "norm_num", "proofState": 0}

{"cmd": "theorem mathd_numbertheory_345 : (2000 + 2001 + 2002 + 2003 + 2004 + 2005 + 2006) % 7 = 0 := sorry", "env":0}

{"tactic": "norm_num", "proofState": 2}
17 changes: 17 additions & 0 deletions test/record_cmd.expected.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
{"env": 0}

{"messages":
[{"severity": "info",
"pos": {"line": 1, "column": 0},
"endPos": {"line": 1, "column": 5},
"data": "true"}]}

{"message": "Unknown environment."}

{"messages":
[{"severity": "info",
"pos": {"line": 1, "column": 0},
"endPos": {"line": 1, "column": 5},
"data": "37"}],
"env": 1}

7 changes: 7 additions & 0 deletions test/record_cmd.in
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
{"cmd": "def f := 37"}

{"cmd": "#eval true", "record": false}

{"cmd": "#eval false", "env":1}

{"cmd": "#eval f", "env":0}
35 changes: 35 additions & 0 deletions test/record_cmd2.expected.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
{"messages":
[{"severity": "error",
"pos": {"line": 1, "column": 20},
"endPos": null,
"data": "unexpected end of input; expected '{'"},
{"severity": "error",
"pos": {"line": 1, "column": 18},
"endPos": {"line": 1, "column": 20},
"data": "unsolved goals\n⊢ 2 = 2"}],
"env": 0}

{"env": 1}

{"messages":
[{"severity": "error",
"pos": {"line": 1, "column": 66},
"endPos": null,
"data": "unexpected end of input; expected '{'"},
{"severity": "error",
"pos": {"line": 1, "column": 64},
"endPos": {"line": 1, "column": 66},
"data": "unsolved goals\nP Q R : Prop\n⊢ (P → Q) → (Q → R) → P → R"}]}

{"messages":
[{"severity": "error",
"pos": {"line": 1, "column": 64},
"endPos": {"line": 2, "column": 13},
"data": "unsolved goals\nP Q R : Prop\nh1 : P → Q\nh2 : Q → R\np : P\n⊢ R"}]}

{"messages":
[{"severity": "info",
"pos": {"line": 3, "column": 0},
"endPos": {"line": 3, "column": 6},
"data": "Try this: exact h2 (h1 p)"}]}

9 changes: 9 additions & 0 deletions test/record_cmd2.in
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
{"cmd": "example : 2=2 := by"}

{"cmd": "example : 2=2 := by\nrfl"}

{"cmd": "theorem gc_demo (P Q R : Prop) : (P → Q) → (Q → R) → (P → R) := by", "record": false}

{"cmd": "theorem gc_demo (P Q R : Prop) : (P → Q) → (Q → R) → (P → R) := by\nintro h1 h2 p", "record": false}

{"cmd": "theorem gc_demo (P Q R : Prop) : (P → Q) → (Q → R) → (P → R) := by\nintro h1 h2 p\napply?", "record": false}
Loading