rookiemango committed on
Commit
dddc1ae
·
verified ·
1 Parent(s): 67945ec

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50) hide show
  1. .gitattributes +35 -0
  2. .github/workflows/ci.yml +24 -0
  3. .gitignore +7 -0
  4. .vscode/copyright.code-snippets +13 -0
  5. .vscode/extensions.json +13 -0
  6. .vscode/module-docstring.code-snippets +35 -0
  7. .vscode/settings.json +11 -0
  8. README.md +174 -0
  9. REPL.lean +4 -0
  10. REPL/Frontend.lean +47 -0
  11. REPL/JSON.lean +186 -0
  12. REPL/Lean/ContextInfo.lean +9 -0
  13. REPL/Lean/Environment.lean +31 -0
  14. REPL/Lean/InfoTree.lean +272 -0
  15. REPL/Lean/InfoTree/ToJson.lean +114 -0
  16. REPL/Main.lean +323 -0
  17. REPL/Snapshots.lean +306 -0
  18. REPL/Util/Path.lean +36 -0
  19. REPL/Util/Pickle.lean +44 -0
  20. __pycache__/code.cpython-310.pyc +0 -0
  21. __pycache__/code.cpython-39.pyc +0 -0
  22. __pycache__/openllm_pass_rate_new_test.cpython-39.pyc +0 -0
  23. all_code.py +159 -0
  24. basic_working.json +0 -0
  25. code.py +69 -0
  26. data/basic_working.json +0 -0
  27. data/notlean_dependency.json +3 -0
  28. gpt_pass_rate_multi_pass.py +54 -0
  29. gpt_pass_rate_new_notlean_test.py +289 -0
  30. gpt_pass_rate_new_test.py +287 -0
  31. lake-manifest.json +68 -0
  32. lakefile.lean +17 -0
  33. lean-toolchain +1 -0
  34. nohup.out +4 -0
  35. openllm_pass_rate_multi_pass.py +106 -0
  36. openllm_pass_rate_new_notlean_test.py +265 -0
  37. openllm_pass_rate_new_test.py +306 -0
  38. pass_rate.py +194 -0
  39. pass_rate_atp_pass.py +112 -0
  40. pass_rate_atp_test.py +264 -0
  41. pass_rate_found_item.py +175 -0
  42. pass_rate_multi.py +48 -0
  43. pass_rate_multi_notlean.py +40 -0
  44. pass_rate_multi_notlean_pass.py +43 -0
  45. pass_rate_multi_pass.py +112 -0
  46. pass_rate_new.py +196 -0
  47. pass_rate_new_test.py +255 -0
  48. pass_rate_new_test_allcontent.py +255 -0
  49. pass_rate_notlean.py +202 -0
  50. pass_rate_notlean_test.py +261 -0
.gitattributes CHANGED
@@ -33,3 +33,38 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ results.json filter=lfs diff=lfs merge=lfs -text
37
+ test/15k_state_problem_translation.json filter=lfs diff=lfs merge=lfs -text
38
+ test/lean4_random/1k_test.json filter=lfs diff=lfs merge=lfs -text
39
+ test/lean4_random/5k_first.json filter=lfs diff=lfs merge=lfs -text
40
+ test/lean4_random/5k_second.json filter=lfs diff=lfs merge=lfs -text
41
+ test/lean4_random/5k_third.json filter=lfs diff=lfs merge=lfs -text
42
+ test/result.json filter=lfs diff=lfs merge=lfs -text
43
+ test/zero_shot/lean4_basic_test/generation/lean4_random_5k_first_1epoch/1/result.json filter=lfs diff=lfs merge=lfs -text
44
+ test/zero_shot/lean4_basic_test/generation/lean4_random_5k_first_2epoch/1/result.json filter=lfs diff=lfs merge=lfs -text
45
+ test/zero_shot/lean4_basic_test/generation/lean4_random_5k_first_3epoch/1/result.json filter=lfs diff=lfs merge=lfs -text
46
+ test/zero_shot/lean4_random_test/generation/lean4_random_5k_first_1epoch/1/result.json filter=lfs diff=lfs merge=lfs -text
47
+ test/zero_shot/lean4_random_test/generation/lean4_random_5k_first_2epoch/1/result.json filter=lfs diff=lfs merge=lfs -text
48
+ test/zero_shot/lean4_random_test/generation/lean4_random_5k_first_3epoch/1/result.json filter=lfs diff=lfs merge=lfs -text
49
+ test/zero_shot/math_train/generation/lean4_random_15k_all/2/1/0.json filter=lfs diff=lfs merge=lfs -text
50
+ test/zero_shot/math_train/generation/lean4_random_15k_all/2/1/1.json filter=lfs diff=lfs merge=lfs -text
51
+ gpt_result/lean_basic/gpt3/1.jsonl filter=lfs diff=lfs merge=lfs -text
52
+ gpt_result/lean_basic/gpt3/2.jsonl filter=lfs diff=lfs merge=lfs -text
53
+ gpt_result/lean_basic/gpt3/3.jsonl filter=lfs diff=lfs merge=lfs -text
54
+ gpt_result/lean_basic/gpt3/4.jsonl filter=lfs diff=lfs merge=lfs -text
55
+ gpt_result/lean_basic/gpt3/5.jsonl filter=lfs diff=lfs merge=lfs -text
56
+ gpt_result/lean_basic/gpt4/1.jsonl filter=lfs diff=lfs merge=lfs -text
57
+ gpt_result/lean_basic/gpt4/2.jsonl filter=lfs diff=lfs merge=lfs -text
58
+ gpt_result/lean_basic/gpt4/3.jsonl filter=lfs diff=lfs merge=lfs -text
59
+ gpt_result/lean_basic/gpt4/4.jsonl filter=lfs diff=lfs merge=lfs -text
60
+ gpt_result/lean_basic/gpt4/5.jsonl filter=lfs diff=lfs merge=lfs -text
61
+ gpt_result/lean_random/gpt3/1.jsonl filter=lfs diff=lfs merge=lfs -text
62
+ gpt_result/lean_random/gpt3/2.jsonl filter=lfs diff=lfs merge=lfs -text
63
+ gpt_result/lean_random/gpt3/3.jsonl filter=lfs diff=lfs merge=lfs -text
64
+ gpt_result/lean_random/gpt3/4.jsonl filter=lfs diff=lfs merge=lfs -text
65
+ gpt_result/lean_random/gpt3/5.jsonl filter=lfs diff=lfs merge=lfs -text
66
+ gpt_result/lean_random/gpt4/1.jsonl filter=lfs diff=lfs merge=lfs -text
67
+ gpt_result/lean_random/gpt4/2.jsonl filter=lfs diff=lfs merge=lfs -text
68
+ gpt_result/lean_random/gpt4/3.jsonl filter=lfs diff=lfs merge=lfs -text
69
+ gpt_result/lean_random/gpt4/4.jsonl filter=lfs diff=lfs merge=lfs -text
70
+ gpt_result/lean_random/gpt4/5.jsonl filter=lfs diff=lfs merge=lfs -text
.github/workflows/ci.yml ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Run Tests
2
+
3
+ on: [push, pull_request]
4
+
5
+ jobs:
6
+ test:
7
+ runs-on: ubuntu-latest
8
+
9
+ steps:
10
+ - name: Checkout code
11
+ uses: actions/checkout@v2
12
+
13
+ - name: install elan
14
+ run: |
15
+ set -o pipefail
16
+ curl -sSfL https://github.com/leanprover/elan/releases/download/v3.0.0/elan-x86_64-unknown-linux-gnu.tar.gz | tar xz
17
+ ./elan-init -y --default-toolchain none
18
+ echo "$HOME/.elan/bin" >> $GITHUB_PATH
19
+
20
+ - name: build
21
+ run: lake build
22
+
23
+ - name: Run tests
24
+ run: ./test.sh
.gitignore ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ /build
2
+ /lake-packages/*
3
+ /lakefile.olean
4
+ /.lake
5
+ /test/Mathlib/.lake
6
+ /test/*.olean
7
+ /test/*.olean.tmp
.vscode/copyright.code-snippets ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "Copyright header for mathlib": {
3
+ "scope": "lean4",
4
+ "prefix": "copyright",
5
+ "body": [
6
+ "/-",
7
+ "Copyright (c) ${CURRENT_YEAR} $1. All rights reserved.",
8
+ "Released under Apache 2.0 license as described in the file LICENSE.",
9
+ "Authors: $1",
10
+ "-/"
11
+ ]
12
+ }
13
+ }
.vscode/extensions.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ // See https://go.microsoft.com/fwlink/?LinkId=827846 to learn about workspace recommendations.
3
+ // Extension identifier format: ${publisher}.${name}. Example: vscode.csharp
4
+
5
+ // List of extensions which should be recommended for users of this workspace.
6
+ "recommendations": [
7
+ "leanprover.lean4"
8
+ ],
9
+ // List of extensions recommended by VS Code that should not be recommended for users of this workspace.
10
+ "unwantedRecommendations": [
11
+ "ms-vscode-remote.remote-containers"
12
+ ]
13
+ }
.vscode/module-docstring.code-snippets ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "Module docstring for mathlib": {
3
+ "scope": "lean4",
4
+ "prefix": "module docstring",
5
+ "body": [
6
+ "/-!",
7
+ "# ${TM_FILENAME_BASE/([^_]*)(_?)/${1:/capitalize}${2:+ }/g}",
8
+ "",
9
+ "## Main definitions",
10
+ "",
11
+ "* `FooBar`",
12
+ "",
13
+ "## Main statements",
14
+ "",
15
+ "* `fooBar_unique`",
16
+ "",
17
+ "## Notation",
18
+ "",
19
+ "",
20
+ "",
21
+ "## Implementation details",
22
+ "",
23
+ "",
24
+ "",
25
+ "## References",
26
+ "",
27
+ "* [F. Bar, *Quuxes*][bibkey]",
28
+ "",
29
+ "## Tags",
30
+ "",
31
+ "Foobars, barfoos",
32
+ "-/",
33
+ "",
34
+ ]},
35
+ }
.vscode/settings.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "editor.insertSpaces": true,
3
+ "editor.tabSize": 2,
4
+ "editor.rulers" : [100],
5
+ "files.encoding": "utf8",
6
+ "files.eol": "\n",
7
+ "files.insertFinalNewline": true,
8
+ // We don't use this: it messes up our test files!
9
+ // "files.trimFinalNewlines": true,
10
+ "files.trimTrailingWhitespace": true,
11
+ }
README.md ADDED
@@ -0,0 +1,174 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # A read-eval-print-loop for Lean 4
2
+
3
+ Run using `lake exe repl`.
4
+ Communicates via JSON on stdin and stdout.
5
+ Commands should be separated by blank lines.
6
+
7
+ The REPL works both in "command" mode and "tactic" mode.
8
+
9
+ ## Command mode
10
+
11
+ In command mode, you send complete commands (e.g. declarations) to the REPL.
12
+
13
+ Commands may be of the form
14
+
15
+ ```json
16
+ { "cmd" : "def f := 2" }
17
+ ```
18
+
19
+ ```json
20
+ { "cmd" : "example : f = 2 := rfl", "env" : 1 }
21
+ ```
22
+
23
+ The `env` field, if present,
24
+ must contain a number received in the `env` field of a previous response,
25
+ and causes the command to be run in the existing environment.
26
+
27
+ If there is no `env` field, a new environment is created.
28
+
29
+ You can only use `import` commands when you do not specify the `env` field.
30
+
31
+ You can backtrack simply by using earlier values for `env`.
32
+
33
+ The response includes:
34
+ * A numeric label for the `Environment` after your command,
35
+ which you can use as the starting point for subsequent commands.
36
+ * Any messages generated while processing your command.
37
+ * A list of the `sorry`s in your command, including
38
+ * their expected type, and
39
+ * a numeric label for the proof state at the `sorry`, which you can then use in tactic mode.
40
+
41
+ Example output:
42
+
43
+ ```json
44
+ {"sorries":
45
+ [{"pos": {"line": 1, "column": 18},
46
+ "endPos": {"line": 1, "column": 23},
47
+ "goal": "⊢ Nat",
48
+ "proofState": 0}],
49
+ "messages":
50
+ [{"severity": "error",
51
+ "pos": {"line": 1, "column": 23},
52
+ "endPos": {"line": 1, "column": 26},
53
+ "data":
54
+ "type mismatch\n rfl\nhas type\n f = f : Prop\nbut is expected to have type\n f = 2 : Prop"}],
55
+ "env": 6}
56
+ ```
57
+
58
+ showing any messages generated, and sorries with their goal states.
59
+
60
+ ## File mode
61
+
62
+ There is a simple wrapper around command mode that allows reading in an entire file.
63
+
64
+ If `test/file.lean` contains
65
+ ```lean
66
+ def f : Nat := 37
67
+
68
+ def g := 2
69
+
70
+ theorem h : f + g = 39 := by exact rfl
71
+ ```
72
+
73
+ then
74
+ ```
75
+ echo '{"path": "test/file.lean", "allTactics": true}' | lake exe repl
76
+ ```
77
+ results in output
78
+ ```json
79
+ {"tactics":
80
+ [{"tactic": "exact rfl",
81
+ "proofState": 0,
82
+ "pos": {"line": 5, "column": 29},
83
+ "goals": "⊢ f + g = 39",
84
+ "endPos": {"line": 5, "column": 38}}],
85
+ "env": 0}
86
+ ```
87
+
88
+ ## Tactic mode (experimental)
89
+
90
+ To enter tactic mode issue a command containing a `sorry`,
91
+ and then use the `proofState` index returned for each `sorry`.
92
+
93
+ Example usage:
94
+ ```json
95
+ {"cmd" : "def f (x : Unit) : Nat := by sorry"}
96
+
97
+ {"sorries":
98
+ [{"proofState": 0,
99
+ "pos": {"line": 1, "column": 29},
100
+ "goal": "x : Unit\n⊢ Nat",
101
+ "endPos": {"line": 1, "column": 34}}],
102
+ "messages":
103
+ [{"severity": "warning",
104
+ "pos": {"line": 1, "column": 4},
105
+ "endPos": {"line": 1, "column": 5},
106
+ "data": "declaration uses 'sorry'"}],
107
+ "env": 0}
108
+
109
+ {"tactic": "apply Int.natAbs", "proofState": 0}
110
+
111
+ {"proofState": 1, "goals": ["x : Unit\n⊢ Int"]}
112
+
113
+ {"tactic": "exact -37", "proofState": 1}
114
+
115
+ {"proofState": 2, "goals": []}
116
+ ```
117
+
118
+ You can use `sorry` in tactic mode.
119
+ The result will contain additional `proofState` identifiers for the goal at each sorry.
120
+
121
+ At present there is nothing you can do with a completed proof state:
122
+ we would like to extend this so that you can replace the original `sorry` with your tactic script,
123
+ and obtain the resulting `Environment`
124
+
125
+ ## Pickling
126
+
127
+ The REPL supports pickling environments and proof states to disk as `.olean` files.
128
+ As long as the same imports are available, it should be possible to move such an `.olean` file
129
+ to another machine and unpickle into a new REPL session.
130
+
131
+ The commands are
132
+
133
+ ```json
134
+ {"pickleTo": "path/to/file.olean", "env": 7}
135
+
136
+ {"pickleTo": "path/to/file.olean", "proofState": 17}
137
+
138
+ {"unpickleEnvFrom": "path/to/file.olean"}
139
+
140
+ {"unpickleProofStateFrom": "path/to/file.olean"}
141
+ ```
142
+
143
+ The unpickling commands will report the new "env" or "proofState" identifier that
144
+ you can use in subsequent commands.
145
+
146
+ Pickling is quite efficient:
147
+ * we don't record full `Environment`s, only the changes relative to imports
148
+ * unpickling uses memory mapping
149
+ * file sizes are generally small, but see https://github.com/digama0/leangz if compression is
150
+ desirable
151
+
152
+ ## Using the REPL from another project
153
+
154
+ Set up your project as usual using `lake new` or `lake init`
155
+ (or the interactive setup GUI available via the VSCode extension under the `∀` menu).
156
+
157
+ In that project, add `require` statements in the `lakefile.lean` for any dependencies you need
158
+ (e.g. Mathlib). (You probably should verify that `lake build` works as expected in that project.)
159
+
160
+ Now you can run the REPL as:
161
+ ```shell
162
+ lake env ../path/to/repl/.lake/build/bin/repl < commands.in
163
+ ```
164
+ (Here `../path/to/repl/` represents the path to your checkout of this repository,
165
+ in which you've already run `lake build`.)
166
+
167
+ The `lake env` prefix sets up the environment associated to your local project, so that the REPL
168
+ can find needed imports.
169
+
170
+ ## Future work
171
+
172
+ * Replay tactic scripts from tactic mode back into the original `sorry`.
173
+ * Currently if you create scoped environment extensions (e.g. scoped notations) in a session
174
+ these are not correctly pickled and unpickled in later sessions.
REPL.lean ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ import REPL.Frontend
2
+ import REPL.Lean.InfoTree
3
+ import REPL.JSON
4
+ import REPL.Main
REPL/Frontend.lean ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /-
2
+ Copyright (c) 2023 Scott Morrison. All rights reserved.
3
+ Released under Apache 2.0 license as described in the file LICENSE.
4
+ Authors: Scott Morrison
5
+ -/
6
+ import Lean.Elab.Frontend
7
+
8
+ open Lean Elab
9
+
10
+ namespace Lean.Elab.IO
11
+
12
+ /--
13
+ Wrapper for `IO.processCommands` that enables info states, and returns
14
+ * the new command state
15
+ * messages
16
+ * info trees
17
+ -/
18
+ def processCommandsWithInfoTrees
19
+ (inputCtx : Parser.InputContext) (parserState : Parser.ModuleParserState)
20
+ (commandState : Command.State) : IO (Command.State × List Message × List InfoTree) := do
21
+ let commandState := { commandState with infoState.enabled := true }
22
+ let s ← IO.processCommands inputCtx parserState commandState <&> Frontend.State.commandState
23
+ pure (s, s.messages.msgs.toList, s.infoState.trees.toList)
24
+
25
+ /--
26
+ Process some text input, with or without an existing command state.
27
+ If there is no existing environment, we parse the input for headers (e.g. import statements),
28
+ and create a new environment.
29
+ Otherwise, we add to the existing environment.
30
+
31
+ Returns the resulting command state, along with a list of messages and info trees.
32
+ -/
33
+ def processInput (input : String) (cmdState? : Option Command.State)
34
+ (opts : Options := {}) (fileName : Option String := none) :
35
+ IO (Command.State × List Message × List InfoTree) := unsafe do
36
+ Lean.initSearchPath (← Lean.findSysroot)
37
+ enableInitializersExecution
38
+ let fileName := fileName.getD "<input>"
39
+ let inputCtx := Parser.mkInputContext input fileName
40
+ let (parserState, commandState) ← match cmdState? with
41
+ | none => do
42
+ let (header, parserState, messages) ← Parser.parseHeader inputCtx
43
+ let (env, messages) ← processHeader header opts messages inputCtx
44
+ pure (parserState, (Command.mkState env messages opts))
45
+ | some cmdState => do
46
+ pure ({ : Parser.ModuleParserState }, cmdState)
47
+ processCommandsWithInfoTrees inputCtx parserState commandState
REPL/JSON.lean ADDED
@@ -0,0 +1,186 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /-
2
+ Copyright (c) 2023 Scott Morrison. All rights reserved.
3
+ Released under Apache 2.0 license as described in the file LICENSE.
4
+ Authors: Scott Morrison
5
+ -/
6
+ import Lean.Data.Json
7
+ import Lean.Message
8
+ import Lean.Elab.InfoTree.Main
9
+
10
+ open Lean Elab InfoTree
11
+
12
+ namespace REPL
13
+
14
+ structure CommandOptions where
15
+ allTactics : Option Bool := none
16
+ /--
17
+ Should be "full", "tactics", "original", or "substantive".
18
+ Anything else is ignored.
19
+ -/
20
+ infotree : Option String
21
+
22
+ /-- Run Lean commands.
23
+ If `env = none`, starts a new session (in which you can use `import`).
24
+ If `env = some n`, builds on the existing environment `n`.
25
+ -/
26
+ structure Command extends CommandOptions where
27
+ env : Option Nat
28
+ cmd : String
29
+ deriving ToJson, FromJson
30
+
31
+ /-- Process a Lean file in a fresh environment. -/
32
+ structure File extends CommandOptions where
33
+ path : System.FilePath
34
+ deriving FromJson
35
+
36
+ /--
37
+ Run a tactic in a proof state.
38
+ -/
39
+ structure ProofStep where
40
+ proofState : Nat
41
+ tactic : String
42
+ deriving ToJson, FromJson
43
+
44
+ /-- Line and column information for error messages and sorries. -/
45
+ structure Pos where
46
+ line : Nat
47
+ column : Nat
48
+ deriving ToJson, FromJson
49
+
50
+ /-- Severity of a message. -/
51
+ inductive Severity
52
+ | trace | info | warning | error
53
+ deriving ToJson, FromJson
54
+
55
+ /-- A Lean message. -/
56
+ structure Message where
57
+ pos : Pos
58
+ endPos : Option Pos
59
+ severity : Severity
60
+ data : String
61
+ deriving ToJson, FromJson
62
+
63
+ /-- Construct the JSON representation of a Lean message. -/
64
+ def Message.of (m : Lean.Message) : IO Message := do pure <|
65
+ { pos := ⟨m.pos.line, m.pos.column⟩,
66
+ endPos := m.endPos.map fun p => ⟨p.line, p.column⟩,
67
+ severity := match m.severity with
68
+ | .information => .info
69
+ | .warning => .warning
70
+ | .error => .error,
71
+ data := (← m.data.toString).trim }
72
+
73
+ /-- A Lean `sorry`. -/
74
+ structure Sorry where
75
+ pos : Pos
76
+ endPos : Pos
77
+ goal : String
78
+ /--
79
+ The index of the proof state at the sorry.
80
+ You can use the `ProofStep` instruction to run a tactic at this state.
81
+ -/
82
+ proofState : Option Nat
83
+ deriving FromJson
84
+
85
+ instance : ToJson Sorry where
86
+ toJson r := Json.mkObj <| .join [
87
+ [("goal", r.goal)],
88
+ [("proofState", toJson r.proofState)],
89
+ if r.pos.line ≠ 0 then [("pos", toJson r.pos)] else [],
90
+ if r.endPos.line ≠ 0 then [("endPos", toJson r.endPos)] else [],
91
+ ]
92
+
93
+ /-- Construct the JSON representation of a Lean sorry. -/
94
+ def Sorry.of (goal : String) (pos endPos : Lean.Position) (proofState : Option Nat) : Sorry :=
95
+ { pos := ⟨pos.line, pos.column⟩,
96
+ endPos := ⟨endPos.line, endPos.column⟩,
97
+ goal,
98
+ proofState }
99
+
100
+ structure Tactic where
101
+ pos : Pos
102
+ endPos : Pos
103
+ goals : String
104
+ tactic : String
105
+ proofState : Option Nat
106
+ deriving ToJson, FromJson
107
+
108
+ /-- Construct the JSON representation of a Lean tactic. -/
109
+ def Tactic.of (goals tactic : String) (pos endPos : Lean.Position) (proofState : Option Nat) : Tactic :=
110
+ { pos := ⟨pos.line, pos.column⟩,
111
+ endPos := ⟨endPos.line, endPos.column⟩,
112
+ goals,
113
+ tactic,
114
+ proofState }
115
+
116
+ /--
117
+ A response to a Lean command.
118
+ `env` can be used in later calls, to build on the stored environment.
119
+ -/
120
+ structure CommandResponse where
121
+ env : Nat
122
+ messages : List Message := []
123
+ sorries : List Sorry := []
124
+ tactics : List Tactic := []
125
+ infotree : Option Json := none
126
+ deriving FromJson
127
+
128
+ def Json.nonemptyList [ToJson α] (k : String) : List α → List (String × Json)
129
+ | [] => []
130
+ | l => [⟨k, toJson l⟩]
131
+
132
+ instance : ToJson CommandResponse where
133
+ toJson r := Json.mkObj <| .join [
134
+ [("env", r.env)],
135
+ Json.nonemptyList "messages" r.messages,
136
+ Json.nonemptyList "sorries" r.sorries,
137
+ Json.nonemptyList "tactics" r.tactics,
138
+ match r.infotree with | some j => [("infotree", j)] | none => []
139
+ ]
140
+
141
+ /--
142
+ A response to a Lean tactic.
143
+ `proofState` can be used in later calls, to run further tactics.
144
+ -/
145
+ structure ProofStepResponse where
146
+ proofState : Nat
147
+ goals : List String
148
+ messages : List Message := []
149
+ sorries : List Sorry := []
150
+ traces : List String
151
+ deriving ToJson, FromJson
152
+
153
+ instance : ToJson ProofStepResponse where
154
+ toJson r := Json.mkObj <| .join [
155
+ [("proofState", r.proofState)],
156
+ [("goals", toJson r.goals)],
157
+ Json.nonemptyList "messages" r.messages,
158
+ Json.nonemptyList "sorries" r.sorries,
159
+ Json.nonemptyList "traces" r.traces
160
+ ]
161
+
162
+ /-- Json wrapper for an error. -/
163
+ structure Error where
164
+ message : String
165
+ deriving ToJson, FromJson
166
+
167
+ structure PickleEnvironment where
168
+ env : Nat
169
+ pickleTo : System.FilePath
170
+ deriving ToJson, FromJson
171
+
172
+ structure UnpickleEnvironment where
173
+ unpickleEnvFrom : System.FilePath
174
+ deriving ToJson, FromJson
175
+
176
+ structure PickleProofState where
177
+ proofState : Nat
178
+ pickleTo : System.FilePath
179
+ deriving ToJson, FromJson
180
+
181
+ structure UnpickleProofState where
182
+ unpickleProofStateFrom : System.FilePath
183
+ env : Option Nat
184
+ deriving ToJson, FromJson
185
+
186
+ end REPL
REPL/Lean/ContextInfo.lean ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ import Lean
2
+
3
+ namespace Lean.Elab.ContextInfo
4
+
5
+ /-- Pretty print an expression in the given `ContextInfo` with the given `LocalContext`. -/
6
+ def ppExpr (ctx : ContextInfo) (lctx : LocalContext) (e : Expr) : IO Format :=
7
+ ctx.runMetaM lctx (do Meta.ppExpr (← instantiateMVars e))
8
+
9
+ end Lean.Elab.ContextInfo
REPL/Lean/Environment.lean ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import REPL.Util.Pickle
2
+ import Lean.Replay
3
+
4
+ open System (FilePath)
5
+
6
+ namespace Lean.Environment
7
+
8
+ /--
9
+ Pickle an `Environment` to disk.
10
+
11
+ We only store:
12
+ * the list of imports
13
+ * the new constants from `Environment.constants`
14
+ and when unpickling, we build a fresh `Environment` from the imports,
15
+ and then add the new constants.
16
+ -/
17
+ def pickle (env : Environment) (path : FilePath) : IO Unit :=
18
+ _root_.pickle path (env.header.imports, env.constants.map₂)
19
+
20
+ /--
21
+ Unpickle an `Environment` from disk.
22
+
23
+ We construct a fresh `Environment` with the relevant imports,
24
+ and then replace the new constants.
25
+ -/
26
+ def unpickle (path : FilePath) : IO (Environment × CompactedRegion) := unsafe do
27
+ let ((imports, map₂), region) ← _root_.unpickle (Array Import × PHashMap Name ConstantInfo) path
28
+ let env ← importModules imports {} 0
29
+ return (← env.replay (HashMap.ofList map₂.toList), region)
30
+
31
+ end Lean.Environment
REPL/Lean/InfoTree.lean ADDED
@@ -0,0 +1,272 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /-
2
+ Copyright (c) 2023 Scott Morrison. All rights reserved.
3
+ Released under Apache 2.0 license as described in the file LICENSE.
4
+ Authors: Scott Morrison
5
+ -/
6
+ import Lean
7
+
8
+ /-!
9
+ Additional functions to deal with `InfoTree`.
10
+ -/
11
+
12
+ open Lean Elab Meta
13
+
14
+ namespace Lean.FileMap
15
+
16
+ /-- Extract the range of a `Syntax` expressed as lines and columns. -/
17
+ -- Extracted from the private declaration `Lean.Elab.formatStxRange`,
18
+ -- in `Lean.Elab.InfoTree.Main`.
19
+ def stxRange (fileMap : FileMap) (stx : Syntax) : Position × Position :=
20
+ let pos := stx.getPos?.getD 0
21
+ let endPos := stx.getTailPos?.getD pos
22
+ (fileMap.toPosition pos, fileMap.toPosition endPos)
23
+
24
+ end Lean.FileMap
25
+
26
+ namespace Lean.Syntax
27
+
28
+ /-- Check if a `Syntax` is an explicit invocation of the `sorry` tactic. -/
29
+ def isSorryTactic (stx : Syntax) : Bool :=
30
+ s!"{stx}" = "(Tactic.tacticSorry \"sorry\")"
31
+
32
+ /-- Check if a `Syntax` is an explicit `sorry` term. -/
33
+ def isSorryTerm (stx : Syntax) : Bool :=
34
+ s!"{stx}" = "(Term.sorry \"sorry\")"
35
+
36
+ end Lean.Syntax
37
+
38
+ namespace Lean.Elab
39
+
40
+ /-- Extract the range of a `Syntax` expressed as lines and columns. -/
41
+ -- Extracted from the private declaration `Lean.Elab.formatStxRange`,
42
+ -- in `Lean.Elab.InfoTree.Main`.
43
+ def stxRange (fileMap : FileMap) (stx : Syntax) : Position × Position :=
44
+ let pos := stx.getPos?.getD 0
45
+ let endPos := stx.getTailPos?.getD pos
46
+ (fileMap.toPosition pos, fileMap.toPosition endPos)
47
+
48
+ end Lean.Elab
49
+
50
+ namespace Lean.Elab.Info
51
+
52
+ /-- The type of a `Lean.Elab.Info`, as a string. -/
53
+ def kind : Info → String
54
+ | .ofTacticInfo _ => "TacticInfo"
55
+ | .ofTermInfo _ => "TermInfo"
56
+ | .ofCommandInfo _ => "CommmandInfo"
57
+ | .ofMacroExpansionInfo _ => "MacroExpansionInfo"
58
+ | .ofOptionInfo _ => "OptionInfo"
59
+ | .ofFieldInfo _ => "FieldInfo"
60
+ | .ofCompletionInfo _ => "CompletionInfo"
61
+ | .ofUserWidgetInfo _ => "UserWidgetInfo"
62
+ | .ofCustomInfo _ => "CustomInfo"
63
+ | .ofFVarAliasInfo _ => "FVarAliasInfo"
64
+ | .ofFieldRedeclInfo _ => "FieldRedeclInfo"
65
+ | .ofOmissionInfo _ => "OmissionInfo"
66
+
67
+ /-- The `Syntax` for a `Lean.Elab.Info`, if there is one. -/
68
+ def stx? : Info → Option Syntax
69
+ | .ofTacticInfo info => info.stx
70
+ | .ofTermInfo info => info.stx
71
+ | .ofCommandInfo info => info.stx
72
+ | .ofMacroExpansionInfo info => info.stx
73
+ | .ofOptionInfo info => info.stx
74
+ | .ofFieldInfo info => info.stx
75
+ | .ofCompletionInfo info => info.stx
76
+ | .ofUserWidgetInfo info => info.stx
77
+ | .ofCustomInfo info => info.stx
78
+ | .ofFVarAliasInfo _ => none
79
+ | .ofFieldRedeclInfo info => info.stx
80
+ | .ofOmissionInfo info => info.stx
81
+
82
+ /-- Is the `Syntax` for this `Lean.Elab.Info` original, or synthetic? -/
83
+ def isOriginal (i : Info) : Bool :=
84
+ match i.stx? with
85
+ | none => true -- Somewhat unclear what to do with `FVarAliasInfo`, so be conservative.
86
+ | some stx => match stx.getHeadInfo with
87
+ | .original .. => true
88
+ | _ => false
89
+
90
+ end Lean.Elab.Info
91
+ namespace Lean.Elab.TacticInfo
92
+
93
+ /-- Find the name for the outermost `Syntax` in this `TacticInfo`. -/
94
+ def name? (t : TacticInfo) : Option Name :=
95
+ match t.stx with
96
+ | Syntax.node _ n _ => some n
97
+ | _ => none
98
+
99
+ /-- Decide whether a tactic is "substantive",
100
+ or is merely a tactic combinator (e.g. `by`, `;`, multiline tactics, parenthesized tactics). -/
101
+ def isSubstantive (t : TacticInfo) : Bool :=
102
+ match t.name? with
103
+ | none => false
104
+ | some `null => false
105
+ | some ``cdot => false
106
+ | some ``cdotTk => false
107
+ | some ``Lean.Parser.Term.byTactic => false
108
+ | some ``Lean.Parser.Tactic.tacticSeq => false
109
+ | some ``Lean.Parser.Tactic.tacticSeq1Indented => false
110
+ | some ``Lean.Parser.Tactic.«tactic_<;>_» => false
111
+ | some ``Lean.Parser.Tactic.paren => false
112
+ | _ => true
113
+
114
+ end Lean.Elab.TacticInfo
115
+
116
+ namespace Lean.Elab.InfoTree
117
+
118
+ /--
119
+ Keep `.node` nodes and `.hole` nodes satisfying predicates.
120
+
121
+ Returns a `List InfoTree`, although in most situations this will be a singleton.
122
+ -/
123
+ partial def filter (p : Info → Bool) (m : MVarId → Bool := fun _ => false) :
124
+ InfoTree → List InfoTree
125
+ | .context ctx tree => tree.filter p m |>.map (.context ctx)
126
+ | .node info children =>
127
+ if p info then
128
+ [.node info (children.toList.map (filter p m)).join.toPArray']
129
+ else
130
+ (children.toList.map (filter p m)).join
131
+ | .hole mvar => if m mvar then [.hole mvar] else []
132
+
133
+ /-- Discard all nodes besides `.context` nodes and `TacticInfo` nodes. -/
134
+ partial def retainTacticInfo (tree : InfoTree) : List InfoTree :=
135
+ tree.filter fun | .ofTacticInfo _ => true | _ => false
136
+
137
+ /-- Retain only nodes with "original" syntax. -/
138
+ partial def retainOriginal (tree : InfoTree) : List InfoTree :=
139
+ tree.filter Info.isOriginal
140
+
141
+ /-- Discard all TacticInfo nodes that are tactic combinators or structuring tactics. -/
142
+ -- There is considerable grey area here: what to do with `classical`?
143
+ partial def retainSubstantive (tree : InfoTree) : List InfoTree :=
144
+ tree.filter fun | .ofTacticInfo i => i.isSubstantive | _ => true
145
+
146
+ /-- Analogue of `Lean.Elab.InfoTree.findInfo?`, but that returns all results. -/
147
+ partial def findAllInfo (t : InfoTree) (ctx? : Option ContextInfo) (p : Info → Bool) :
148
+ List (Info × Option ContextInfo) :=
149
+ match t with
150
+ | context ctx t => t.findAllInfo (ctx.mergeIntoOuter? ctx?) p
151
+ | node i ts =>
152
+ let info := if p i then [(i, ctx?)] else []
153
+ let rest := ts.toList.bind (fun t => t.findAllInfo ctx? p)
154
+ info ++ rest
155
+ | _ => []
156
+
157
+ /-- Return all `TacticInfo` nodes in an `InfoTree` with "original" syntax,
158
+ each equipped with its relevant `ContextInfo`. -/
159
+ def findTacticNodes (t : InfoTree) : List (TacticInfo × ContextInfo) :=
160
+ let infos := t.findAllInfo none fun i => match i with
161
+ | .ofTacticInfo i' => i.isOriginal && i'.isSubstantive
162
+ | _ => false
163
+ infos.filterMap fun p => match p with
164
+ | (.ofTacticInfo i, some ctx) => (i, ctx)
165
+ | _ => none
166
+
167
+ /-- Return all `TacticInfo` nodes in an `InfoTree`
168
+ corresponding to explicit invocations of the `sorry` tactic,
169
+ each equipped with its relevant `ContextInfo`. -/
170
+ def findSorryTacticNodes (t : InfoTree) : List (TacticInfo × ContextInfo) :=
171
+ let infos := t.findAllInfo none fun i => match i with
172
+ | .ofTacticInfo i => i.stx.isSorryTactic && !i.goalsBefore.isEmpty
173
+ | _ => false
174
+ infos.filterMap fun p => match p with
175
+ | (.ofTacticInfo i, some ctx) => (i, ctx)
176
+ | _ => none
177
+
178
+ /-- Return all `TermInfo` nodes in an `InfoTree`
179
+ corresponding to explicit `sorry` terms,
180
+ each equipped with its relevant `ContextInfo`. -/
181
+ def findSorryTermNodes (t : InfoTree) : List (TermInfo × ContextInfo) :=
182
+ let infos := t.findAllInfo none fun i => match i with
183
+ | .ofTermInfo i => i.stx.isSorryTerm
184
+ | _ => false
185
+ infos.filterMap fun p => match p with
186
+ | (.ofTermInfo i, some ctx) => (i, ctx)
187
+ | _ => none
188
+
189
+ inductive SorryType
190
+ | tactic : MVarId → SorryType
191
+ | term : LocalContext → Option Expr → SorryType
192
+ deriving Inhabited
193
+
194
+ /--
195
+ Finds all appearances of `sorry` in an `InfoTree`, reporting
196
+ * the `ContextInfo` at that point,
197
+ * the `MVarId` for a goal that was closed by `sorry`,
198
+ or the `Option Expr` expected type for a term supplied by `sorry`
199
+ * and the start and end positions of the `sorry` in the file.
200
+ -/
201
+ def sorries (t : InfoTree) : List (ContextInfo × SorryType × Position × Position) :=
202
+ (t.findSorryTacticNodes.map fun ⟨i, ctx⟩ =>
203
+ -- HACK: creating a child ngen
204
+ ({ ctx with mctx := i.mctxBefore, ngen := ctx.ngen.mkChild.1 }, .tactic i.goalsBefore.head!,
205
+ stxRange ctx.fileMap i.stx)) ++
206
+ (t.findSorryTermNodes.map fun ⟨i, ctx⟩ =>
207
+ (ctx, .term i.lctx i.expectedType?, stxRange ctx.fileMap i.stx))
208
+
209
+ def tactics (t : InfoTree) : List (ContextInfo × Syntax × List MVarId × Position × Position) :=
210
+ (t.findTacticNodes.map fun ⟨i, ctx⟩ =>
211
+ -- HACK: creating a child ngen
212
+ ({ ctx with mctx := i.mctxBefore, ngen := ctx.ngen.mkChild.1 }, i.stx, i.goalsBefore,
213
+ stxRange ctx.fileMap i.stx))
214
+
215
+
216
+ end Lean.Elab.InfoTree
217
+
218
+ namespace Lean.Elab.TacticInfo
219
+
220
+ /-- Return the range of the tactic, as a pair of file positions. -/
221
+ def range (info : TacticInfo) (ctx : ContextInfo) : Position × Position := ctx.fileMap.stxRange info.stx
222
+
223
+ /-- Pretty print a tactic. -/
224
+ def pp (info : TacticInfo) (ctx : ContextInfo) : IO Format :=
225
+ ctx.runMetaM {} try
226
+ Lean.PrettyPrinter.ppTactic ⟨info.stx⟩
227
+ catch _ =>
228
+ pure "<failed to pretty print>"
229
+
230
+ open Meta
231
+
232
+ /-- Run a tactic on the goals stored in a `TacticInfo`. -/
233
+ def runMetaMGoalsBefore (info : TacticInfo) (ctx : ContextInfo) (x : List MVarId → MetaM α) : IO α := do
234
+ ctx.runMetaM {} <| Meta.withMCtx info.mctxBefore <| x info.goalsBefore
235
+
236
+ /-- Run a tactic on the after goals stored in a `TacticInfo`. -/
237
+ def runMetaMGoalsAfter (info : TacticInfo) (ctx : ContextInfo) (x : List MVarId → MetaM α) : IO α := do
238
+ ctx.runMetaM {} <| Meta.withMCtx info.mctxAfter <| x info.goalsAfter
239
+
240
+ /-- Run a tactic on the main goal stored in a `TacticInfo`. -/
241
+ def runMetaM (info : TacticInfo) (ctx : ContextInfo) (x : MVarId → MetaM α) : IO α := do
242
+ match info.goalsBefore.head? with
243
+ | none => throw <| IO.userError s!"No goals at {← info.pp ctx}"
244
+ | some g => info.runMetaMGoalsBefore ctx fun _ => do g.withContext <| x g
245
+
246
+ def mainGoal (info : TacticInfo) (ctx : ContextInfo) : IO Expr :=
247
+ info.runMetaM ctx (fun g => do instantiateMVars (← g.getType))
248
+
249
+ def formatMainGoal (info : TacticInfo) (ctx : ContextInfo) : IO Format :=
250
+ info.runMetaM ctx (fun g => do ppExpr (← instantiateMVars (← g.getType)))
251
+
252
+ def goalState (info : TacticInfo) (ctx : ContextInfo) : IO (List Format) := do
253
+ info.runMetaMGoalsBefore ctx (fun gs => gs.mapM fun g => do Meta.ppGoal g)
254
+
255
+ def goalStateAfter (info : TacticInfo) (ctx : ContextInfo) : IO (List Format) := do
256
+ info.runMetaMGoalsAfter ctx (fun gs => gs.mapM fun g => do Meta.ppGoal g)
257
+
258
+ def ppExpr (info : TacticInfo) (ctx : ContextInfo) (e : Expr) : IO Format :=
259
+ info.runMetaM ctx (fun _ => do Meta.ppExpr (← instantiateMVars e))
260
+
261
+ end Lean.Elab.TacticInfo
262
+
263
+ namespace Lean.Elab.InfoTree
264
+
265
+ /--
266
+ Finds all tactic invocations in an `InfoTree`,
267
+ ignoring structuring tactics (e.g. `by`, `;`, multiline tactics, parenthesized tactics).
268
+ -/
269
+ def substantiveTactics (t : InfoTree) : List (TacticInfo × ContextInfo) :=
270
+ t.findTacticNodes.filter fun i => i.1.isSubstantive
271
+
272
+ end Lean.Elab.InfoTree
REPL/Lean/InfoTree/ToJson.lean ADDED
@@ -0,0 +1,114 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import REPL.Lean.InfoTree
2
+ import REPL.Lean.ContextInfo
3
+
4
+ /-!
5
+ # Exporting an `InfoTree` as Json
6
+
7
+ -/
8
+
9
+ namespace Lean.Elab
10
+
11
+ structure InfoTreeNode (α : Type) where
12
+ kind : String
13
+ node : Option α
14
+ children : List Json
15
+ deriving ToJson
16
+
17
+ deriving instance ToJson for Lean.Position
18
+
19
+ structure Syntax.Range where
20
+ synthetic : Bool
21
+ start : Lean.Position
22
+ finish : Lean.Position
23
+ deriving ToJson
24
+
25
+ structure Syntax.Json where
26
+ pp : Option String
27
+ -- raw : String
28
+ range : Range
29
+ deriving ToJson
30
+
31
+ def _root_.Lean.Syntax.toRange (stx : Syntax) (ctx : ContextInfo) : Syntax.Range :=
32
+ let pos := stx.getPos?.getD 0
33
+ let endPos := stx.getTailPos?.getD pos
34
+ { start := ctx.fileMap.toPosition pos
35
+ finish := ctx.fileMap.toPosition endPos
36
+ synthetic := match stx.getHeadInfo with
37
+ | .original .. => false
38
+ | _ => true }
39
+
40
+ def _root_.Lean.Syntax.toJson (stx : Syntax) (ctx : ContextInfo) (lctx : LocalContext) : IO Syntax.Json := do
41
+ return {
42
+ pp := match (← ctx.ppSyntax lctx stx).pretty with
43
+ | "failed to pretty print term (use 'set_option pp.rawOnError true' for raw representation)" => none
44
+ | pp => some pp
45
+ -- raw := toString stx
46
+ range := stx.toRange ctx }
47
+
48
+ structure TacticInfo.Json where
49
+ name : Option Name
50
+ stx : Syntax.Json
51
+ goalsBefore : List String
52
+ goalsAfter : List String
53
+ deriving ToJson
54
+
55
+ -- Note: this is not responsible for converting the children to Json.
56
+ def TacticInfo.toJson (i : TacticInfo) (ctx : ContextInfo) : IO TacticInfo.Json := do
57
+ return {
58
+ name := i.name?
59
+ stx :=
60
+ { pp := Format.pretty (← i.pp ctx),
61
+ -- raw := toString i.info.stx,
62
+ range := i.stx.toRange ctx },
63
+ goalsBefore := (← i.goalState ctx).map Format.pretty,
64
+ goalsAfter := (← i.goalStateAfter ctx).map Format.pretty }
65
+
66
+ structure CommandInfo.Json where
67
+ elaborator : Option Name
68
+ stx : Syntax.Json
69
+ deriving ToJson
70
+
71
+ def CommandInfo.toJson (info : CommandInfo) (ctx : ContextInfo) : IO CommandInfo.Json := do
72
+ return {
73
+ elaborator := match info.elaborator with | .anonymous => none | n => some n,
74
+ stx := ← info.stx.toJson ctx {} }
75
+
76
+ structure TermInfo.Json where
77
+ elaborator : Option Name
78
+ stx : Syntax.Json
79
+ expectedType? : Option String
80
+ expr : String
81
+ isBinder : Bool
82
+ deriving ToJson
83
+
84
+ def TermInfo.toJson (info : TermInfo) (ctx : ContextInfo) : IO TermInfo.Json := do
85
+ return {
86
+ elaborator := match info.elaborator with | .anonymous => none | n => some n,
87
+ stx := ← info.stx.toJson ctx info.lctx,
88
+ expectedType? := ← info.expectedType?.mapM fun ty => do
89
+ pure (← ctx.ppExpr info.lctx ty).pretty
90
+ expr := (← ctx.ppExpr info.lctx info.expr).pretty
91
+ isBinder := info.isBinder }
92
+
93
+ structure InfoTree.HoleJson where
94
+ goalState : String
95
+ deriving ToJson
96
+
97
+ partial def InfoTree.toJson (t : InfoTree) (ctx? : Option ContextInfo) : IO Json := do
98
+ match t with
99
+ | .context ctx t => t.toJson (ctx.mergeIntoOuter? ctx?)
100
+ | .node info children =>
101
+ if let some ctx := ctx? then
102
+ let node : Option Json ← match info with
103
+ | .ofTermInfo info => some <$> (do pure <| Lean.toJson (← info.toJson ctx))
104
+ | .ofCommandInfo info => some <$> (do pure <| Lean.toJson (← info.toJson ctx))
105
+ | .ofTacticInfo info => some <$> (do pure <| Lean.toJson (← info.toJson ctx))
106
+ | _ => pure none
107
+ return Lean.toJson (InfoTreeNode.mk info.kind node (← children.toList.mapM fun t' => t'.toJson ctx))
108
+ else throw <| IO.userError "No `ContextInfo` available."
109
+ | .hole mvarId =>
110
+ if let some ctx := ctx? then
111
+ return Lean.toJson (InfoTree.HoleJson.mk (← ctx.runMetaM {} (do Meta.ppGoal mvarId)).pretty)
112
+ else throw <| IO.userError "No `ContextInfo` available."
113
+
114
+ end Lean.Elab
REPL/Main.lean ADDED
@@ -0,0 +1,323 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /-
2
+ Copyright (c) 2023 Scott Morrison. All rights reserved.
3
+ Released under Apache 2.0 license as described in the file LICENSE.
4
+ Authors: Scott Morrison
5
+ -/
6
+ import REPL.JSON
7
+ import REPL.Frontend
8
+ import REPL.Util.Path
9
+ import REPL.Lean.ContextInfo
10
+ import REPL.Lean.Environment
11
+ import REPL.Lean.InfoTree
12
+ import REPL.Lean.InfoTree.ToJson
13
+ import REPL.Snapshots
14
+
15
+ /-!
16
+ # A REPL for Lean.
17
+
18
+ Communicates via JSON on stdin and stdout. Commands should be separated by blank lines.
19
+
20
+ Commands may be of the form
21
+ ```
22
+ { "cmd" : "import Mathlib.Data.List.Basic\ndef f := 2" }
23
+ ```
24
+ or
25
+ ```
26
+ { "cmd" : "example : f = 2 := rfl", "env" : 3 }
27
+ ```
28
+
29
+ The `env` field, if present,
30
+ must contain a number received in the `env` field of a previous response,
31
+ and causes the command to be run in the existing environment.
32
+
33
+ If there is no `env` field, a new environment is created.
34
+
35
+ You can only use `import` commands when you do not specify the `env` field.
36
+
37
+ You can backtrack simply by using earlier values for `env`.
38
+
39
+ The results are of the form
40
+ ```
41
+ {"sorries":
42
+ [{"pos": {"line": 1, "column": 18},
43
+ "endPos": {"line": 1, "column": 23},
44
+ "goal": "\n⊢ Nat"}],
45
+ "messages":
46
+ [{"severity": "error",
47
+ "pos": {"line": 1, "column": 23},
48
+ "endPos": {"line": 1, "column": 26},
49
+ "data":
50
+ "type mismatch\n rfl\nhas type\n f = f : Prop\nbut is expected to have type\n f = 2 : Prop"}],
51
+ "env": 6}
52
+ ```
53
+ showing any messages generated, or sorries with their goal states.
54
+ Information is generated for tactic mode sorries, but not for term mode sorries.
55
+ -/
56
+
57
+ open Lean Elab
58
+
59
+ namespace REPL
60
+
61
+ /-- The monadic state for the Lean REPL. -/
62
+ structure State where
63
+ /--
64
+ Environment snapshots after complete declarations.
65
+ The user can run a declaration in a given environment using `{"cmd": "def f := 37", "env": 17}`.
66
+ -/
67
+ cmdStates : Array CommandSnapshot := #[]
68
+ /--
69
+ Proof states after individual tactics.
70
+ The user can run a tactic in a given proof state using `{"tactic": "exact 42", "proofState": 5}`.
71
+ Declarations with containing `sorry` record a proof state at each sorry,
72
+ and report the numerical index for the recorded state at each sorry.
73
+ -/
74
+ proofStates : Array ProofSnapshot := #[]
75
+
76
+ /--
77
+ The Lean REPL monad.
78
+
79
+ We only use this with `m := IO`, but it is set up as a monad transformer for flexibility.
80
+ -/
81
+ abbrev M (m : Type → Type) := StateT State m
82
+
83
+ variable [Monad m] [MonadLiftT IO m]
84
+
85
+ /-- Record an `CommandSnapshot` into the REPL state, returning its index for future use. -/
86
+ def recordCommandSnapshot (state : CommandSnapshot) : M m Nat := do
87
+ let id := (← get).cmdStates.size
88
+ modify fun s => { s with cmdStates := s.cmdStates.push state }
89
+ return id
90
+
91
+ /-- Record a `ProofSnapshot` into the REPL state, returning its index for future use. -/
92
+ def recordProofSnapshot (proofState : ProofSnapshot) : M m Nat := do
93
+ let id := (← get).proofStates.size
94
+ modify fun s => { s with proofStates := s.proofStates.push proofState }
95
+ return id
96
+
97
+ def sorries (trees : List InfoTree) (env? : Option Environment) : M m (List Sorry) :=
98
+ trees.bind InfoTree.sorries |>.mapM
99
+ fun ⟨ctx, g, pos, endPos⟩ => do
100
+ let (goal, proofState) ← match g with
101
+ | .tactic g => do
102
+ let s ← ProofSnapshot.create ctx none env? [g]
103
+ pure ("\n".intercalate <| (← s.ppGoals).map fun s => s!"{s}", some s)
104
+ | .term lctx (some t) => do
105
+ let s ← ProofSnapshot.create ctx lctx env? [] [t]
106
+ pure ("\n".intercalate <| (← s.ppGoals).map fun s => s!"{s}", some s)
107
+ | .term _ none => unreachable!
108
+ let proofStateId ← proofState.mapM recordProofSnapshot
109
+ return Sorry.of goal pos endPos proofStateId
110
+
111
+ def ppTactic (ctx : ContextInfo) (stx : Syntax) : IO Format :=
112
+ ctx.runMetaM {} try
113
+ Lean.PrettyPrinter.ppTactic ⟨stx⟩
114
+ catch _ =>
115
+ pure "<failed to pretty print>"
116
+
117
+ def tactics (trees : List InfoTree) : M m (List Tactic) :=
118
+ trees.bind InfoTree.tactics |>.mapM
119
+ fun ⟨ctx, stx, goals, pos, endPos⟩ => do
120
+ let proofState := some (← ProofSnapshot.create ctx none none goals)
121
+ let goals := s!"{(← ctx.ppGoals goals)}".trim
122
+ let tactic := Format.pretty (← ppTactic ctx stx)
123
+ let proofStateId ← proofState.mapM recordProofSnapshot
124
+ return Tactic.of goals tactic pos endPos proofStateId
125
+
126
+ /-- Record a `ProofSnapshot` and generate a JSON response for it. -/
127
+ def createProofStepReponse (proofState : ProofSnapshot) (old? : Option ProofSnapshot := none) :
128
+ M m ProofStepResponse := do
129
+ let messages := proofState.newMessages old?
130
+ let messages ← messages.mapM fun m => Message.of m
131
+ let traces ← proofState.newTraces old?
132
+ let trees := proofState.newInfoTrees old?
133
+ let trees ← match old? with
134
+ | some old => do
135
+ let (ctx, _) ← old.runMetaM do return { ← CommandContextInfo.save with }
136
+ let ctx := PartialContextInfo.commandCtx ctx
137
+ pure <| trees.map fun t => InfoTree.context ctx t
138
+ | none => pure trees
139
+ -- For debugging purposes, sometimes we print out the trees here:
140
+ -- trees.forM fun t => do IO.println (← t.format)
141
+ let sorries ← sorries trees none
142
+ let id ← recordProofSnapshot proofState
143
+ return {
144
+ proofState := id
145
+ goals := (← proofState.ppGoals).map fun s => s!"{s}"
146
+ messages
147
+ sorries
148
+ traces }
149
+
150
+ /-- Pickle a `CommandSnapshot`, generating a JSON response. -/
151
+ def pickleCommandSnapshot (n : PickleEnvironment) : M m (CommandResponse ⊕ Error) := do
152
+ match (← get).cmdStates[n.env]? with
153
+ | none => return .inr ⟨"Unknown environment."⟩
154
+ | some env =>
155
+ discard <| env.pickle n.pickleTo
156
+ return .inl { env := n.env }
157
+
158
+ /-- Unpickle a `CommandSnapshot`, generating a JSON response. -/
159
+ def unpickleCommandSnapshot (n : UnpickleEnvironment) : M IO CommandResponse := do
160
+ let (env, _) ← CommandSnapshot.unpickle n.unpickleEnvFrom
161
+ let env ← recordCommandSnapshot env
162
+ return { env }
163
+
164
+ /-- Pickle a `ProofSnapshot`, generating a JSON response. -/
165
+ -- This generates a new identifier, which perhaps is not what we want?
166
+ def pickleProofSnapshot (n : PickleProofState) : M m (ProofStepResponse ⊕ Error) := do
167
+ match (← get).proofStates[n.proofState]? with
168
+ | none => return .inr ⟨"Unknown proof State."⟩
169
+ | some proofState =>
170
+ discard <| proofState.pickle n.pickleTo
171
+ return .inl (← createProofStepReponse proofState)
172
+
173
+ /-- Unpickle a `ProofSnapshot`, generating a JSON response. -/
174
+ def unpickleProofSnapshot (n : UnpickleProofState) : M IO (ProofStepResponse ⊕ Error) := do
175
+ let (cmdSnapshot?, notFound) ← do match n.env with
176
+ | none => pure (none, false)
177
+ | some i => do match (← get).cmdStates[i]? with
178
+ | some env => pure (some env, false)
179
+ | none => pure (none, true)
180
+ if notFound then
181
+ return .inr ⟨"Unknown environment."⟩
182
+ let (proofState, _) ← ProofSnapshot.unpickle n.unpickleProofStateFrom cmdSnapshot?
183
+ Sum.inl <$> createProofStepReponse proofState
184
+
185
+ /--
186
+ Run a command, returning the id of the new environment, and any messages and sorries.
187
+ -/
188
+ def runCommand (s : Command) : M IO (CommandResponse ⊕ Error) := do
189
+ let (cmdSnapshot?, notFound) ← do match s.env with
190
+ | none => pure (none, false)
191
+ | some i => do match (← get).cmdStates[i]? with
192
+ | some env => pure (some env, false)
193
+ | none => pure (none, true)
194
+ if notFound then
195
+ return .inr ⟨"Unknown environment."⟩
196
+ let initialCmdState? := cmdSnapshot?.map fun c => c.cmdState
197
+ let (cmdState, messages, trees) ← try
198
+ IO.processInput s.cmd initialCmdState?
199
+ catch ex =>
200
+ return .inr ⟨ex.toString⟩
201
+ let messages ← messages.mapM fun m => Message.of m
202
+ -- For debugging purposes, sometimes we print out the trees here:
203
+ -- trees.forM fun t => do IO.println (← t.format)
204
+ let sorries ← sorries trees (initialCmdState?.map (·.env))
205
+ let tactics ← match s.allTactics with
206
+ | some true => tactics trees
207
+ | _ => pure []
208
+ let cmdSnapshot :=
209
+ { cmdState
210
+ cmdContext := (cmdSnapshot?.map fun c => c.cmdContext).getD
211
+ { fileName := "", fileMap := default, tacticCache? := none } }
212
+ let env ← recordCommandSnapshot cmdSnapshot
213
+ let jsonTrees := match s.infotree with
214
+ | some "full" => trees
215
+ | some "tactics" => trees.bind InfoTree.retainTacticInfo
216
+ | some "original" => trees.bind InfoTree.retainTacticInfo |>.bind InfoTree.retainOriginal
217
+ | some "substantive" => trees.bind InfoTree.retainTacticInfo |>.bind InfoTree.retainSubstantive
218
+ | _ => []
219
+ let infotree := if jsonTrees.isEmpty then
220
+ none
221
+ else
222
+ some <| Json.arr (← jsonTrees.toArray.mapM fun t => t.toJson none)
223
+ return .inl
224
+ { env,
225
+ messages,
226
+ sorries,
227
+ tactics
228
+ infotree }
229
+
230
+ def processFile (s : File) : M IO (CommandResponse ⊕ Error) := do
231
+ try
232
+ let cmd ← IO.FS.readFile s.path
233
+ runCommand { s with env := none, cmd }
234
+ catch e =>
235
+ pure <| .inr ⟨e.toString⟩
236
+
237
+ /--
238
+ Run a single tactic, returning the id of the new proof statement, and the new goals.
239
+ -/
240
+ -- TODO detect sorries?
241
+ def runProofStep (s : ProofStep) : M IO (ProofStepResponse ⊕ Error) := do
242
+ match (← get).proofStates[s.proofState]? with
243
+ | none => return .inr ⟨"Unknown proof state."⟩
244
+ | some proofState =>
245
+ try
246
+ let proofState' ← proofState.runString s.tactic
247
+ return .inl (← createProofStepReponse proofState' proofState)
248
+ catch ex =>
249
+ return .inr ⟨"Lean error:\n" ++ ex.toString⟩
250
+
251
+ end REPL
252
+
253
+ open REPL
254
+
255
+ /-- Get lines from stdin until a blank line is entered. -/
256
+ partial def getLines : IO String := do
257
+ let line ← (← IO.getStdin).getLine
258
+ if line.trim.isEmpty then
259
+ return line
260
+ else
261
+ return line ++ (← getLines)
262
+
263
+ instance [ToJson α] [ToJson β] : ToJson (α ⊕ β) where
264
+ toJson x := match x with
265
+ | .inl a => toJson a
266
+ | .inr b => toJson b
267
+
268
+ /-- Commands accepted by the REPL. -/
269
+ inductive Input
270
+ | command : REPL.Command → Input
271
+ | file : REPL.File → Input
272
+ | proofStep : REPL.ProofStep → Input
273
+ | pickleEnvironment : REPL.PickleEnvironment → Input
274
+ | unpickleEnvironment : REPL.UnpickleEnvironment → Input
275
+ | pickleProofSnapshot : REPL.PickleProofState → Input
276
+ | unpickleProofSnapshot : REPL.UnpickleProofState → Input
277
+
278
+ /-- Parse a user input string to an input command. -/
279
+ def parse (query : String) : IO Input := do
280
+ let json := Json.parse query
281
+ match json with
282
+ | .error e => throw <| IO.userError <| toString <| toJson <|
283
+ (⟨"Could not parse JSON:\n" ++ e⟩ : Error)
284
+ | .ok j => match fromJson? j with
285
+ | .ok (r : REPL.ProofStep) => return .proofStep r
286
+ | .error _ => match fromJson? j with
287
+ | .ok (r : REPL.PickleEnvironment) => return .pickleEnvironment r
288
+ | .error _ => match fromJson? j with
289
+ | .ok (r : REPL.UnpickleEnvironment) => return .unpickleEnvironment r
290
+ | .error _ => match fromJson? j with
291
+ | .ok (r : REPL.PickleProofState) => return .pickleProofSnapshot r
292
+ | .error _ => match fromJson? j with
293
+ | .ok (r : REPL.UnpickleProofState) => return .unpickleProofSnapshot r
294
+ | .error _ => match fromJson? j with
295
+ | .ok (r : REPL.Command) => return .command r
296
+ | .error _ => match fromJson? j with
297
+ | .ok (r : REPL.File) => return .file r
298
+ | .error e => throw <| IO.userError <| toString <| toJson <|
299
+ (⟨"Could not parse as a valid JSON command:\n" ++ e⟩ : Error)
300
+
301
+ /-- Read-eval-print loop for Lean. -/
302
+ unsafe def repl : IO Unit :=
303
+ StateT.run' loop {}
304
+ where loop : M IO Unit := do
305
+ let query ← getLines
306
+ if query = "" then
307
+ return ()
308
+ if query.startsWith "#" || query.startsWith "--" then loop else
309
+ IO.println <| toString <| ← match ← parse query with
310
+ | .command r => return toJson (← runCommand r)
311
+ | .file r => return toJson (← processFile r)
312
+ | .proofStep r => return toJson (← runProofStep r)
313
+ | .pickleEnvironment r => return toJson (← pickleCommandSnapshot r)
314
+ | .unpickleEnvironment r => return toJson (← unpickleCommandSnapshot r)
315
+ | .pickleProofSnapshot r => return toJson (← pickleProofSnapshot r)
316
+ | .unpickleProofSnapshot r => return toJson (← unpickleProofSnapshot r)
317
+ IO.println "" -- easier to parse the output if there are blank lines
318
+ loop
319
+
320
+ /-- Main executable function, run as `lake exe repl`. -/
321
+ unsafe def main (_ : List String) : IO Unit := do
322
+ initSearchPath (← Lean.findSysroot)
323
+ repl
REPL/Snapshots.lean ADDED
@@ -0,0 +1,306 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /-
2
+ Copyright (c) 2023 Lean FRO, LLC. All rights reserved.
3
+ Released under Apache 2.0 license as described in the file LICENSE.
4
+ Authors: Scott Morrison
5
+ -/
6
+ import Lean.Replay
7
+ import Lean.Elab.Command
8
+ import REPL.Util.Pickle
9
+
10
+ open Lean Elab
11
+
12
+ namespace Lean.Elab.Command
13
+
14
+ @[inline] def CommandElabM.run (x : CommandElabM α) (ctx : Context) (s : State) : EIO Exception (α × State) :=
15
+ (x ctx).run s
16
+
17
+ @[inline] def CommandElabM.run' (x : CommandElabM α) (ctx : Context) (s : State) : EIO Exception α :=
18
+ Prod.fst <$> x.run ctx s
19
+
20
+ @[inline] def CommandElabM.toIO (x : CommandElabM α) (ctx : Context) (s : State) : IO (α × State) := do
21
+ match (← (x.run ctx s).toIO') with
22
+ | Except.error (Exception.error _ msg) => throw <| IO.userError (← msg.toString)
23
+ | Except.error (Exception.internal id _) => throw <| IO.userError <| "internal exception #" ++ toString id.idx
24
+ | Except.ok a => return a
25
+
26
+ end Lean.Elab.Command
27
+
28
+ namespace REPL
29
+
30
+ /--
31
+ Bundled structure for the `State` and `Context` objects
32
+ for the `CommandElabM` monad.
33
+ -/
34
+ structure CommandSnapshot where
35
+ cmdState : Command.State
36
+ cmdContext : Command.Context
37
+
38
+ namespace CommandSnapshot
39
+
40
+ open Lean.Elab.Command
41
+
42
+ /-- A copy of `Command.State` with the `Environment`, caches, and logging omitted. -/
43
+ structure CompactableCommandSnapshot where
44
+ -- env : Environment
45
+ scopes : List Scope := [{ header := "" }]
46
+ nextMacroScope : Nat := firstFrontendMacroScope + 1
47
+ maxRecDepth : Nat
48
+ nextInstIdx : Nat := 1 -- for generating anonymous instance names
49
+ ngen : NameGenerator := {}
50
+ -- infoState : InfoState := {}
51
+ -- traceState : TraceState := {}
52
+ -- messages : MessageLog := {}
53
+
54
+ open System (FilePath)
55
+
56
+ /--
57
+ Run a `CommandElabM` monadic function in the current `ProofSnapshot`,
58
+ updating the `Command.State`.
59
+ -/
60
+ def runCommandElabM (p : CommandSnapshot) (t : CommandElabM α) : IO (α × CommandSnapshot) := do
61
+ let (a, cmdState) ← (CommandElabM.toIO · p.cmdContext p.cmdState) do t
62
+ return (a, { p with cmdState })
63
+
64
+
65
+ /--
66
+ Pickle a `CommandSnapshot`, discarding closures and non-essential caches.
67
+
68
+ When pickling the `Environment`, we do so relative to its imports.
69
+ -/
70
+ def pickle (p : CommandSnapshot) (path : FilePath) : IO Unit := do
71
+ let env := p.cmdState.env
72
+ let p' := { p with cmdState := { p.cmdState with env := ← mkEmptyEnvironment }}
73
+ _root_.pickle path
74
+ (env.header.imports,
75
+ env.constants.map₂,
76
+ ({ p'.cmdState with } : CompactableCommandSnapshot),
77
+ p'.cmdContext)
78
+
79
+ /--
80
+ Unpickle a `CommandSnapshot`.
81
+ -/
82
+ def unpickle (path : FilePath) : IO (CommandSnapshot × CompactedRegion) := unsafe do
83
+ let ((imports, map₂, cmdState, cmdContext), region) ←
84
+ _root_.unpickle (Array Import × PHashMap Name ConstantInfo × CompactableCommandSnapshot ×
85
+ Command.Context) path
86
+ let env ← (← importModules imports {} 0).replay (HashMap.ofList map₂.toList)
87
+ let p' : CommandSnapshot :=
88
+ { cmdState := { cmdState with env }
89
+ cmdContext }
90
+ let (_, p'') ← p'.runCommandElabM do
91
+ for o in ← getOpenDecls do
92
+ if let .simple ns _ := o then do
93
+ activateScoped ns
94
+ return (p'', region)
95
+
96
+ end CommandSnapshot
97
+
98
+ /--
99
+ Bundled structure for the `State` and `Context` objects
100
+ for the `CoreM`, `MetaM`, `TermElabM`, and `TacticM` monads.
101
+ -/
102
+ structure ProofSnapshot where
103
+ coreState : Core.State
104
+ coreContext : Core.Context
105
+ metaState : Meta.State
106
+ metaContext : Meta.Context
107
+ termState : Term.State
108
+ termContext : Term.Context
109
+ tacticState : Tactic.State
110
+ tacticContext : Tactic.Context
111
+
112
+ namespace ProofSnapshot
113
+
114
+ open Lean Elab Tactic
115
+
116
+ /-- New messages in a `ProofSnapshot`, relative to an optional previous `ProofSnapshot`. -/
117
+ def newMessages (new : ProofSnapshot) (old? : Option ProofSnapshot := none) : List Lean.Message :=
118
+ match old? with
119
+ | none => new.coreState.messages.msgs.toList
120
+ | some old => new.coreState.messages.msgs.toList.drop (old.coreState.messages.msgs.size)
121
+
122
+ /-- New info trees in a `ProofSnapshot`, relative to an optional previous `ProofSnapshot`. -/
123
+ def newInfoTrees (new : ProofSnapshot) (old? : Option ProofSnapshot := none) : List InfoTree :=
124
+ let infoState := new.coreState.infoState
125
+ let trees := match old? with
126
+ | none => infoState.trees.toList
127
+ | some old => infoState.trees.toList.drop (old.coreState.infoState.trees.size)
128
+ trees.map fun t => t.substitute infoState.assignment
129
+
130
+ /-- Run a `CoreM` monadic function in the current `ProofSnapshot`, updating the `Core.State`. -/
131
+ def runCoreM (p : ProofSnapshot) (t : CoreM α) : IO (α × ProofSnapshot) := do
132
+ let (a, coreState) ← (Lean.Core.CoreM.toIO · p.coreContext p.coreState) do t
133
+ return (a, { p with coreState })
134
+
135
+ /-- Run a `MetaM` monadic function in the current `ProofSnapshot`, updating the `Meta.State`. -/
136
+ def runMetaM (p : ProofSnapshot) (t : MetaM α) : IO (α × ProofSnapshot) := do
137
+ let ((a, metaState), p') ←
138
+ p.runCoreM (Lean.Meta.MetaM.run (ctx := p.metaContext) (s := p.metaState) do t)
139
+ return (a, { p' with metaState })
140
+
141
+ /-- Run a `TermElabM` monadic function in the current `ProofSnapshot`, updating the `Term.State`. -/
142
+ def runTermElabM (p : ProofSnapshot) (t : TermElabM α) : IO (α × ProofSnapshot) := do
143
+ let ((a, termState), p') ← p.runMetaM (Lean.Elab.Term.TermElabM.run (s := p.termState)
144
+ (do let r ← t; Term.synthesizeSyntheticMVarsNoPostponing; pure r))
145
+ return (a, { p' with termState })
146
+
147
+ /-- Run a `TacticM` monadic function in the current `ProofSnapshot`, updating the `Tactic.State`. -/
148
+ def runTacticM (p : ProofSnapshot) (t : TacticM α) : IO (α × ProofSnapshot) := do
149
+ let ((a, tacticState), p') ← p.runTermElabM (t p.tacticContext |>.run p.tacticState)
150
+ return (a, { p' with tacticState })
151
+
152
+ /--
153
+ Run a `TacticM` monadic function in the current `ProofSnapshot`, updating the `Tactic.State`,
154
+ and discarding the return value.
155
+ -/
156
+ def runTacticM' (p : ProofSnapshot) (t : TacticM α) : IO ProofSnapshot :=
157
+ Prod.snd <$> p.runTacticM t
158
+
159
+ /-- New traces in a `ProofSnapshot`, relative to an optional previous `ProofSnapshot`. -/
160
+ def newTraces (new : ProofSnapshot) (old? : Option ProofSnapshot := none) : IO (List String) :=
161
+ match old? with
162
+ | none => (·.1) <$> new.runCoreM (do
163
+ (← getTraces).toList.mapM fun t => do pure (← t.msg.toString).trim)
164
+ | some old => do
165
+ let oldCount ← (·.1) <$> old.runCoreM (return (← getTraces).size)
166
+ (·.1) <$> new.runCoreM (do
167
+ ((← getTraces).toList.drop oldCount).mapM fun t => do pure (← t.msg.toString).trim)
168
+
169
+ /--
170
+ Evaluate a `Syntax` into a `TacticM` tactic, and run it in the current `ProofSnapshot`.
171
+ -/
172
+ def runSyntax (p : ProofSnapshot) (t : Syntax) : IO ProofSnapshot :=
173
+ Prod.snd <$> p.runTacticM (evalTactic t)
174
+
175
+ /--
176
+ Parse a string into a `Syntax`, evaluate it as a `TacticM` tactic,
177
+ and run it in the current `ProofSnapshot`.
178
+ -/
179
+ def runString (p : ProofSnapshot) (t : String) : IO ProofSnapshot :=
180
+ match Parser.runParserCategory p.coreState.env `tactic t with
181
+ | .error e => throw (IO.userError e)
182
+ | .ok stx => p.runSyntax stx
183
+
184
+ /-- Pretty print the current goals in the `ProofSnapshot`. -/
185
+ def ppGoals (p : ProofSnapshot) : IO (List Format) :=
186
+ Prod.fst <$> p.runMetaM do p.tacticState.goals.mapM (Meta.ppGoal ·)
187
+ /--
188
+ Construct a `ProofSnapshot` from a `ContextInfo` and optional `LocalContext`, and a list of goals.
189
+
190
+ For convenience, we also allow a list of `Expr`s, and these are appended to the goals
191
+ as fresh metavariables with the given types.
192
+ -/
193
+ def create (ctx : ContextInfo) (lctx? : Option LocalContext) (env? : Option Environment)
194
+ (goals : List MVarId) (types : List Expr := []) : IO ProofSnapshot := do
195
+ ctx.runMetaM (lctx?.getD {}) do
196
+ let goals := goals ++ (← types.mapM fun t => Expr.mvarId! <$> Meta.mkFreshExprMVar (some t))
197
+ goals.head!.withContext do
198
+ let s ← getThe Core.State
199
+ let s := match env? with
200
+ | none => s
201
+ | some env => { s with env }
202
+ pure <|
203
+ { coreState := s
204
+ coreContext := ← readThe Core.Context
205
+ metaState := ← getThe Meta.State
206
+ metaContext := ← readThe Meta.Context
207
+ termState := {}
208
+ termContext := {}
209
+ tacticState := { goals }
210
+ tacticContext := { elaborator := .anonymous } }
211
+
212
+ open Lean.Core in
213
+ /-- A copy of `Core.State` with the `Environment`, caches, and logging omitted. -/
214
+ structure CompactableCoreState where
215
+ -- env : Environment
216
+ nextMacroScope : MacroScope := firstFrontendMacroScope + 1
217
+ ngen : NameGenerator := {}
218
+ -- traceState : TraceState := {}
219
+ -- cache : Core.Cache := {}
220
+ -- messages : MessageLog := {}
221
+ -- infoState : Elab.InfoState := {}
222
+
223
+ open Lean.Meta in
224
+ /-- A copy of `Meta.Context` with closures omitted. -/
225
+ structure CompactableMetaContext where
226
+ config : Config := {}
227
+ lctx : LocalContext := {}
228
+ localInstances : LocalInstances := #[]
229
+ defEqCtx? : Option DefEqContext := none
230
+ synthPendingDepth : Nat := 0
231
+ -- canUnfold? : Option (Config → ConstantInfo → CoreM Bool) := none
232
+
233
+ /-- A copy of `Term.Context` with closures and a cache omitted. -/
234
+ structure CompactableTermContext where
235
+ declName? : Option Name := none
236
+ auxDeclToFullName : FVarIdMap Name := {}
237
+ macroStack : MacroStack := []
238
+ mayPostpone : Bool := true
239
+ errToSorry : Bool := true
240
+ autoBoundImplicit : Bool := false
241
+ autoBoundImplicits : PArray Expr := {}
242
+ -- autoBoundImplicitForbidden : Name → Bool := fun _ => false
243
+ sectionVars : NameMap Name := {}
244
+ sectionFVars : NameMap Expr := {}
245
+ implicitLambda : Bool := true
246
+ isNoncomputableSection : Bool := false
247
+ ignoreTCFailures : Bool := false
248
+ inPattern : Bool := false
249
+ -- tacticCache? : Option (IO.Ref Tactic.Cache) := none
250
+ saveRecAppSyntax : Bool := true
251
+ holesAsSyntheticOpaque : Bool := false
252
+
253
+ open System (FilePath)
254
+
255
+ /--
256
+ Pickle a `ProofSnapshot`, discarding closures and non-essential caches.
257
+
258
+ When pickling the `Environment`, we do so relative to its imports.
259
+ -/
260
+ def pickle (p : ProofSnapshot) (path : FilePath) : IO Unit := do
261
+ let env := p.coreState.env
262
+ let p' := { p with coreState := { p.coreState with env := ← mkEmptyEnvironment }}
263
+ _root_.pickle path
264
+ (env.header.imports,
265
+ env.constants.map₂,
266
+ ({ p'.coreState with } : CompactableCoreState),
267
+ p'.coreContext,
268
+ p'.metaState,
269
+ ({ p'.metaContext with } : CompactableMetaContext),
270
+ p'.termState,
271
+ ({ p'.termContext with } : CompactableTermContext),
272
+ p'.tacticState,
273
+ p'.tacticContext)
274
+
275
+ /--
276
+ Unpickle a `ProofSnapshot`.
277
+ -/
278
+ def unpickle (path : FilePath) (cmd? : Option CommandSnapshot) :
279
+ IO (ProofSnapshot × CompactedRegion) := unsafe do
280
+ let ((imports, map₂, coreState, coreContext, metaState, metaContext, termState, termContext,
281
+ tacticState, tacticContext), region) ←
282
+ _root_.unpickle (Array Import × PHashMap Name ConstantInfo × CompactableCoreState ×
283
+ Core.Context × Meta.State × CompactableMetaContext × Term.State × CompactableTermContext ×
284
+ Tactic.State × Tactic.Context) path
285
+ let env ← match cmd? with
286
+ | none =>
287
+ enableInitializersExecution
288
+ (← importModules imports {} 0).replay (HashMap.ofList map₂.toList)
289
+ | some cmd =>
290
+ cmd.cmdState.env.replay (HashMap.ofList map₂.toList)
291
+ let p' : ProofSnapshot :=
292
+ { coreState := { coreState with env }
293
+ coreContext
294
+ metaState
295
+ metaContext := { metaContext with }
296
+ termState
297
+ termContext := { termContext with }
298
+ tacticState
299
+ tacticContext }
300
+ let (_, p'') ← p'.runCoreM do
301
+ for o in ← getOpenDecls do
302
+ if let .simple ns _ := o then
303
+ activateScoped ns
304
+ return (p'', region)
305
+
306
+ end ProofSnapshot
REPL/Util/Path.lean ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /-
2
+ Copyright (c) 2022 Gabriel Ebner. All rights reserved.
3
+ Released under Apache 2.0 license as described in the file LICENSE.
4
+ Authors: Gabriel Ebner
5
+ -/
6
+ import Lean
7
+
8
+ -- This has been duplicated from Std4 to avoid a dependency.
9
+
10
+ /-!
11
+ # `compile_time_search_path%` term elaborator.
12
+
13
+ Use this as `searchPathRef.set compile_time_search_path%`.
14
+ -/
15
+
16
+ open Lean System
17
+
18
+ -- Ideally this instance would be constructed simply by `deriving instance ToExpr for FilePath`
19
+ -- but for now we have decided not to upstream the `ToExpr` derive handler from `Mathlib`.
20
+ -- https://leanprover.zulipchat.com/#narrow/stream/348111-std4/topic/ToExpr.20derive.20handler/near/386476438
21
+ instance : ToExpr FilePath where
22
+ toTypeExpr := mkConst ``FilePath
23
+ toExpr path := mkApp (mkConst ``FilePath.mk) (toExpr path.1)
24
+
25
+ /--
26
+ Term elaborator that retrieves the current `SearchPath`.
27
+
28
+ Typical usage is `searchPathRef.set compile_time_search_path%`.
29
+
30
+ This must not be used in files that are potentially compiled on another machine and then
31
+ imported.
32
+ (That is, if used in an imported file it will embed the search path from whichever machine
33
+ compiled the `.olean`.)
34
+ -/
35
+ elab "compile_time_search_path%" : term =>
36
+ return toExpr (← searchPathRef.get)
REPL/Util/Pickle.lean ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /-
2
+ Copyright (c) 2023 Mario Carneiro. All rights reserved.
3
+ Released under Apache 2.0 license as described in the file LICENSE.
4
+ Authors: Mario Carneiro
5
+ -/
6
+ import Lean.Environment
7
+
8
+ /-!
9
+ # Pickling and unpickling objects
10
+
11
+ By abusing `saveModuleData` and `readModuleData` we can pickle and unpickle objects to disk.
12
+ -/
13
+
14
+ open Lean System
15
+
16
+ /--
17
+ Save an object to disk.
18
+ If you need to write multiple objects from within a single declaration,
19
+ you will need to provide a unique `key` for each.
20
+ -/
21
+ def pickle {α : Type} (path : FilePath) (x : α) (key : Name := by exact decl_name%) : IO Unit :=
22
+ saveModuleData path key (unsafe unsafeCast x)
23
+
24
+ /--
25
+ Load an object from disk.
26
+ Note: The returned `CompactedRegion` can be used to free the memory behind the value
27
+ of type `α`, using `CompactedRegion.free` (which is only safe once all references to the `α` are
28
+ released). Ignoring the `CompactedRegion` results in the data being leaked.
29
+ Use `withUnpickle` to call `CompactedRegion.free` automatically.
30
+
31
+ This function is unsafe because the data being loaded may not actually have type `α`, and this
32
+ may cause crashes or other bad behavior.
33
+ -/
34
+ unsafe def unpickle (α : Type) (path : FilePath) : IO (α × CompactedRegion) := do
35
+ let (x, region) ← readModuleData path
36
+ pure (unsafeCast x, region)
37
+
38
+ /-- Load an object from disk and run some continuation on it, freeing memory afterwards. -/
39
+ unsafe def withUnpickle [Monad m] [MonadLiftT IO m] {α β : Type}
40
+ (path : FilePath) (f : α → m β) : m β := do
41
+ let (x, region) ← unpickle α path
42
+ let r ← f x
43
+ region.free
44
+ pure r
__pycache__/code.cpython-310.pyc ADDED
Binary file (1.89 kB). View file
 
__pycache__/code.cpython-39.pyc ADDED
Binary file (1.84 kB). View file
 
__pycache__/openllm_pass_rate_new_test.cpython-39.pyc ADDED
Binary file (7.79 kB). View file
 
all_code.py ADDED
@@ -0,0 +1,159 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+
3
+ # from mariana.repl.pass_rate_new import main
4
+
5
+ def handle():
6
+
7
+ data = json.load(open('pass_rate_results/lean4_basic_test/lean4_random_15k_all.jsonl'))
8
+ # data['results'] += json.load(open('pass_rate_results/gsm8k_train/lean4_random_15k_all.jsonl'))['results']
9
+
10
+
11
+ PROMPT_DICT = {
12
+ "wild": (
13
+ "# Problem:\n{question}\n\n"
14
+ "# Proof:\n{answer}."
15
+ ),
16
+ "lean4": (
17
+ "Statement and proof in natural language:\n\n"
18
+ "{statement_text}\n\n"
19
+ "Translate the statement and proof in natural language to lean4:"
20
+ ),
21
+ "prompt_no_input": (
22
+ "Below is an instruction that describes a task. "
23
+ "Write a response that appropriately completes the request.\n\n"
24
+ "### Instruction:\n{instruction}\n\n### Response:"
25
+ ),
26
+ }
27
+ training_data = []
28
+ ratio = []
29
+ for item in data['results']:
30
+ if item['status'] == 'pass':
31
+ if not len(item['stderr']):
32
+ ratio.append(1)
33
+ # training_data.append(
34
+ # {
35
+ # "statement_poof":item['statement'],
36
+ # "model_response":PROMPT_DICT["wild"].format(question= item['content']['question'], answer = item['content']['answer']),
37
+ # "task": "statementproof_inform",
38
+ # }
39
+ # )
40
+ else:
41
+ ratio.append(0)
42
+ print ( item['stderr'])
43
+
44
+
45
+ # with open("pass_rate_results/combined_lean4_random_15k_all_passed.jsonl", "w") as f:
46
+ # json.dump(training_data, f, ensure_ascii=False, indent=2)
47
+
48
+
49
+ print("false positives: ", 1 - sum(ratio)/len(ratio))
50
+
51
+
52
+ def savetojson():
53
+
54
+ import_statements = '''import algebra.algebra.basic
55
+ import algebra.order.floor
56
+ import algebra.associated
57
+ import algebra.big_operators.basic
58
+ import algebra.big_operators.enat
59
+ import algebra.big_operators.order
60
+ import algebra.big_operators.pi
61
+ import algebra.geom_sum
62
+ import algebra.group.pi
63
+ import algebra.group.commute
64
+ import algebra.group_power.basic
65
+ import algebra.group_power.identities
66
+ import algebra.order.floor
67
+ import algebra.quadratic_discriminant
68
+ import algebra.ring.basic
69
+ import analysis.asymptotics.asymptotic_equivalent
70
+ import analysis.mean_inequalities
71
+ import analysis.normed_space.basic
72
+ import analysis.inner_product_space.basic
73
+ import analysis.inner_product_space.euclidean_dist
74
+ import analysis.normed_space.pi_Lp
75
+ import analysis.special_functions.exp
76
+ import analysis.special_functions.exp_deriv
77
+ import analysis.special_functions.log
78
+ import analysis.special_functions.logb
79
+ import analysis.special_functions.log_deriv
80
+ import analysis.special_functions.pow
81
+ import analysis.special_functions.sqrt
82
+ import analysis.special_functions.trigonometric.basic
83
+ import analysis.special_functions.trigonometric.complex
84
+ import combinatorics.simple_graph.basic
85
+ import data.complex.basic
86
+ import data.complex.exponential
87
+ import data.finset.basic
88
+ import data.fintype.card
89
+ import data.int.basic
90
+ import data.int.gcd
91
+ import data.int.modeq
92
+ import data.int.parity
93
+ import data.list.intervals
94
+ import data.list.palindrome
95
+ import data.multiset.basic
96
+ import data.nat.basic
97
+ import data.nat.choose.basic
98
+ import data.nat.digits
99
+ import data.nat.factorial.basic
100
+ import data.nat.fib
101
+ import data.nat.modeq
102
+ import data.nat.multiplicity
103
+ import data.nat.parity
104
+ import data.nat.prime
105
+ import data.pnat.basic
106
+ import data.pnat.prime
107
+ import data.polynomial
108
+ import data.polynomial.basic
109
+ import data.polynomial.eval
110
+ import data.rat.basic
111
+ import data.real.basic
112
+ import data.real.ennreal
113
+ import data.real.irrational
114
+ import data.real.nnreal
115
+ import data.real.sqrt
116
+ import data.real.golden_ratio
117
+ import data.set.finite
118
+ import data.sym.sym2
119
+ import data.zmod.basic
120
+ import dynamics.fixed_points.basic
121
+ import field_theory.finite.basic
122
+ import geometry.euclidean.basic
123
+ import geometry.euclidean.circumcenter
124
+ import geometry.euclidean.monge_point
125
+ import geometry.euclidean.sphere
126
+ import init.data.nat.gcd
127
+ import linear_algebra.affine_space.affine_map
128
+ import linear_algebra.affine_space.independent
129
+ import linear_algebra.affine_space.ordered
130
+ import linear_algebra.finite_dimensional
131
+ import logic.equiv.basic
132
+ import measure_theory.integral.interval_integral
133
+ import number_theory.arithmetic_function
134
+ import number_theory.legendre_symbol.quadratic_reciprocity
135
+ import number_theory.primes_congruent_one
136
+ import order.bounds
137
+ import order.filter.basic
138
+ import order.well_founded
139
+ import topology.basic
140
+ import topology.instances.nnreal
141
+ '''
142
+
143
+ data = {
144
+ "working_file": import_statements
145
+ }
146
+
147
+ with open('data/notlean_dependency.json', 'w', encoding='utf-8') as f:
148
+ json.dump(data, f, indent=4)
149
+
150
+
151
+ def load_to_atp():
152
+ data_path = 'pass_rate_results/math_train/1/10pass10.jsonl'
153
+ data = json.load(open(data_path, "r", encoding='utf-8'))
154
+ import pdb
155
+ pdb.set_trace()
156
+ if __name__ == '__main__':
157
+ load_to_atp()
158
+ # get_novel_premises()
159
+ # savetojson()
basic_working.json ADDED
The diff for this file is too large to render. See raw diff
 
code.py ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import os
3
+ import subprocess
4
+ import json
5
+
6
+ # Set the directory where your .lean files are located
7
+
8
+ # Get a list of all .lean files in the directory
9
+ # lean_files = [f for f in os.listdir(directory) if f.endswith(".lean")]
10
+ # lean_files = ["test/file.lean"]
11
+ def main(args):
12
+ command_list = []
13
+ for i in range(8):
14
+ with open(f"{args.input_path}/{i}.json", 'r', encoding='utf-8') as rf:
15
+ for line in rf.readlines():
16
+ try:
17
+ json_item = json.loads(line)
18
+ json_item['cmd'] = '\n'.join()
19
+ except:
20
+ import pdb
21
+ pdb.set_trace()
22
+ command_list.append(json_item)
23
+ results = []
24
+ passed = 0
25
+ total = 0
26
+
27
+ for item in command_list:
28
+ data = '{"cmd": "%s", "allTactics": true}' % item['cmd']
29
+ command = 'echo \'%s\' | lake exe repl' % data
30
+
31
+ try:
32
+ result = subprocess.run(command, shell=True, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
33
+ stdout = result.stdout.decode('utf-8')
34
+ stderr = result.stderr.decode('utf-8')
35
+ results.append({
36
+ 'file_path': item['file_path'],
37
+ 'stdout': stdout,
38
+ 'stderr': stderr,
39
+ 'status': 'pass'
40
+ })
41
+ passed += 1
42
+ except subprocess.CalledProcessError as e:
43
+ results.append({
44
+ 'file_path': item['file_path'],
45
+ 'error': str(e),
46
+ 'status': 'nopass'
47
+ })
48
+ total += 1
49
+
50
+ # Calculate pass rate
51
+ pass_rate = passed / total * 100
52
+
53
+ # Save results to a JSON file
54
+ with open('results.json', 'w') as f:
55
+ json.dump({'results': results, 'pass_rate': pass_rate}, f, indent=2)
56
+
57
+ if __name__ == '__main__':
58
+ arg_parser = ArgumentParser()
59
+ arg_parser.add_argument('--data_path', type=str,
60
+ default='data/grade-school-math-master/grade_school_math/data/test.jsonl')
61
+ arg_parser.add_argument('--input_path', type=str, default='')
62
+ arg_parser.add_argument('--cuda_num', type=int, default=8)
63
+ arg_parser.add_argument('--output_path', type=str, default='total.json')
64
+ arg_parser.add_argument('--generate_method', type=str,
65
+ choices=['single', 'sft', 'comp', 'self_consistency', 'single_consistency'])
66
+ arg_parser.add_argument('--method', type=str, choices=['main', 'test', 'get_data'])
67
+ args = arg_parser.parse_args()
68
+ main(args)
69
+
data/basic_working.json ADDED
The diff for this file is too large to render. See raw diff
 
data/notlean_dependency.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "working_file": "import algebra.algebra.basic\nimport algebra.order.floor\nimport algebra.associated\nimport algebra.big_operators.basic\nimport algebra.big_operators.enat\nimport algebra.big_operators.order\nimport algebra.big_operators.pi\nimport algebra.geom_sum\nimport algebra.group.pi\nimport algebra.group.commute\nimport algebra.group_power.basic\nimport algebra.group_power.identities\nimport algebra.order.floor\nimport algebra.quadratic_discriminant\nimport algebra.ring.basic\nimport analysis.asymptotics.asymptotic_equivalent\nimport analysis.mean_inequalities\nimport analysis.normed_space.basic\nimport analysis.inner_product_space.basic\nimport analysis.inner_product_space.euclidean_dist\nimport analysis.normed_space.pi_Lp\nimport analysis.special_functions.exp\nimport analysis.special_functions.exp_deriv\nimport analysis.special_functions.log\nimport analysis.special_functions.logb\nimport analysis.special_functions.log_deriv\nimport analysis.special_functions.pow\nimport analysis.special_functions.sqrt\nimport analysis.special_functions.trigonometric.basic\nimport analysis.special_functions.trigonometric.complex\nimport combinatorics.simple_graph.basic\nimport data.complex.basic\nimport data.complex.exponential\nimport data.finset.basic\nimport data.fintype.card\nimport data.int.basic\nimport data.int.gcd\nimport data.int.modeq\nimport data.int.parity\nimport data.list.intervals\nimport data.list.palindrome\nimport data.multiset.basic\nimport data.nat.basic\nimport data.nat.choose.basic\nimport data.nat.digits\nimport data.nat.factorial.basic\nimport data.nat.fib\nimport data.nat.modeq\nimport data.nat.multiplicity\nimport data.nat.parity\nimport data.nat.prime\nimport data.pnat.basic\nimport data.pnat.prime\nimport data.polynomial\nimport data.polynomial.basic\nimport data.polynomial.eval\nimport data.rat.basic\nimport data.real.basic\nimport data.real.ennreal\nimport data.real.irrational\nimport data.real.nnreal\nimport data.real.sqrt\nimport 
data.real.golden_ratio\nimport data.set.finite\nimport data.sym.sym2\nimport data.zmod.basic\nimport dynamics.fixed_points.basic\nimport field_theory.finite.basic\nimport geometry.euclidean.basic\nimport geometry.euclidean.circumcenter\nimport geometry.euclidean.monge_point\nimport geometry.euclidean.sphere\nimport init.data.nat.gcd\nimport linear_algebra.affine_space.affine_map\nimport linear_algebra.affine_space.independent\nimport linear_algebra.affine_space.ordered\nimport linear_algebra.finite_dimensional\nimport logic.equiv.basic\nimport measure_theory.integral.interval_integral\nimport number_theory.arithmetic_function\nimport number_theory.legendre_symbol.quadratic_reciprocity\nimport number_theory.primes_congruent_one\nimport order.bounds\nimport order.filter.basic\nimport order.well_founded\nimport topology.basic\nimport topology.instances.nnreal\n"
3
+ }
gpt_pass_rate_multi_pass.py ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pdb
2
+ import subprocess
3
+ import re
4
+
5
+ # Output file
6
+ output_file = "pass_rate_output.txt"
7
+
8
+ # Clearing the output file before appending new content
9
+ with open(output_file, "w") as file:
10
+ file.write("")
11
+
12
+ # List of input paths
13
+ input_path_lists = [
14
+ "test/zero_shot/wild_test/generation/lean4_random_15k_all/2/1/",
15
+ ]
16
+
17
+ def get_output(input_string, k):
18
+ pattern = r"gpt_result/(\w+)/(\w+)"
19
+ match = re.search(pattern, input_string)
20
+ if match:
21
+ part1 = match.group(1)
22
+ part2 = match.group(2)
23
+ result = f"gpt_result/{part2}/{part1}_pass{k}.json"
24
+ print(result)
25
+ return result
26
+ else:
27
+ print("No match found.")
28
+ return None
29
+
30
+ # List of input paths
31
+ input_path_lists = [
32
+ # "gpt_result/lean_basic/gpt4/",
33
+ # "gpt_result/lean_random/gpt4/",
34
+ "gpt_result/wild/gpt4/",
35
+ # "gpt_result/lean_basic/gpt3/",
36
+ # "gpt_result/lean_random/gpt3/",
37
+ "gpt_result/wild/gpt3/",
38
+ ]
39
+
40
+ # Iterate through the input paths and run the command
41
+ for input_path in input_path_lists:
42
+ k = 5
43
+ if "wild" in input_path or "gsm8k_train" in input_path or "math_train" in input_path:
44
+ print(f"wild")
45
+ print(f"Running for input path: {input_path}", file=open(output_file, "a"))
46
+ command = f"python3 gpt_pass_rate_new_notlean_test.py --input_path {input_path} --output_path {get_output(input_path,k)} --k {k}"
47
+ subprocess.run(command, shell=True, stdout=open(output_file, "a"), stderr=subprocess.STDOUT)
48
+ print("\n\n",file=open(output_file, "a"))
49
+ else:
50
+ print(f"lean")
51
+ print(f"Running for input path: {input_path}", file=open(output_file, "a"))
52
+ command = f"python3 gpt_pass_rate_new_test.py --input_path {input_path} --output_path {get_output(input_path, k)} --k {k}"
53
+ subprocess.run(command, shell=True, stdout=open(output_file, "a"), stderr=subprocess.STDOUT)
54
+ print("\n\n",file=open(output_file, "a"))
gpt_pass_rate_new_notlean_test.py ADDED
@@ -0,0 +1,289 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import subprocess
3
+ from argparse import ArgumentParser
4
+ import json
5
+ from concurrent.futures import ThreadPoolExecutor
6
+ from tqdm import tqdm
7
+ import glob
8
+ import tempfile
9
+ import random
10
+
11
+ def wrapped_function(item):
12
+ results = []
13
+ passed = 0
14
+ total = 0
15
+
16
+ temp_dir = tempfile.gettempdir()
17
+ temp_file = os.path.join(temp_dir, f"test.lean")
18
+
19
+ with open(temp_file, "w") as f:
20
+ f.write(item['cmd'])
21
+
22
+ # Rest of the function code...
23
+ # Process the item using the temporary file
24
+ # ...
25
+
26
+ # Clean up the temporary file
27
+ data = '{"path": "%s", "allTactics": true}' %(temp_file)
28
+ command = 'echo \'%s\' | lake exe repl' % data
29
+
30
+ try:
31
+ result = subprocess.run(command, shell=True, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
32
+ stdout = result.stdout.decode('utf-8')
33
+ stderr = result.stderr.decode('utf-8')
34
+ # stdout = result.stdout.decode('utf-8')
35
+ json_stdout = json.loads(stdout)
36
+ if "messages" not in json_stdout.keys():
37
+ passed += 1
38
+ # results.append({'item': item['content'], 'stdout': stdout, 'stderr': stderr, 'status': 'pass'})
39
+ results.append({ 'stdout': stdout, 'stderr': stderr, 'status': 'pass'})
40
+ except subprocess.CalledProcessError as e:
41
+ # results.append({'item': item['content'], 'error': str(e), 'status': 'nopass'})
42
+ results.append({ 'error': str(e), 'status': 'nopass'})
43
+ total += 1
44
+
45
+ pass_rate = passed / (passed + total) * 100
46
+
47
+
48
+ return {'results': results, 'pass_rate': pass_rate}
49
+
50
+ # Set the directory where your .lean files are located
51
+
52
+ # Get a list of all .lean files in the directory
53
+ # lean_files = [f for f in os.listdir(directory) if f.endswith(".lean")]
54
+ # lean_files = ["test/file.lean"]
55
+ def single(command_list, args):
56
+ results = []
57
+ passed = 0
58
+ total = 0
59
+ for item in tqdm(command_list):
60
+ with open("test/test.lean", "w", encoding = 'utf-8') as f:
61
+ f.write(item['cmd'])
62
+ data = '{"path": "test/test.lean", "allTactics": true}'
63
+ # data = '{"cmd": "%s", "allTactics": true}' % item['cmd']
64
+ command = 'echo \'%s\' | lake exe repl' % data
65
+ try:
66
+ # process = subprocess.Popen(['lake', 'exe', 'repl'], stdin=subprocess.PIPE, stdout=subprocess.PIPE,
67
+ # stderr=subprocess.PIPE)
68
+ # stdout, stderr = process.communicate(input=data.encode(encoding='utf-8'))
69
+ # stdout = stdout.decode('utf-8')
70
+ result = subprocess.run(command, shell=True, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
71
+ stdout = result.stdout.decode('utf-8')
72
+ json_stdout = json.loads(stdout)
73
+ if "messages" not in json_stdout.keys():
74
+ passed += 1
75
+ stderr = result.stderr.decode('utf-8')
76
+ results.append({
77
+ # 'item': item['content'],
78
+ 'stdout': stdout,
79
+ 'stderr': stderr,
80
+ 'status': 'pass'
81
+ })
82
+ except subprocess.CalledProcessError as e:
83
+ results.append({
84
+ # 'item': item['content'],
85
+ 'error': str(e),
86
+ 'status': 'nopass'
87
+ })
88
+ total += 1
89
+
90
+ # Calculate pass rate
91
+ pass_rate = passed / total * 100
92
+ print(pass_rate)
93
+
94
+ # Save results to a JSON file
95
+ with open('results.json', 'w') as f:
96
+ json.dump({'results': results, 'pass_rate': pass_rate}, f, indent=2, ensure_ascii=False)
97
+
98
+
99
+ def multi(command_list, output_path, k ):
100
+ results = []
101
+ passed = 0
102
+ total = 0
103
+ def execute_command(item, index):
104
+ temp_dir = '/opt/jianqiao'
105
+ def filter_json(json_data):
106
+ filtered_data = {}
107
+ for key in json_data.keys():
108
+ if key in ['question', 'answer', 'total output', 'results']:
109
+ filtered_data[key] = json_data[key]
110
+ return filtered_data
111
+ # result_dict = filter_json(item)
112
+ result_dict = item
113
+ result_dict['results'] = []
114
+
115
+ for i, cmd in enumerate(item['cmd']):
116
+ temp_file = os.path.join(temp_dir,f"{index}_test_{i}.lean") # Ensure unique filenames
117
+ with open(temp_file, "w") as f:
118
+ f.write(cmd)
119
+
120
+ data = '{"path": "%s", "allTactics": true}' % temp_file
121
+ command = f'echo \'{data}\' | lake exe repl'
122
+
123
+ try:
124
+ result = subprocess.run(command, shell=True, check=True,timeout=600, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
125
+ stdout = json.loads(result.stdout.decode('utf-8'))
126
+ stderr = result.stderr.decode('utf-8')
127
+
128
+ except subprocess.TimeoutExpired as e:
129
+ result_item = {'error': str(e), 'status': 'nopass_limit'}
130
+
131
+ except subprocess.CalledProcessError as e:
132
+ result_item = {'error': str(e), 'status': 'nopass_error'}
133
+
134
+ else:
135
+ if "messages" not in stdout and not len(stderr):
136
+ result_item = {'stdout': stdout, 'stderr': stderr, 'status': 'pass' }
137
+ elif not len(stderr) and "messages" in stdout:
138
+ flag = 0
139
+ for me in stdout['messages']:
140
+ if me['severity'] == 'error':
141
+ flag = 1
142
+ start_line = me['pos']['line'] - 1
143
+ current_column =me['pos']['column'] -1
144
+ for line_n in range(start_line - 1, 0 , -1):
145
+ line_len = len(cmd.split('\n')[line_n])
146
+ current_column += line_len + 1
147
+ if not line_len:
148
+ break
149
+ result_item = {'stdout': stdout, 'stderr': stderr, 'status': 'nopass', 'string_pos':current_column}
150
+ break
151
+ if not flag :
152
+ result_item = {'stdout': stdout, 'stderr': stderr, 'status': 'pass'}
153
+ else:
154
+ assert len(stderr)
155
+ result_item = {'stdout': stdout, 'stderr': stderr, 'status': 'nopass', 'string_pos': 0 }
156
+
157
+ result_dict['results'].append(result_item)
158
+ return result_dict
159
+
160
+
161
+ total = len(command_list)
162
+
163
+ with ThreadPoolExecutor(max_workers=128) as executor:
164
+ futures = [executor.submit(execute_command, cmd, i) for i, cmd in enumerate(command_list)]
165
+ for future in tqdm(futures, total=total, desc="Processing Commands"):
166
+ result = future.result()
167
+ results.append(result)
168
+ # if result['status'] == 'pass':
169
+ # passed += 1
170
+
171
+ def calculate_pass(result_list, k):
172
+ pass_1_count = 0
173
+ pass_k_count = 0
174
+
175
+ for result in result_list:
176
+ results = result.get('results', [])
177
+ if results:
178
+ for j in range(min(1, len(results))):
179
+ if results[j].get('status') == 'pass':
180
+ pass_1_count += 1
181
+ break
182
+
183
+ for j in range(min(k, len(results))):
184
+ if results[j].get('status') == 'pass':
185
+ pass_k_count += 1
186
+ break
187
+
188
+ pass_1 = pass_1_count / len(result_list) if result_list else 0
189
+ pass_k = pass_k_count / len(result_list) if result_list else 0
190
+
191
+ return pass_1, pass_k
192
+
193
+ pass_1, pass_k = calculate_pass(results, k)
194
+ print("Pass@1:", pass_1)
195
+ print(f"Pass@{k}:", pass_k)
196
+
197
+ # pass_rate = (passed / total) * 100
198
+ # print(f"total test: {total}")
199
+ # print(f"Pass rate: {pass_rate}%")
200
+
201
+ output_file = f"pass_rate_results/{output_path}"
202
+ # Create the directory if it doesn't exist
203
+ os.makedirs(os.path.dirname(output_file), exist_ok=True)
204
+
205
+ with open(f"{output_file}", 'w') as f:
206
+ json.dump({'results': results, 'pass_1': pass_1, f"pass_{k}":pass_k}, f, indent=2, ensure_ascii=False)
207
+
208
+ import re
209
+ def remove_simp_pattern_from_end(s):
210
+ pattern = r'@\[simp\s*.*?\]$'
211
+ return re.sub(pattern, '', s)
212
+
213
+
214
+ def get_lean(text):
215
+ content = ""
216
+ try:
217
+ code_block_pattern = r"```lean\s*\n(.*?)\n```"
218
+ code_blocks = re.findall(code_block_pattern, text, re.DOTALL)
219
+ content = "\n\n".join(code_blocks)
220
+ except:
221
+ matches = re.findall(r'```(.*?)```', text, re.DOTALL)
222
+ if len(matches):
223
+ content = "\n\n".join(matches)
224
+ finally:
225
+ if not len(content.strip()):
226
+ try:
227
+ code_block_pattern = r"```lean4\s*\n(.*?)\n```"
228
+ code_blocks = re.findall(code_block_pattern, text, re.DOTALL)
229
+ content = "\n\n".join(code_blocks)
230
+ except:
231
+ content = ''
232
+
233
+ if not len(content.strip()):
234
+ content = "theorem h : f + g = 39 := by exact rfl"
235
+ return content
236
+
237
+ def main(args):
238
+ command_list = []
239
+ # json_filename = 'data/notlean_dependency.json'
240
+ json_filename = 'data/basic_working.json'
241
+
242
+ json_item = json.load(open(json_filename, encoding='utf-8'))
243
+ working_env = json_item['working_file']
244
+
245
+ all_dicts = {}
246
+ with open(f"{args.input_path}/1.jsonl", 'r', encoding='utf-8') as rf:
247
+ for line in rf.readlines():
248
+ try:
249
+ json_item = json.loads(line)
250
+ text = get_lean(json_item['model_response']).split("#align")[0]
251
+ json_item['cmd'] = ['\n\n'.join([working_env, text])]
252
+ all_dicts[json_item['query_id']] = json_item
253
+ assert len(text) > 0
254
+ except:
255
+ import pdb
256
+ pdb.set_trace()
257
+
258
+ file_pattern = os.path.join(args.input_path, '[2-9]*.jsonl')
259
+ for file_path in glob.glob(file_pattern):
260
+ with open(file_path, 'r', encoding='utf-8') as rf:
261
+ for line in rf.readlines():
262
+ try:
263
+ json_item = json.loads(line)
264
+ text = get_lean(json_item['model_response']).split("#align")[0]
265
+ all_dicts[json_item['query_id']]['cmd'].append('\n\n'.join([working_env, text]))
266
+ assert len(text) > 0
267
+ except:
268
+ import pdb
269
+ pdb.set_trace()
270
+ for k, v in all_dicts.items():
271
+ command_list.append(v)
272
+ multi(command_list, args.output_path, args.k)
273
+
274
+ if __name__ == '__main__':
275
+ arg_parser = ArgumentParser()
276
+ arg_parser.add_argument('--data_path', type=str,
277
+ default='data/grade-school-math-master/grade_school_math/data/test.jsonl')
278
+ arg_parser.add_argument('--input_path', type=str, default='')
279
+ arg_parser.add_argument('--cuda_num', type=int, default=8)
280
+ arg_parser.add_argument('--k', type=int, default=1)
281
+ arg_parser.add_argument('--output_path', type=str, default='total.json')
282
+ arg_parser.add_argument('--generate_method', type=str,
283
+ choices=['single', 'sft', 'comp', 'self_consistency', 'single_consistency'])
284
+ arg_parser.add_argument('--method', type=str, choices=['main', 'test', 'get_data'])
285
+ args = arg_parser.parse_args()
286
+ main(args)
287
+
288
+
289
+
gpt_pass_rate_new_test.py ADDED
@@ -0,0 +1,287 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import subprocess
3
+ from argparse import ArgumentParser
4
+ import json
5
+ from concurrent.futures import ThreadPoolExecutor
6
+ from tqdm import tqdm
7
+ import glob
8
+ import tempfile
9
+ import random
10
+
11
+ def wrapped_function(item):
12
+ results = []
13
+ passed = 0
14
+ total = 0
15
+
16
+ temp_dir = tempfile.gettempdir()
17
+ temp_file = os.path.join(temp_dir, f"test.lean")
18
+
19
+ with open(temp_file, "w") as f:
20
+ f.write(item['cmd'])
21
+
22
+ # Rest of the function code...
23
+ # Process the item using the temporary file
24
+ # ...
25
+
26
+ # Clean up the temporary file
27
+ data = '{"path": "%s", "allTactics": true}' %(temp_file)
28
+ command = 'echo \'%s\' | lake exe repl' % data
29
+
30
+ try:
31
+ result = subprocess.run(command, shell=True, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
32
+ stdout = result.stdout.decode('utf-8')
33
+ stderr = result.stderr.decode('utf-8')
34
+ # stdout = result.stdout.decode('utf-8')
35
+ json_stdout = json.loads(stdout)
36
+ if "messages" not in json_stdout.keys():
37
+ passed += 1
38
+ # results.append({'item': item['content'], 'stdout': stdout, 'stderr': stderr, 'status': 'pass'})
39
+ results.append({ 'stdout': stdout, 'stderr': stderr, 'status': 'pass'})
40
+ except subprocess.CalledProcessError as e:
41
+ # results.append({'item': item['content'], 'error': str(e), 'status': 'nopass'})
42
+ results.append({ 'error': str(e), 'status': 'nopass'})
43
+ total += 1
44
+
45
+ pass_rate = passed / (passed + total) * 100
46
+
47
+
48
+ return {'results': results, 'pass_rate': pass_rate}
49
+
50
+ # Set the directory where your .lean files are located
51
+
52
+ # Get a list of all .lean files in the directory
53
+ # lean_files = [f for f in os.listdir(directory) if f.endswith(".lean")]
54
+ # lean_files = ["test/file.lean"]
55
+ def single(command_list, args):
56
+ results = []
57
+ passed = 0
58
+ total = 0
59
+ for item in tqdm(command_list):
60
+ with open("test/test.lean", "w", encoding = 'utf-8') as f:
61
+ f.write(item['cmd'])
62
+ data = '{"path": "test/test.lean", "allTactics": true}'
63
+ # data = '{"cmd": "%s", "allTactics": true}' % item['cmd']
64
+ command = 'echo \'%s\' | lake exe repl' % data
65
+ try:
66
+ # process = subprocess.Popen(['lake', 'exe', 'repl'], stdin=subprocess.PIPE, stdout=subprocess.PIPE,
67
+ # stderr=subprocess.PIPE)
68
+ # stdout, stderr = process.communicate(input=data.encode(encoding='utf-8'))
69
+ # stdout = stdout.decode('utf-8')
70
+ result = subprocess.run(command, shell=True, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
71
+ stdout = result.stdout.decode('utf-8')
72
+ json_stdout = json.loads(stdout)
73
+ if "messages" not in json_stdout.keys():
74
+ passed += 1
75
+ stderr = result.stderr.decode('utf-8')
76
+ results.append({
77
+ # 'item': item['content'],
78
+ 'stdout': stdout,
79
+ 'stderr': stderr,
80
+ 'status': 'pass'
81
+ })
82
+ except subprocess.CalledProcessError as e:
83
+ results.append({
84
+ # 'item': item['content'],
85
+ 'error': str(e),
86
+ 'status': 'nopass'
87
+ })
88
+ total += 1
89
+
90
+ # Calculate pass rate
91
+ pass_rate = passed / total * 100
92
+ print(pass_rate)
93
+
94
+ # Save results to a JSON file
95
+ with open('results.json', 'w') as f:
96
+ json.dump({'results': results, 'pass_rate': pass_rate}, f, indent=2, ensure_ascii=False)
97
+
98
+
99
def multi(command_list, output_path, k):
    """Check every candidate proof of every item in parallel; report pass@1/pass@k.

    Each item carries candidate Lean sources in item['cmd'].  Every candidate
    is written to its own file under /opt/jianqiao and checked with
    `lake exe repl` (10-minute cap per candidate).  Aggregated results are
    written to pass_rate_results/<output_path>.
    """
    results = []
    passed = 0
    total = 0

    def execute_command(item, index):
        temp_dir = '/opt/jianqiao'

        def filter_json(json_data):
            # Keep only the metadata fields worth persisting.
            filtered_data = {}
            for key in json_data.keys():
                if key in ['question', 'answer', 'total output', 'results']:
                    filtered_data[key] = json_data[key]
            return filtered_data

        result_dict = filter_json(item)
        result_dict['results'] = []

        for i, cmd in enumerate(item['cmd']):
            temp_file = os.path.join(temp_dir, f"{index}_test_{i}.lean")  # Ensure unique filenames
            with open(temp_file, "w") as f:
                f.write(cmd)

            data = '{"path": "%s", "allTactics": true}' % temp_file
            command = f'echo \'{data}\' | lake exe repl'

            try:
                result = subprocess.run(command, shell=True, check=True, timeout=600,
                                        stdout=subprocess.PIPE, stderr=subprocess.PIPE)
                stdout = json.loads(result.stdout.decode('utf-8'))
                stderr = result.stderr.decode('utf-8')
            except subprocess.TimeoutExpired as e:
                result_item = {'error': str(e), 'status': 'nopass_limit'}
            except subprocess.CalledProcessError as e:
                result_item = {'error': str(e), 'status': 'nopass_error'}
            except json.JSONDecodeError as e:
                # BUG FIX: non-JSON REPL output used to raise out of the
                # worker thread and abort the evaluation at future.result().
                result_item = {'error': str(e), 'status': 'nopass_error'}
            else:
                if "messages" not in stdout and not len(stderr):
                    result_item = {'stdout': stdout, 'stderr': stderr, 'status': 'pass'}
                elif not len(stderr) and "messages" in stdout:
                    flag = 0
                    for me in stdout['messages']:
                        if me['severity'] == 'error':
                            flag = 1
                            # Convert the first error's (line, column) into a
                            # flat character offset within the snippet.
                            start_line = me['pos']['line'] - 1
                            current_column = me['pos']['column'] - 1
                            for line_n in range(start_line - 1, 0, -1):
                                line_len = len(cmd.split('\n')[line_n])
                                current_column += line_len + 1
                                if not line_len:
                                    break
                            result_item = {'stdout': stdout, 'stderr': stderr,
                                           'status': 'nopass', 'string_pos': current_column}
                            break
                    if not flag:
                        # Only warnings/infos: still counts as a pass.
                        result_item = {'stdout': stdout, 'stderr': stderr, 'status': 'pass'}
                else:
                    assert len(stderr)
                    result_item = {'stdout': stdout, 'stderr': stderr, 'status': 'nopass', 'string_pos': 0}

            result_dict['results'].append(result_item)
        return result_dict

    total = len(command_list)

    with ThreadPoolExecutor(max_workers=128) as executor:
        futures = [executor.submit(execute_command, cmd, i) for i, cmd in enumerate(command_list)]
        for future in tqdm(futures, total=total, desc="Processing Commands"):
            results.append(future.result())

    def calculate_pass(result_list, k):
        # pass@1: first candidate passes; pass@k: any of the first k passes.
        pass_1_count = 0
        pass_k_count = 0
        for result in result_list:
            candidate_results = result.get('results', [])
            if candidate_results:
                for j in range(min(1, len(candidate_results))):
                    if candidate_results[j].get('status') == 'pass':
                        pass_1_count += 1
                        break
                for j in range(min(k, len(candidate_results))):
                    if candidate_results[j].get('status') == 'pass':
                        pass_k_count += 1
                        break
        pass_1 = pass_1_count / len(result_list) if result_list else 0
        pass_k = pass_k_count / len(result_list) if result_list else 0
        return pass_1, pass_k

    pass_1, pass_k = calculate_pass(results, k)
    print("Pass@1:", pass_1)
    print(f"Pass@{k}:", pass_k)

    output_file = f"pass_rate_results/{output_path}"
    # Create the directory if it doesn't exist
    os.makedirs(os.path.dirname(output_file), exist_ok=True)

    with open(output_file, 'w') as f:
        json.dump({'results': results, 'pass_1': pass_1, f"pass_{k}": pass_k}, f, indent=2, ensure_ascii=False)
206
+
207
import re

# A trailing Lean `@[simp ...]` attribute annotation.
_SIMP_AT_END = r'@\[simp\s*.*?\]$'


def remove_simp_pattern_from_end(s):
    """Strip a trailing `@[simp ...]` attribute from *s*, if present."""
    return re.sub(_SIMP_AT_END, '', s)
211
+
212
+
213
def get_lean(text):
    """Extract Lean source from a model response containing fenced code blocks.

    Fence styles are tried in order: ```lean, ```lean4, then anonymous ```.
    (The anonymous-fence branch of the original was unreachable dead code:
    it lived in an `except` clause, but `re.findall` does not raise on
    normal input.)  Falls back to a fixed dummy theorem so callers always
    get a non-empty snippet to feed to the checker.
    """
    fence_patterns = (
        r"```lean\s*\n(.*?)\n```",
        r"```lean4\s*\n(.*?)\n```",
        r"```(.*?)```",
    )
    for pattern in fence_patterns:
        blocks = re.findall(pattern, text, re.DOTALL)
        content = "\n\n".join(blocks)
        if content.strip():
            return content
    # Nothing extractable: return a trivially checkable placeholder.
    return "theorem h : f + g = 39 := by exact rfl"
235
+
236
def main(args):
    """Collect generated Lean proofs from JSONL shards and score them.

    Shard `1.jsonl` defines one entry per query (with its working
    environment); shards 2..9 append extra candidate generations to the
    matching query's `cmd` list.  Assembled items go to `multi` for
    pass@1 / pass@k evaluation.
    """
    all_dicts = {}
    with open(f"{args.input_path}/1.jsonl", 'r', encoding='utf-8') as rf:
        for line_no, line in enumerate(rf, 1):
            try:
                json_item = json.loads(line)
                working_env = json_item['working_file']
                # Drop everything from the first #align directive onward.
                text = get_lean(json_item['model_response']).split("#align")[0]
                json_item['cmd'] = ['\n\n'.join([working_env, text])]
                json_item['answer'] = json_item['statement_poof']
                all_dicts[json_item['query_id']] = json_item
                assert len(text) > 0
            except (json.JSONDecodeError, KeyError, AssertionError) as e:
                # Previously a bare `except:` dropped into pdb, hanging
                # unattended runs; report and skip the malformed record.
                print(f"skipping 1.jsonl:{line_no}: {e!r}")

    file_pattern = os.path.join(args.input_path, '[2-9]*.jsonl')
    for file_path in glob.glob(file_pattern):
        with open(file_path, 'r', encoding='utf-8') as rf:
            for line_no, line in enumerate(rf, 1):
                try:
                    json_item = json.loads(line)
                    working_env = json_item['working_file']
                    text = get_lean(json_item['model_response']).split("#align")[0]
                    all_dicts[json_item['query_id']]['cmd'].append('\n\n'.join([working_env, text]))
                    assert len(text) > 0
                except (json.JSONDecodeError, KeyError, AssertionError) as e:
                    print(f"skipping {file_path}:{line_no}: {e!r}")

    # Dict insertion order matches the original append loop.
    command_list = list(all_dicts.values())
    multi(command_list, args.output_path, args.k)
271
+
272
if __name__ == '__main__':
    # CLI entry point; only --input_path/--output_path/--k are consumed by
    # main(), the remaining flags are kept for pipeline compatibility.
    parser = ArgumentParser()
    parser.add_argument(
        '--data_path', type=str,
        default='data/grade-school-math-master/grade_school_math/data/test.jsonl')
    parser.add_argument('--input_path', type=str, default='')
    parser.add_argument('--cuda_num', type=int, default=8)
    parser.add_argument('--k', type=int, default=1)
    parser.add_argument('--output_path', type=str, default='total.json')
    parser.add_argument(
        '--generate_method', type=str,
        choices=['single', 'sft', 'comp', 'self_consistency', 'single_consistency'])
    parser.add_argument('--method', type=str, choices=['main', 'test', 'get_data'])
    main(parser.parse_args())
285
+
286
+
287
+
lake-manifest.json ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"version": 7,
2
+ "packagesDir": ".lake/packages",
3
+ "packages":
4
+ [{"url": "https://github.com/leanprover/std4",
5
+ "type": "git",
6
+ "subDir": null,
7
+ "rev": "e5306c3b0edefe722370b7387ee9bcd4631d6c17",
8
+ "name": "std",
9
+ "manifestFile": "lake-manifest.json",
10
+ "inputRev": "main",
11
+ "inherited": true,
12
+ "configFile": "lakefile.lean"},
13
+ {"url": "https://github.com/leanprover-community/quote4",
14
+ "type": "git",
15
+ "subDir": null,
16
+ "rev": "fd760831487e6835944e7eeed505522c9dd47563",
17
+ "name": "Qq",
18
+ "manifestFile": "lake-manifest.json",
19
+ "inputRev": "master",
20
+ "inherited": true,
21
+ "configFile": "lakefile.lean"},
22
+ {"url": "https://github.com/leanprover-community/aesop",
23
+ "type": "git",
24
+ "subDir": null,
25
+ "rev": "8be30c25e3caa06937feeb62f7ca898370f80ee9",
26
+ "name": "aesop",
27
+ "manifestFile": "lake-manifest.json",
28
+ "inputRev": "master",
29
+ "inherited": true,
30
+ "configFile": "lakefile.lean"},
31
+ {"url": "https://github.com/leanprover-community/ProofWidgets4",
32
+ "type": "git",
33
+ "subDir": null,
34
+ "rev": "fb65c476595a453a9b8ffc4a1cea2db3a89b9cd8",
35
+ "name": "proofwidgets",
36
+ "manifestFile": "lake-manifest.json",
37
+ "inputRev": "v0.0.30",
38
+ "inherited": true,
39
+ "configFile": "lakefile.lean"},
40
+ {"url": "https://github.com/leanprover/lean4-cli",
41
+ "type": "git",
42
+ "subDir": null,
43
+ "rev": "be8fa79a28b8b6897dce0713ef50e89c4a0f6ef5",
44
+ "name": "Cli",
45
+ "manifestFile": "lake-manifest.json",
46
+ "inputRev": "main",
47
+ "inherited": true,
48
+ "configFile": "lakefile.lean"},
49
+ {"url": "https://github.com/leanprover-community/import-graph.git",
50
+ "type": "git",
51
+ "subDir": null,
52
+ "rev": "61a79185b6582573d23bf7e17f2137cd49e7e662",
53
+ "name": "importGraph",
54
+ "manifestFile": "lake-manifest.json",
55
+ "inputRev": "main",
56
+ "inherited": true,
57
+ "configFile": "lakefile.lean"},
58
+ {"url": "https://github.com/leanprover-community/mathlib4",
59
+ "type": "git",
60
+ "subDir": null,
61
+ "rev": "3cecb823a74ed737c6ebc115e515eba649ec7715",
62
+ "name": "mathlib",
63
+ "manifestFile": "lake-manifest.json",
64
+ "inputRev": "3cecb823a74ed737c6ebc115e515eba649ec7715",
65
+ "inherited": false,
66
+ "configFile": "lakefile.lean"}],
67
+ "name": "REPL",
68
+ "lakeDir": ".lake"}
lakefile.lean ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import Lake
2
+ open Lake DSL
3
+
4
+ package REPL {
5
+ -- add package configuration options here
6
+ }
7
+
8
+ lean_lib REPL {
9
+ -- add library configuration options here
10
+ }
11
+
12
+ @[default_target]
13
+ lean_exe repl where
14
+ root := `REPL.Main
15
+ supportInterpreter := true
16
+
17
+ require mathlib from git "https://github.com/leanprover-community/mathlib4"@"3cecb823a74ed737c6ebc115e515eba649ec7715"
lean-toolchain ADDED
@@ -0,0 +1 @@
 
 
1
+ leanprover/lean4:v4.7.0-rc2
nohup.out ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+
2
+
3
+ Pass rate: 20.0%
4
+
openllm_pass_rate_multi_pass.py ADDED
@@ -0,0 +1,106 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pdb
2
+ import subprocess
3
+ import re
4
+
5
+ # Output file
6
+ output_file = "pass_rate_output.txt"
7
+
8
+ # Clearing the output file before appending new content
9
+ with open(output_file, "w") as file:
10
+ file.write("")
11
+
12
def get_output(input_path, k):
    """Derive the result-file path for a generation directory.

    For a path like .../zero_shot/<testset>/generation/<model>/<k>/ this
    returns "openllm_result/<model>/<testset>_pass<k>.json" (also printed).
    Returns None, with a notice, when the path lacks 'zero_shot' or is too
    short.  NOTE(review): a None return is later interpolated into the
    shell command as the literal string "None" — callers should check it.
    The `k` parameter is unused; kept for call-site compatibility.
    """
    try:
        # Split the input path based on '/'
        parts = input_path.split('/')

        # Find the index of 'zero_shot' and take components relative to it.
        zero_shot_index = parts.index('zero_shot')

        part1 = parts[zero_shot_index + 1]
        part3 = parts[zero_shot_index + 3]
        part4 = parts[zero_shot_index + 4]
        result = f"openllm_result/{part3}/{part1}_pass{part4}.json"
        print(result)
        return result
    except (ValueError, IndexError):
        # BUG FIX: was a bare `except:` (swallowed KeyboardInterrupt etc.).
        # ValueError: 'zero_shot' absent; IndexError: too few components.
        print("No match found.")
        return None
31
+
32
+
33
+ # List of input paths
34
+ input_path_lists = [
35
+ # "../auto-info/generate_result/zero_shot/wild_test/generation/deepseek-math-7b-base/1/",
36
+ # "../auto-info/generate_result/zero_shot/lean4_basic_test/generation/deepseek-math-7b-instruct/1/",
37
+ # "../auto-info/generate_result/zero_shot/lean4_random_test/generation/deepseek-math-7b-instruct/1/",
38
+ # "../auto-info/generate_result/zero_shot/lean4_basic_test/generation/llemma_7b/1/",
39
+ # "../auto-info/generate_result/zero_shot/lean4_random_test/generation/llemma_7b/1/",
40
+ # "../auto-info/generate_result/zero_shot/wild_test/generation/llemma_7b/1/",
41
+ # "../auto-info/generate_result/zero_shot/lean4_basic_test/generation/llemma_34b/1/",
42
+ # "../auto-info/generate_result/zero_shot/lean4_random_test/generation/llemma_34b/1/",
43
+ # "../auto-info/generate_result/zero_shot/wild_test/generation/llemma_34b/1/",
44
+ # "../auto-info/generate_result/zero_shot/lean4_basic_test/generation/internlm2-math-7b/1/",
45
+ # "../auto-info/generate_result/zero_shot/lean4_random_test/generation/internlm2-math-7b/1/",
46
+ # "../auto-info/generate_result/zero_shot/wild_test/generation/internlm2-math-7b/1/",
47
+ # "../auto-info/generate_result/zero_shot/lean4_basic_test/generation/Mistral-7B-Instruct-v0.2/1/",
48
+ # "../auto-info/generate_result/zero_shot/lean4_random_test/generation/Mistral-7B-Instruct-v0.2/1/",
49
+ # "../auto-info/generate_result/zero_shot/wild_test/generation/Mistral-7B-Instruct-v0.2/1/",
50
+ # "../auto-info/generate_result/zero_shot/lean4_basic_test/generation/internlm2-math-20b/1/",
51
+ # "../auto-info/generate_result/zero_shot/lean4_random_test/generation/internlm2-math-20b/1/",
52
+ # "../auto-info/generate_result/zero_shot/wild_test/generation/internlm2-math-20b/1/",
53
+
54
+ # "../auto-info/generate_result/zero_shot/lean4_basic_test/generation/deepseek-math-7b-base/5/",
55
+ # "../auto-info/generate_result/zero_shot/lean4_random_test/generation/deepseek-math-7b-base/5/",
56
+ # "../auto-info/generate_result/zero_shot/wild_test/generation/deepseek-math-7b-base/5/",
57
+ # "../auto-info/generate_result/zero_shot/lean4_basic_test/generation/deepseek-math-7b-instruct/5/",
58
+ # "../auto-info/generate_result/zero_shot/lean4_random_test/generation/deepseek-math-7b-instruct/5/",
59
+ # "../auto-info/generate_result/zero_shot/wild_test/generation/deepseek-math-7b-instruct/5/",
60
+ # "../auto-info/generate_result/zero_shot/lean4_basic_test/generation/llemma_7b/5/",
61
+ # "../auto-info/generate_result/zero_shot/lean4_random_test/generation/llemma_7b/5/",
62
+ # "../auto-info/generate_result/zero_shot/wild_test/generation/llemma_7b/5/",
63
+ # "../auto-info/generate_result/zero_shot/lean4_basic_test/generation/llemma_34b/5/",
64
+ # "../auto-info/generate_result/zero_shot/lean4_random_test/generation/llemma_34b/5/",
65
+ # "../auto-info/generate_result/zero_shot/wild_test/generation/llemma_34b/5/",
66
+ # "../auto-info/generate_result/zero_shot/lean4_basic_test/generation/internlm2-math-7b/5/",
67
+ # "../auto-info/generate_result/zero_shot/lean4_random_test/generation/internlm2-math-7b/5/",
68
+ "../auto-info/generate_result/zero_shot/wild_test/generation/internlm2-math-7b/5/",
69
+ # "../auto-info/generate_result/zero_shot/lean4_basic_test/generation/Mistral-7B-Instruct-v0.2/5/",
70
+ # "../auto-info/generate_result/zero_shot/lean4_random_test/generation/Mistral-7B-Instruct-v0.2/5/",
71
+ # "../auto-info/generate_result/zero_shot/wild_test/generation/Mistral-7B-Instruct-v0.2/5/",
72
+ # "../auto-info/generate_result/zero_shot/lean4_basic_test/generation/internlm2-math-20b/5/",
73
+ # "../auto-info/generate_result/zero_shot/lean4_random_test/generation/internlm2-math-20b/5/",
74
+ "../auto-info/generate_result/zero_shot/wild_test/generation/internlm2-math-20b/5/",
75
+ ]
76
+
77
+
78
def extract_group(input_path):
    """Return the path component four segments past 'zero_shot' (the
    sample count k), or None when the path has no 'zero_shot' segment
    or too few components after it."""
    segments = input_path.split('/')
    try:
        anchor = segments.index('zero_shot')
        return segments[anchor + 4]
    except (IndexError, ValueError):
        return None
90
+
91
# Iterate through the input paths and run the command
# Natural-language test sets (wild / gsm8k_train / math_train) go through
# the not-lean scorer; everything else is checked as Lean.  All child
# output is appended to pass_rate_output.txt.
for input_path in input_path_lists:
    k = extract_group(input_path)

    if "wild" in input_path or "gsm8k_train" in input_path or "math_train" in input_path:
        print(f"wild")
        print(f"Running for input path: {input_path}", file=open(output_file, "a"))
        # NOTE(review): get_output() may return None, which ends up as the
        # literal string "None" in the child command — confirm upstream.
        command = f"python3 openllm_pass_rate_new_notlean_test.py --input_path {input_path} --output_path {get_output(input_path,k)} --k {k}"
        subprocess.run(command, shell=True, stdout=open(output_file, "a"), stderr=subprocess.STDOUT)
        print("\n\n",file=open(output_file, "a"))
    else:
        print(f"lean")
        print(f"Running for input path: {input_path}", file=open(output_file, "a"))
        command = f"python3 openllm_pass_rate_new_test.py --input_path {input_path} --output_path {get_output(input_path, k)} --k {k}"
        subprocess.run(command, shell=True, stdout=open(output_file, "a"), stderr=subprocess.STDOUT)
        print("\n\n",file=open(output_file, "a"))
openllm_pass_rate_new_notlean_test.py ADDED
@@ -0,0 +1,265 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import subprocess
3
+ from argparse import ArgumentParser
4
+ import json
5
+ from concurrent.futures import ThreadPoolExecutor
6
+ from tqdm import tqdm
7
+ import glob
8
+ import tempfile
9
+ import random
10
+ from openllm_pass_rate_new_test import get_lean
11
+
12
def wrapped_function(item):
    """Check a single generated Lean snippet with the Lean REPL.

    Writes item['cmd'] to a temp .lean file, pipes it to `lake exe repl`,
    and returns {'results': [...], 'pass_rate': float}.  A run passes when
    the REPL's JSON output has no "messages" key.
    """
    results = []
    passed = 0
    total = 0

    temp_dir = tempfile.gettempdir()
    temp_file = os.path.join(temp_dir, f"test.lean")

    with open(temp_file, "w") as f:
        f.write(item['cmd'])

    data = '{"path": "%s", "allTactics": true}' % (temp_file)
    command = 'echo \'%s\' | lake exe repl' % data

    try:
        result = subprocess.run(command, shell=True, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        stdout = result.stdout.decode('utf-8')
        stderr = result.stderr.decode('utf-8')
        json_stdout = json.loads(stdout)
        if "messages" not in json_stdout.keys():
            passed += 1
        results.append({'stdout': stdout, 'stderr': stderr, 'status': 'pass'})
    except (subprocess.CalledProcessError, json.JSONDecodeError) as e:
        results.append({'error': str(e), 'status': 'nopass'})
    total += 1

    # BUG FIX: was `passed / (passed + total)`, which double-counts passes
    # (total already counts every attempt), so a fully passing run reported
    # 50% instead of 100%.
    pass_rate = passed / total * 100 if total else 0.0

    return {'results': results, 'pass_rate': pass_rate}
50
+
51
+ # Set the directory where your .lean files are located
52
+
53
+ # Get a list of all .lean files in the directory
54
+ # lean_files = [f for f in os.listdir(directory) if f.endswith(".lean")]
55
+ # lean_files = ["test/file.lean"]
56
def single(command_list, args):
    """Check each candidate sequentially via test/test.lean and the REPL;
    dump per-item outcomes plus the overall pass rate to results.json.
    (`args` is unused; kept for call-site compatibility.)"""
    outcomes = []
    n_pass, n_total = 0, 0
    for entry in tqdm(command_list):
        with open("test/test.lean", "w", encoding='utf-8') as fh:
            fh.write(entry['cmd'])
        payload = '{"path": "test/test.lean", "allTactics": true}'
        shell_cmd = 'echo \'%s\' | lake exe repl' % payload
        try:
            proc = subprocess.run(shell_cmd, shell=True, check=True,
                                  stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            out_text = proc.stdout.decode('utf-8')
            # "messages" appears only when the REPL reports diagnostics.
            if "messages" not in json.loads(out_text).keys():
                n_pass += 1
            outcomes.append({
                'stdout': out_text,
                'stderr': proc.stderr.decode('utf-8'),
                'status': 'pass'
            })
        except subprocess.CalledProcessError as exc:
            outcomes.append({'error': str(exc), 'status': 'nopass'})
        n_total += 1

    # Calculate pass rate
    rate = n_pass / n_total * 100
    print(rate)

    # Save results to a JSON file
    with open('results.json', 'w') as fh:
        json.dump({'results': outcomes, 'pass_rate': rate}, fh, indent=2, ensure_ascii=False)
98
+
99
+
100
def multi(command_list, output_path, k):
    """Check every candidate proof of every item in parallel; report pass@1/pass@k.

    Each item carries candidate Lean sources in item['cmd'].  Every candidate
    is written to its own file under /opt/jianqiao and checked with
    `lake exe repl` (10-minute cap per candidate).  Aggregated results are
    written to pass_rate_results/<output_path>.
    """
    results = []
    passed = 0
    total = 0

    def execute_command(item, index):
        temp_dir = '/opt/jianqiao'

        def filter_json(json_data):
            # Kept for reference; this variant persists the whole item.
            filtered_data = {}
            for key in json_data.keys():
                if key in ['question', 'answer', 'total output', 'results']:
                    filtered_data[key] = json_data[key]
            return filtered_data

        # result_dict = filter_json(item)
        result_dict = item
        result_dict['results'] = []

        for i, cmd in enumerate(item['cmd']):
            temp_file = os.path.join(temp_dir, f"{index}_test_{i}.lean")  # Ensure unique filenames
            with open(temp_file, "w") as f:
                f.write(cmd)

            data = '{"path": "%s", "allTactics": true}' % temp_file
            command = f'echo \'{data}\' | lake exe repl'

            try:
                result = subprocess.run(command, shell=True, check=True, timeout=600,
                                        stdout=subprocess.PIPE, stderr=subprocess.PIPE)
                stdout = json.loads(result.stdout.decode('utf-8'))
                stderr = result.stderr.decode('utf-8')
            except subprocess.TimeoutExpired as e:
                result_item = {'error': str(e), 'status': 'nopass_limit'}
            except subprocess.CalledProcessError as e:
                result_item = {'error': str(e), 'status': 'nopass_error'}
            except json.JSONDecodeError as e:
                # BUG FIX: non-JSON REPL output used to raise out of the
                # worker thread and abort the evaluation at future.result().
                result_item = {'error': str(e), 'status': 'nopass_error'}
            else:
                if "messages" not in stdout and not len(stderr):
                    result_item = {'stdout': stdout, 'stderr': stderr, 'status': 'pass'}
                elif not len(stderr) and "messages" in stdout:
                    flag = 0
                    for me in stdout['messages']:
                        if me['severity'] == 'error':
                            flag = 1
                            # Convert the first error's (line, column) into a
                            # flat character offset within the snippet.
                            start_line = me['pos']['line'] - 1
                            current_column = me['pos']['column'] - 1
                            for line_n in range(start_line - 1, 0, -1):
                                line_len = len(cmd.split('\n')[line_n])
                                current_column += line_len + 1
                                if not line_len:
                                    break
                            result_item = {'stdout': stdout, 'stderr': stderr,
                                           'status': 'nopass', 'string_pos': current_column}
                            break
                    if not flag:
                        # Only warnings/infos: still counts as a pass.
                        result_item = {'stdout': stdout, 'stderr': stderr, 'status': 'pass'}
                else:
                    assert len(stderr)
                    result_item = {'stdout': stdout, 'stderr': stderr, 'status': 'nopass', 'string_pos': 0}

            result_dict['results'].append(result_item)
        return result_dict

    total = len(command_list)

    with ThreadPoolExecutor(max_workers=128) as executor:
        futures = [executor.submit(execute_command, cmd, i) for i, cmd in enumerate(command_list)]
        for future in tqdm(futures, total=total, desc="Processing Commands"):
            results.append(future.result())

    def calculate_pass(result_list, k):
        # pass@1: first candidate passes; pass@k: any of the first k passes.
        pass_1_count = 0
        pass_k_count = 0
        for result in result_list:
            candidate_results = result.get('results', [])
            if candidate_results:
                for j in range(min(1, len(candidate_results))):
                    if candidate_results[j].get('status') == 'pass':
                        pass_1_count += 1
                        break
                for j in range(min(k, len(candidate_results))):
                    if candidate_results[j].get('status') == 'pass':
                        pass_k_count += 1
                        break
        pass_1 = pass_1_count / len(result_list) if result_list else 0
        pass_k = pass_k_count / len(result_list) if result_list else 0
        return pass_1, pass_k

    pass_1, pass_k = calculate_pass(results, k)
    print('total len:', len(results))
    print("Pass@1:", pass_1)
    print(f"Pass@{k}:", pass_k)

    output_file = f"pass_rate_results/{output_path}"
    # Create the directory if it doesn't exist
    os.makedirs(os.path.dirname(output_file), exist_ok=True)

    with open(output_file, 'w') as f:
        json.dump({'results': results, 'pass_1': pass_1, f"pass_{k}": pass_k}, f, indent=2, ensure_ascii=False)
209
+
210
import re


def remove_simp_pattern_from_end(s):
    """Drop a trailing `@[simp ...]` attribute annotation from *s*."""
    match = re.search(r'@\[simp\s*.*?\]$', s)
    return s[:match.start()] if match else s
214
+
215
+
216
+
217
+
218
def main(args):
    """Score generated outputs stored as JSON-lines shards under input_path.

    Each record's first k candidates in 'total output' are prefixed with
    the shared working environment from data/basic_working.json and sent
    to `multi` for pass@1 / pass@k evaluation.
    """
    command_list = []
    # The shared Lean preamble each candidate proof is appended to.
    # json_filename = 'data/notlean_dependency.json'
    json_filename = 'data/basic_working.json'
    with open(json_filename, encoding='utf-8') as jf:
        working_env = json.load(jf)['working_file']

    file_pattern = os.path.join(args.input_path, '[0-9]*.json')
    for file_path in glob.glob(file_pattern):
        with open(file_path, 'r', encoding='utf-8') as rf:
            for line_no, line in enumerate(rf, 1):
                try:
                    json_item = json.loads(line)
                    json_item['cmd'] = []
                    candidates = json_item['total output'][:min(args.k, len(json_item['total output']))]
                    for output in candidates:
                        # llemma models echo the few-shot prompt after '###'.
                        if "llemma" in args.input_path:
                            output = output.split('###')[0]
                        statement = get_lean(output.strip(), args.input_path)
                        json_item['cmd'].append('\n\n'.join([working_env, statement]))
                    json_item['answer'] = json_item['content']['answer']
                except (json.JSONDecodeError, KeyError, TypeError) as e:
                    # Previously a bare `except:` opened pdb (hanging batch
                    # runs) and the broken record was appended anyway.
                    print(f"skipping {file_path}:{line_no}: {e!r}")
                    continue
                command_list.append(json_item)

    multi(command_list, args.output_path, args.k)
246
+
247
+
248
+
249
+
250
if __name__ == '__main__':
    # Command-line interface.  Only --input_path/--output_path/--k are
    # consumed by main(); the other flags exist for pipeline compatibility.
    cli = ArgumentParser()
    cli.add_argument(
        '--data_path', type=str,
        default='data/grade-school-math-master/grade_school_math/data/test.jsonl')
    cli.add_argument('--input_path', type=str, default='')
    cli.add_argument('--cuda_num', type=int, default=8)
    cli.add_argument('--k', type=int, default=1)
    cli.add_argument('--output_path', type=str, default='total.json')
    cli.add_argument(
        '--generate_method', type=str,
        choices=['single', 'sft', 'comp', 'self_consistency', 'single_consistency'])
    cli.add_argument('--method', type=str, choices=['main', 'test', 'get_data'])
    main(cli.parse_args())
263
+
264
+
265
+
openllm_pass_rate_new_test.py ADDED
@@ -0,0 +1,306 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import subprocess
3
+ from argparse import ArgumentParser
4
+ import json
5
+ from concurrent.futures import ThreadPoolExecutor
6
+ from tqdm import tqdm
7
+ import glob
8
+ import tempfile
9
+ import random
10
+
11
def wrapped_function(item):
    """Check a single generated Lean snippet with the Lean REPL.

    Writes item['cmd'] to a temp .lean file, pipes it to `lake exe repl`,
    and returns {'results': [...], 'pass_rate': float}.  A run passes when
    the REPL's JSON output has no "messages" key.
    """
    results = []
    passed = 0
    total = 0

    temp_dir = tempfile.gettempdir()
    temp_file = os.path.join(temp_dir, f"test.lean")

    with open(temp_file, "w") as f:
        f.write(item['cmd'])

    data = '{"path": "%s", "allTactics": true}' % (temp_file)
    command = 'echo \'%s\' | lake exe repl' % data

    try:
        result = subprocess.run(command, shell=True, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        stdout = result.stdout.decode('utf-8')
        stderr = result.stderr.decode('utf-8')
        json_stdout = json.loads(stdout)
        if "messages" not in json_stdout.keys():
            passed += 1
        results.append({'stdout': stdout, 'stderr': stderr, 'status': 'pass'})
    except (subprocess.CalledProcessError, json.JSONDecodeError) as e:
        results.append({'error': str(e), 'status': 'nopass'})
    total += 1

    # BUG FIX: was `passed / (passed + total)`, which double-counts passes
    # (total already counts every attempt), so a fully passing run reported
    # 50% instead of 100%.
    pass_rate = passed / total * 100 if total else 0.0

    return {'results': results, 'pass_rate': pass_rate}
49
+
50
+ # Set the directory where your .lean files are located
51
+
52
+ # Get a list of all .lean files in the directory
53
+ # lean_files = [f for f in os.listdir(directory) if f.endswith(".lean")]
54
+ # lean_files = ["test/file.lean"]
55
def single(command_list, args):
    """Run every candidate one at a time through test/test.lean and the
    Lean REPL; write outcomes and the overall pass rate to results.json.
    (`args` is unused; kept for call-site compatibility.)"""
    records = []
    ok = 0
    seen = 0
    for candidate in tqdm(command_list):
        with open("test/test.lean", "w", encoding='utf-8') as handle:
            handle.write(candidate['cmd'])
        request = '{"path": "test/test.lean", "allTactics": true}'
        pipeline = 'echo \'%s\' | lake exe repl' % request
        try:
            completed = subprocess.run(pipeline, shell=True, check=True,
                                       stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            reply = completed.stdout.decode('utf-8')
            # A reply without "messages" means the REPL found no issues.
            if "messages" not in json.loads(reply).keys():
                ok += 1
            records.append({
                'stdout': reply,
                'stderr': completed.stderr.decode('utf-8'),
                'status': 'pass'
            })
        except subprocess.CalledProcessError as err:
            records.append({'error': str(err), 'status': 'nopass'})
        seen += 1

    # Calculate pass rate
    overall = ok / seen * 100
    print(overall)

    # Save results to a JSON file
    with open('results.json', 'w') as handle:
        json.dump({'results': records, 'pass_rate': overall}, handle, indent=2, ensure_ascii=False)
97
+
98
+
99
def multi(command_list, output_path, k ):
    """Check up to *k* candidate Lean proofs per item via `lake exe repl`.

    Each item in *command_list* carries its candidate sources in item['cmd'].
    Per-candidate results (pass / nopass plus a flat string error position)
    are collected and written to pass_rate_results/<output_path> together
    with pass@1 and pass@k.
    """
    results = []

    def execute_command(item, index):
        # Scratch directory for the per-candidate .lean files.
        temp_dir = '/opt/jianqiao'

        def filter_json(json_data):
            # Keep only the fields we want to carry into the result record.
            filtered_data = {}
            for key in json_data.keys():
                if key in ['question', 'answer', 'total output', 'results']:
                    filtered_data[key] = json_data[key]
            return filtered_data

        result_dict = filter_json(item)
        result_dict['results'] = []

        for i, cmd in enumerate(item['cmd']):
            # Unique filename per (item, candidate) so worker threads don't collide.
            temp_file = os.path.join(temp_dir, f"{index}_test_{i}.lean")
            with open(temp_file, "w") as f:
                f.write(cmd)

            data = '{"path": "%s", "allTactics": true}' % temp_file
            command = f'echo \'{data}\' | lake exe repl'

            try:
                result = subprocess.run(command, shell=True, check=True, timeout=600,
                                        stdout=subprocess.PIPE, stderr=subprocess.PIPE)
                stdout = json.loads(result.stdout.decode('utf-8'))
                stderr = result.stderr.decode('utf-8')
            except subprocess.TimeoutExpired as e:
                result_item = {'error': str(e), 'status': 'nopass_limit'}
            except subprocess.CalledProcessError as e:
                result_item = {'error': str(e), 'status': 'nopass_error'}
            else:
                if "messages" not in stdout and not len(stderr):
                    # REPL produced no diagnostics at all: clean pass.
                    result_item = {'stdout': stdout, 'stderr': stderr, 'status': 'pass' }
                elif not len(stderr) and "messages" in stdout:
                    flag = 0
                    for me in stdout['messages']:
                        if me['severity'] == 'error':
                            flag = 1
                            # Convert (line, column) into a flat character offset by
                            # walking back over preceding lines until a blank line.
                            start_line = me['pos']['line'] - 1
                            current_column = me['pos']['column'] - 1
                            for line_n in range(start_line - 1, 0, -1):
                                line_len = len(cmd.split('\n')[line_n])
                                current_column += line_len + 1
                                if not line_len:
                                    break
                            result_item = {'stdout': stdout, 'stderr': stderr,
                                           'status': 'nopass', 'string_pos': current_column}
                            break
                    if not flag:
                        # Messages present but none of severity 'error' (warnings only).
                        result_item = {'stdout': stdout, 'stderr': stderr, 'status': 'pass'}
                else:
                    assert len(stderr)
                    result_item = {'stdout': stdout, 'stderr': stderr,
                                   'status': 'nopass', 'string_pos': 0 }
            finally:
                # BUG FIX: scratch files were never deleted and accumulated on disk.
                try:
                    os.remove(temp_file)
                except OSError:
                    pass

            result_dict['results'].append(result_item)
        return result_dict

    total = len(command_list)

    with ThreadPoolExecutor(max_workers=128) as executor:
        futures = [executor.submit(execute_command, cmd, i) for i, cmd in enumerate(command_list)]
        for future in tqdm(futures, total=total, desc="Processing Commands"):
            results.append(future.result())

    def calculate_pass(result_list, k):
        """Return (pass@1, pass@k): the fraction of items whose first 1 /
        first k candidates contain at least one pass."""
        pass_1_count = 0
        pass_k_count = 0
        for result in result_list:
            item_results = result.get('results', [])
            if any(r.get('status') == 'pass' for r in item_results[:1]):
                pass_1_count += 1
            if any(r.get('status') == 'pass' for r in item_results[:k]):
                pass_k_count += 1
        pass_1 = pass_1_count / len(result_list) if result_list else 0
        pass_k = pass_k_count / len(result_list) if result_list else 0
        return pass_1, pass_k

    pass_1, pass_k = calculate_pass(results, k)
    print('total len:', len(results))
    print("Pass@1:", pass_1)
    print(f"Pass@{k}:", pass_k)

    output_file = f"pass_rate_results/{output_path}"
    # Create the directory if it doesn't exist.
    os.makedirs(os.path.dirname(output_file), exist_ok=True)

    with open(f"{output_file}", 'w') as f:
        json.dump({'results': results, 'pass_1': pass_1, f"pass_{k}": pass_k}, f, indent=2, ensure_ascii=False)
207
+
208
import re

# Trailing Lean attribute of the form `@[simp ...]` anchored at end of string.
_SIMP_SUFFIX = re.compile(r'@\[simp\s*.*?\]$')

def remove_simp_pattern_from_end(s):
    """Strip a trailing `@[simp ...]` attribute from *s*, if one is present."""
    return _SIMP_SUFFIX.sub('', s)
212
+
213
+
214
+
215
def get_lean(text, input_path):
    """Extract Lean source from a model completion *text*.

    The fenced-code dialect depends on which model produced the text, which is
    inferred from *input_path*. Falls back to any ``` fenced block, and finally
    to a dummy theorem so callers always receive non-empty Lean source.

    Raises NotImplementedError for an unrecognized model path.
    """
    # BUG FIX: `content` was previously unbound (NameError) whenever the
    # fallback branch found no ``` blocks; initialize it up front.
    content = ""
    if any(x in input_path for x in ["deepseek-math-7b-instruct", "deepseek-math-7b-base", "llemma_34b", "llemma_7b"]):
        try:
            code_block_pattern = r"```lean4\s*\n(.*?)\n```"
            code_blocks = re.findall(code_block_pattern, text, re.DOTALL)
            content = "\n\n".join(code_blocks)
        except Exception:
            # Fall back to any fenced block.
            matches = re.findall(r'```(.*?)```', text, re.DOTALL)
            if len(matches):
                content = "\n\n".join(matches)
    elif any(x in input_path for x in ["internlm2-math"]):
        try:
            # internlm emits the language tag on its own line: ```\nlean\n...
            code_block_pattern = r"```\nlean\s*\n(.*?)\n```"
            code_blocks = re.findall(code_block_pattern, text, re.DOTALL)
            content = "\n\n".join(code_blocks)
        except Exception:
            matches = re.findall(r'```(.*?)```', text, re.DOTALL)
            if len(matches):
                content = "\n\n".join(matches)
    elif any(x in input_path for x in ["Mistral-7B-Instruct-v0.2"]):
        try:
            code_block_pattern = r"```lean\s*\n(.*?)\n```"
            code_blocks = re.findall(code_block_pattern, text, re.DOTALL)
            content = "\n\n".join(code_blocks)
        except Exception:
            matches = re.findall(r'```(.*?)```', text, re.DOTALL)
            if len(matches):
                content = "\n\n".join(matches)
    else:
        raise NotImplementedError("not implmemented")

    if not len(content.strip()):
        # Placeholder that downstream checking will reliably reject.
        content = "theorem h : f + g = 39 := by exact rfl"
    return content
262
+
263
def main(args):
    """Collect generation JSONL files under args.input_path, build Lean check
    commands for the top-k outputs of each item, and hand them to multi()."""
    command_list = []
    file_pattern = os.path.join(args.input_path, '[0-9]*.json')
    for file_path in glob.glob(file_pattern):
        with open(file_path, 'r', encoding='utf-8') as rf:
            for line in rf.readlines():
                try:
                    json_item = json.loads(line)
                    working_env = json_item['content']['working_file']
                    json_item['cmd'] = []
                    # Only check the first k generations (or fewer if not enough).
                    for output in json_item['total output'][:min(args.k, len(json_item['total output']))]:
                        if "llemma" in args.input_path:
                            # llemma completions terminate the answer at '###'.
                            output = output.split('###')[0]
                        statement = get_lean(output.strip(), args.input_path)
                        json_item['cmd'].append('\n\n'.join([working_env, statement]))
                    json_item['answer'] = json_item['content']['statement_poof']
                except:
                    # NOTE(review): bare except drops into the debugger on any
                    # malformed record; this hangs non-interactive runs.
                    import pdb
                    pdb.set_trace()
                command_list.append(json_item)
    multi(command_list, args.output_path, args.k)
290
+
291
if __name__ == '__main__':
    # CLI entry point; main() only consumes --input_path, --output_path and --k.
    arg_parser = ArgumentParser()
    arg_parser.add_argument('--data_path', type=str,
                            default='data/grade-school-math-master/grade_school_math/data/test.jsonl')
    arg_parser.add_argument('--input_path', type=str, default='')
    arg_parser.add_argument('--cuda_num', type=int, default=8)
    arg_parser.add_argument('--k', type=int, default=1)
    arg_parser.add_argument('--output_path', type=str, default='total.json')
    arg_parser.add_argument('--generate_method', type=str,
                            choices=['single', 'sft', 'comp', 'self_consistency', 'single_consistency'])
    arg_parser.add_argument('--method', type=str, choices=['main', 'test', 'get_data'])
    args = arg_parser.parse_args()
    main(args)
304
+
305
+
306
+
pass_rate.py ADDED
@@ -0,0 +1,194 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import subprocess
3
+ from argparse import ArgumentParser
4
+ import json
5
+ from concurrent.futures import ThreadPoolExecutor
6
+ from tqdm import tqdm
7
+ import tempfile
8
+
9
def wrapped_function(item):
    """Check a single Lean snippet (item['cmd']) with `lake exe repl`.

    Writes the snippet to a temp file, asks the REPL to elaborate it, and
    returns {'results': [...], 'pass_rate': ...} where pass_rate is 0 or 100.
    """
    results = []
    passed = 0
    total = 0

    temp_dir = tempfile.gettempdir()
    # NOTE(review): fixed filename — concurrent callers would clobber each other.
    temp_file = os.path.join(temp_dir, "test.lean")

    with open(temp_file, "w") as f:
        f.write(item['cmd'])

    data = '{"path": "%s", "allTactics": true}' % (temp_file)
    command = 'echo \'%s\' | lake exe repl' % data

    try:
        result = subprocess.run(command, shell=True, check=True,
                                stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        stdout = result.stdout.decode('utf-8')
        stderr = result.stderr.decode('utf-8')
        json_stdout = json.loads(stdout)
        # No "messages" key means the REPL reported no diagnostics at all.
        if "messages" not in json_stdout.keys():
            passed += 1
        # NOTE(review): status is 'pass' even when diagnostics exist; only the
        # counter is gated — kept as-is for output compatibility.
        results.append({'stdout': stdout, 'stderr': stderr, 'status': 'pass'})
    except subprocess.CalledProcessError as e:
        results.append({'error': str(e), 'status': 'nopass'})
    finally:
        # BUG FIX: the scratch file was never cleaned up.
        try:
            os.remove(temp_file)
        except OSError:
            pass
    total += 1

    # BUG FIX: was passed / (passed + total), which reported 50% for a pass.
    pass_rate = passed / total * 100

    return {'results': results, 'pass_rate': pass_rate}
47
+
48
+ # Set the directory where your .lean files are located
49
+
50
+ # Get a list of all .lean files in the directory
51
+ # lean_files = [f for f in os.listdir(directory) if f.endswith(".lean")]
52
+ # lean_files = ["test/file.lean"]
53
def single(command_list):
    """Sequentially check each item's Lean snippet via `lake exe repl`.

    Each item's 'cmd' is written to test/test.lean, elaborated, and counted as
    a pass when the REPL emits no diagnostics; results go to results.json.
    """
    results = []
    passed = 0
    total = 0
    for item in tqdm(command_list):
        with open("test/test.lean", "w", encoding='utf-8') as f:
            f.write(item['cmd'])
        data = '{"path": "test/test.lean", "allTactics": true}'
        command = 'echo \'%s\' | lake exe repl' % data
        try:
            result = subprocess.run(command, shell=True, check=True,
                                    stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            stdout = result.stdout.decode('utf-8')
            json_stdout = json.loads(stdout)
            # No "messages" key means no diagnostics from the REPL.
            if "messages" not in json_stdout.keys():
                passed += 1
            stderr = result.stderr.decode('utf-8')
            results.append({
                'stdout': stdout,
                'stderr': stderr,
                'status': 'pass'
            })
        except subprocess.CalledProcessError as e:
            results.append({
                'error': str(e),
                'status': 'nopass'
            })
        total += 1

    # BUG FIX: guard against an empty command_list (ZeroDivisionError).
    pass_rate = passed / total * 100 if total else 0.0
    print(pass_rate)

    # Save results to a JSON file.
    with open('results.json', 'w') as f:
        json.dump({'results': results, 'pass_rate': pass_rate}, f, indent=2, ensure_ascii=False)
95
+
96
+
97
+
98
+
99
def multi(command_list):
    """Run every item's Lean snippet through `lake exe repl` in parallel and
    report the overall pass rate; full results are written to results.json."""
    results = []
    passed = 0
    total = 0

    def execute_command(item):
        temp_dir = '/data/tmp'
        temp_file = os.path.join(temp_dir, f"test_{item['index']}.lean")  # unique per item
        with open(temp_file, "w") as f:
            f.write(item['cmd'])

        data = '{"path": "%s", "allTactics": true}' % temp_file
        command = f'echo \'{data}\' | lake exe repl'

        try:
            result = subprocess.run(command, shell=True, check=True,
                                    stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            stdout = result.stdout.decode('utf-8')
            stderr = result.stderr.decode('utf-8')

            if "messages" not in json.loads(stdout):
                return {'stdout': stdout, 'stderr': stderr, 'status': 'pass'}
            return {'stdout': stdout, 'stderr': stderr, 'status': 'nopass'}
        except subprocess.CalledProcessError as e:
            return {'error': str(e), 'status': 'nopass'}
        finally:
            # BUG FIX: os.remove previously sat after the return statements and
            # never executed, leaking one temp file per item.
            try:
                os.remove(temp_file)
            except OSError:
                pass

    total = len(command_list)

    with ThreadPoolExecutor(max_workers=32) as executor:
        futures = [executor.submit(execute_command, {'index': i, 'cmd': cmd['cmd']}) for i, cmd in enumerate(command_list)]
        for future in tqdm(futures, total=total, desc="Processing Commands"):
            result = future.result()
            results.append(result)
            if result['status'] == 'pass':
                passed += 1

    pass_rate = (passed / total) * 100
    print(f"Pass rate: {pass_rate}%")

    with open('results.json', 'w') as f:
        json.dump({'results': results, 'pass_rate': pass_rate}, f, indent=2, ensure_ascii=False)
142
+
143
import re

# Trailing Lean attribute of the form `@[simp ...]` anchored at end of string.
_TRAILING_SIMP = re.compile(r'@\[simp\s*.*?\]$')

def remove_simp_pattern_from_end(s):
    """Strip a trailing `@[simp ...]` attribute from *s*, if one is present."""
    return _TRAILING_SIMP.sub('', s)
147
+
148
def main(args):
    """Build one Lean check command per generation item from 0..3.json under
    args.input_path and smoke-test the first one via single()."""
    command_list = []
    for i in range(4):
        with open(f"{args.input_path}/{i}.json", 'r', encoding='utf-8') as rf:
            for line in rf.readlines():
                try:
                    json_item = json.loads(line)
                    # Trim the working file back to the last blank line near its end.
                    working_env = json_item['content']['working_file'].split('\n')
                    for loc in range(len(working_env) - 3, 0, -1):
                        if not len(working_env[loc].strip()):
                            break

                    working_env = '\n'.join(working_env[:loc] + ['\n'])
                    # Keep the generated statement only up to its first blank line.
                    statement = json_item['total output'][0].split('\n')
                    for loc in range(len(statement)):
                        if not len(statement[loc].strip()):
                            break
                    statement = '\n'.join(statement[:loc] + ['\n'])
                    json_item['cmd'] = '\n'.join([working_env, statement])
                    assert len(statement) > 0
                except:
                    # NOTE(review): bare except drops into the debugger on any
                    # malformed record; this hangs non-interactive runs.
                    import pdb
                    pdb.set_trace()
                command_list.append(json_item)
    command_list = command_list  # no-op kept from the original
    results = []
    passed = 0
    total = 0
    single(command_list[:1])
181
+
182
if __name__ == '__main__':
    # CLI entry point; main() only consumes --input_path.
    arg_parser = ArgumentParser()
    arg_parser.add_argument('--data_path', type=str,
                            default='data/grade-school-math-master/grade_school_math/data/test.jsonl')
    arg_parser.add_argument('--input_path', type=str, default='')
    arg_parser.add_argument('--cuda_num', type=int, default=8)
    arg_parser.add_argument('--output_path', type=str, default='total.json')
    arg_parser.add_argument('--generate_method', type=str,
                            choices=['single', 'sft', 'comp', 'self_consistency', 'single_consistency'])
    arg_parser.add_argument('--method', type=str, choices=['main', 'test', 'get_data'])
    args = arg_parser.parse_args()
    main(args)
194
+
pass_rate_atp_pass.py ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Driver script: batch-runs the pass-rate evaluators over result directories.
import pdb
import subprocess
import re

# Output file
output_file = "pass_rate_output.txt"

# Clearing the output file before appending new content
with open(output_file, "w") as file:
    file.write("")

# List of input paths
# NOTE(review): this list is overwritten by a second assignment further down
# in the script, so these entries are effectively dead.
input_path_lists = [
    "test/zero_shot/wild_test/generation/lean4_random_15k_all/2/1/",
]
16
+
17
def get_output(input_string, k):
    """Derive an output path '<testset>/<model>pass<k>.jsonl' from a generation path.

    Expects *input_string* to contain 'zero_shot/<testset>/.../<model>'.
    Raises ValueError when the pattern is absent.
    """
    pattern = r"zero_shot/(\w+)/(.+?)/(\w+)"
    match = re.search(pattern, input_string)
    if not match:
        # BUG FIX: the original did `assert True` (a no-op) and then hit a
        # NameError on `result`; fail loudly with a real exception instead.
        print("No match found.")
        raise ValueError(f"unrecognized input path: {input_string}")
    part1 = match.group(1)
    part2 = match.group(3) + f"pass{k}.jsonl"
    result = "/".join([part1, part2])
    print(result)
    return result
29
+
30
+ # List of input paths
31
+ input_path_lists = [
32
+ # "../auto-info/generate_result/zero_shot/gsm8k_train/generation/lean4_random_15k_all/2/10/",
33
+ # "../auto-info/generate_result/zero_shot/math_train/generation/lean4_random_15k_all/2/10/",
34
+ # "../auto-info/generate_result/zero_shot/wild_test/generation/lean4_rft/1/1",
35
+ # "../auto-info/generate_result/zero_shot/wild_test/generation/lean4_rft/2/1",
36
+ # "../auto-info/generate_result/zero_shot/wild_test/generation/lean4_rft/3/1",
37
+ # "../auto-info/generate_result/zero_shot/wild_test/generation/lean4_verifier/1/1",
38
+ # "../auto-info/generate_result/zero_shot/wild_test/generation/lean4_verifier/2/1",
39
+ # "../auto-info/generate_result/zero_shot/wild_test/generation/lean4_verifier/3/1",
40
+ # "../auto-info/generate_result/zero_shot/wild_test/generation/lean4_verifier_rft/1/1",
41
+ # "../auto-info/generate_result/zero_shot/wild_test/generation/lean4_verifier_rft/2/1",
42
+ # "../auto-info/generate_result/zero_shot/wild_test/generation/lean4_verifier_rft/3/1",
43
+ # "/opt/tiger/auto-info/generate_result/zero_shot/lean4_basic_test/generation/lean4_rft/1/1/",
44
+ # "/opt/tiger/auto-info/generate_result/zero_shot/lean4_basic_test/generation/lean4_rft/2/1/",
45
+ # "/opt/tiger/auto-info/generate_result/zero_shot/lean4_basic_test/generation/lean4_rft/3/1/",
46
+ # "/opt/tiger/auto-info/generate_result/zero_shot/lean4_random_test/generation/lean4_rft/1/1/",
47
+ # "/opt/tiger/auto-info/generate_result/zero_shot/lean4_random_test/generation/lean4_rft/2/1/",
48
+ # "/opt/tiger/auto-info/generate_result/zero_shot/lean4_random_test/generation/lean4_rft/3/1/",
49
+ # "/opt/tiger/auto-info/generate_result/zero_shot/lean4_basic_test/generation/lean4_verifier/1/1/",
50
+ # "/opt/tiger/auto-info/generate_result/zero_shot/lean4_basic_test/generation/lean4_verifier/2/1/",
51
+ # "/opt/tiger/auto-info/generate_result/zero_shot/lean4_basic_test/generation/lean4_verifier/3/1/",
52
+ # "/opt/tiger/auto-info/generate_result/zero_shot/lean4_random_test/generation/lean4_verifier/1/1/",
53
+ # "/opt/tiger/auto-info/generate_result/zero_shot/lean4_random_test/generation/lean4_verifier/2/1/",
54
+ # "/opt/tiger/auto-info/generate_result/zero_shot/lean4_random_test/generation/lean4_verifier/3/1/",
55
+ # "/opt/tiger/auto-info/generate_result/zero_shot/lean4_basic_test/generation/lean4_verifier_rft/1/1/",
56
+ # "/opt/tiger/auto-info/generate_result/zero_shot/lean4_basic_test/generation/lean4_verifier_rft/2/1/",
57
+ # "/opt/tiger/auto-info/generate_result/zero_shot/lean4_basic_test/generation/lean4_verifier_rft/3/1/",
58
+ # "/opt/tiger/auto-info/generate_result/zero_shot/lean4_random_test/generation/lean4_verifier_rft/1/1/",
59
+ # "/opt/tiger/auto-info/generate_result/zero_shot/lean4_random_test/generation/lean4_verifier_rft/2/1/",
60
+ # "/opt/tiger/auto-info/generate_result/zero_shot/lean4_random_test/generation/lean4_verifier_rft/3/1/",
61
+ # "/opt/tiger/auto-info/generate_result/zero_shot/wild_test/generation/lean4_rft/1/1/",
62
+ # "/opt/tiger/auto-info/generate_result/zero_shot/wild_test/generation/lean4_rft/2/1/",
63
+ # "/opt/tiger/auto-info/generate_result/zero_shot/wild_test/generation/lean4_rft/3/1/",
64
+ # "/opt/tiger/auto-info/generate_result/zero_shot/wild_test/generation/lean4_verifier/1/1/",
65
+ # "/opt/tiger/auto-info/generate_result/zero_shot/wild_test/generation/lean4_verifier/2/1/",
66
+ # "/opt/tiger/auto-info/generate_result/zero_shot/wild_test/generation/lean4_verifier/3/1/",
67
+ # "/opt/tiger/auto-info/generate_result/zero_shot/wild_test/generation/lean4_verifier_rft/1/1/",
68
+ # "/opt/tiger/auto-info/generate_result/zero_shot/wild_test/generation/lean4_verifier_rft/2/1/",
69
+ # "/opt/tiger/auto-info/generate_result/zero_shot/wild_test/generation/lean4_verifier_rft/3/1/",
70
+ # "/opt/tiger/auto-info/generate_result/zero_shot/lean4_15k_train/generation/lean4_random_15k_all/2/20/",
71
+ # "/opt/tiger/auto-info/generate_result/zero_shot/lean4_basic_test/generation/lean4_random_15k_all/2/5/",
72
+ # "/opt/tiger/auto-info/generate_result/zero_shot/lean4_random_test/generation/lean4_random_15k_all/2/5/",
73
+ # "/opt/tiger/mariana/auto-info/generate_result/zero_shot/lean4_random_test/generation/lean4_random_5k/2/1/",
74
+ # "test/zero_shot/lean4_random_test/generation/lean4_random_15k_all/3/1/",
75
+ # "/opt/tiger/mariana/auto-info/generate_result/zero_shot/lean4_basic_test/generation/lean4_random_15k_all/2/1/",
76
+ # "/opt/tiger/mariana/auto-info/generate_result/zero_shot/lean4_random_test/generation/lean4_random_15k_all/2/1/",
77
+ # "test/zero_shot/lean4_random_test/generation/lean4_random_15k_all/3/1/",
78
+ # "test/zero_shot/lean4_basic_test/generation/lean4_random_15k_all/3/1/",
79
+ # "/opt/tiger/auto-info/generate_result/zero_shot/lean4_basic_test/generation/lean4_random_15k_all_mathrft/1/1/",
80
+ # "/opt/tiger/auto-info/generate_result/zero_shot/lean4_random_test/generation/lean4_random_15k_all_mathrft/1/1/",
81
+ # "/opt/tiger/auto-info/generate_result/zero_shot/wild_test/generation/lean4_random_15k_all_mathrft/1/1/",
82
+ # "/opt/tiger/auto-info/generate_result/zero_shot/lean4_basic_test/generation/lean4_random_15k_all_mathrft/2/1/",
83
+ # "/opt/tiger/auto-info/generate_result/zero_shot/lean4_random_test/generation/lean4_random_15k_all_mathrft/2/1/",
84
+ # "/opt/tiger/auto-info/generate_result/zero_shot/wild_test/generation/lean4_random_15k_all_mathrft/2/1/",
85
+ # "/opt/tiger/auto-info/generate_result/zero_shot/lean4_basic_test/generation/lean4_random_15k_all_mathrft/3/1/",
86
+ # "/opt/tiger/auto-info/generate_result/zero_shot/lean4_random_test/generation/lean4_random_15k_all_mathrft/3/1/",
87
+ # "/opt/tiger/auto-info/generate_result/zero_shot/wild_test/generation/lean4_random_15k_all_mathrft/3/1/",
88
+ # "/opt/tiger/auto-info/generate_result/zero_shot/gsm8k_train/generation/lean4_random_15k_all_mathrft/2/10/",
89
+ # "/opt/tiger/auto-info/generate_result/zero_shot/math_train/generation/lean4_random_15k_all_mathrft/2/10/",
90
+ # "/opt/tiger/auto-info/generate_result/zero_shot/lean4_15k_train/generation/lean4_random_15k_all_mathrft/2/10/",
91
+ # Add more input paths as needed
92
+ "/opt/tiger/auto-info/generate_result/zero_shot/lean4_basic_test/generation/lean4_random_15k_all_mathrft/2/5/",
93
+ "/opt/tiger/auto-info/generate_result/zero_shot/lean4_random_test/generation/lean4_random_15k_all_mathrft/2/5/",
94
+ "/opt/tiger/auto-info/generate_result/zero_shot/wild_test/generation/lean4_random_15k_all_mathrft/2/5/",
95
+ ]
96
+
97
# Iterate through the input paths and run the matching evaluator; all child
# output is appended to pass_rate_output.txt.
for input_path in input_path_lists:
    k = 5
    # "wild"/GSM8K/MATH sets have no Lean ground truth and use the notlean checker.
    if "wild_test" in input_path or "gsm8k_train" in input_path or "math_train" in input_path:
        print(f"wild")
        print(f"Running for input path: {input_path}", file=open(output_file, "a"))
        command = f"python3 pass_rate_notlean_test.py --input_path {input_path} --output_path {get_output(input_path,k)} --k {k}"
        # NOTE(review): the open(...) handles passed to print/run are never closed.
        subprocess.run(command, shell=True, stdout=open(output_file, "a"), stderr=subprocess.STDOUT)
        print("\n\n",file=open(output_file, "a"))

    else:
        print(f"lean")
        print(f"Running for input path: {input_path}", file=open(output_file, "a"))
        command = f"python3 pass_rate_new_test.py --input_path {input_path} --output_path {get_output(input_path, k)} --k {k}"
        subprocess.run(command, shell=True, stdout=open(output_file, "a"), stderr=subprocess.STDOUT)
        print("\n\n",file=open(output_file, "a"))
pass_rate_atp_test.py ADDED
@@ -0,0 +1,264 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import subprocess
3
+ from argparse import ArgumentParser
4
+ import json
5
+ from concurrent.futures import ThreadPoolExecutor
6
+ from tqdm import tqdm
7
+ import glob
8
+ import tempfile
9
+ import random
10
+
11
def wrapped_function(item):
    """Check a single Lean snippet (item['cmd']) with `lake exe repl`.

    Writes the snippet to a temp file, asks the REPL to elaborate it, and
    returns {'results': [...], 'pass_rate': ...} where pass_rate is 0 or 100.
    """
    results = []
    passed = 0
    total = 0

    temp_dir = tempfile.gettempdir()
    # NOTE(review): fixed filename — concurrent callers would clobber each other.
    temp_file = os.path.join(temp_dir, "test.lean")

    with open(temp_file, "w") as f:
        f.write(item['cmd'])

    data = '{"path": "%s", "allTactics": true}' % (temp_file)
    command = 'echo \'%s\' | lake exe repl' % data

    try:
        result = subprocess.run(command, shell=True, check=True,
                                stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        stdout = result.stdout.decode('utf-8')
        stderr = result.stderr.decode('utf-8')
        json_stdout = json.loads(stdout)
        # No "messages" key means the REPL reported no diagnostics at all.
        if "messages" not in json_stdout.keys():
            passed += 1
        # NOTE(review): status is 'pass' even when diagnostics exist; only the
        # counter is gated — kept as-is for output compatibility.
        results.append({'stdout': stdout, 'stderr': stderr, 'status': 'pass'})
    except subprocess.CalledProcessError as e:
        results.append({'error': str(e), 'status': 'nopass'})
    finally:
        # BUG FIX: the scratch file was never cleaned up.
        try:
            os.remove(temp_file)
        except OSError:
            pass
    total += 1

    # BUG FIX: was passed / (passed + total), which reported 50% for a pass.
    pass_rate = passed / total * 100

    return {'results': results, 'pass_rate': pass_rate}
49
+
50
+ # Set the directory where your .lean files are located
51
+
52
+ # Get a list of all .lean files in the directory
53
+ # lean_files = [f for f in os.listdir(directory) if f.endswith(".lean")]
54
+ # lean_files = ["test/file.lean"]
55
def single(command_list, args):
    """Sequentially check each item's Lean snippet via `lake exe repl`.

    Each item's 'cmd' is written to test/test.lean, elaborated, and counted as
    a pass when the REPL emits no diagnostics; results go to results.json.
    """
    results = []
    passed = 0
    total = 0
    for item in tqdm(command_list):
        with open("test/test.lean", "w", encoding='utf-8') as f:
            f.write(item['cmd'])
        data = '{"path": "test/test.lean", "allTactics": true}'
        command = 'echo \'%s\' | lake exe repl' % data
        try:
            result = subprocess.run(command, shell=True, check=True,
                                    stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            stdout = result.stdout.decode('utf-8')
            json_stdout = json.loads(stdout)
            # No "messages" key means no diagnostics from the REPL.
            if "messages" not in json_stdout.keys():
                passed += 1
            stderr = result.stderr.decode('utf-8')
            results.append({
                'stdout': stdout,
                'stderr': stderr,
                'status': 'pass'
            })
        except subprocess.CalledProcessError as e:
            results.append({
                'error': str(e),
                'status': 'nopass'
            })
        total += 1

    # BUG FIX: guard against an empty command_list (ZeroDivisionError).
    pass_rate = passed / total * 100 if total else 0.0
    print(pass_rate)

    # Save results to a JSON file.
    with open('results.json', 'w') as f:
        json.dump({'results': results, 'pass_rate': pass_rate}, f, indent=2, ensure_ascii=False)
97
+
98
+
99
def multi(command_list, output_path, k ):
    """Check up to *k* candidate Lean proofs per item via `lake exe repl`.

    Each item in *command_list* carries its candidate sources in item['cmd'].
    Per-candidate results (pass / nopass) are collected and written to
    pass_rate_results/<output_path> together with pass@1 and pass@k.
    """
    results = []

    def execute_command(item, index):
        # Scratch directory for the per-candidate .lean files.
        temp_dir = '/opt/jianqiao'

        def filter_json(json_data):
            # Keep only the fields we want to carry into the result record.
            filtered_data = {}
            for key in json_data.keys():
                if key in ['question', 'answer', 'total output', 'results']:
                    filtered_data[key] = json_data[key]
            return filtered_data

        result_dict = filter_json(item)
        result_dict['results'] = []

        for i, cmd in enumerate(item['cmd']):
            # Unique filename per (item, candidate) so worker threads don't collide.
            temp_file = os.path.join(temp_dir, f"{index}_test_{i}.lean")
            with open(temp_file, "w") as f:
                f.write(cmd)

            data = '{"path": "%s", "allTactics": true}' % temp_file
            command = f'echo \'{data}\' | lake exe repl'

            try:
                result = subprocess.run(command, shell=True, check=True, timeout=600,
                                        stdout=subprocess.PIPE, stderr=subprocess.PIPE)
                stdout = json.loads(result.stdout.decode('utf-8'))
                stderr = result.stderr.decode('utf-8')
            except subprocess.TimeoutExpired as e:
                result_item = {'error': str(e), 'status': 'nopass_limit'}
            except subprocess.CalledProcessError as e:
                result_item = {'error': str(e), 'status': 'nopass_error'}
            else:
                if "messages" not in stdout and not len(stderr):
                    # REPL produced no diagnostics at all: clean pass.
                    result_item = {'stdout': stdout, 'stderr': stderr, 'status': 'pass' }
                elif not len(stderr) and "messages" in stdout:
                    flag = 0
                    for me in stdout['messages']:
                        if me['severity'] == 'error':
                            flag = 1
                            # String-offset reconstruction is disabled here; -1 marks
                            # "position not computed".
                            current_column = -1
                            result_item = {'stdout': stdout, 'stderr': stderr,
                                           'status': 'nopass', 'string_pos': current_column}
                            break
                    if not flag:
                        # Messages present but none of severity 'error' (warnings only).
                        result_item = {'stdout': stdout, 'stderr': stderr, 'status': 'pass'}
                else:
                    assert len(stderr)
                    result_item = {'stdout': stdout, 'stderr': stderr,
                                   'status': 'nopass', 'string_pos': 0 }
            finally:
                # BUG FIX: scratch files were never deleted and accumulated on disk.
                try:
                    os.remove(temp_file)
                except OSError:
                    pass

            result_dict['results'].append(result_item)
        return result_dict

    total = len(command_list)

    with ThreadPoolExecutor(max_workers=128) as executor:
        futures = [executor.submit(execute_command, cmd, i) for i, cmd in enumerate(command_list)]
        for future in tqdm(futures, total=total, desc="Processing Commands"):
            results.append(future.result())

    def calculate_pass(result_list, k):
        """Return (pass@1, pass@k): the fraction of items whose first 1 /
        first k candidates contain at least one pass."""
        pass_1_count = 0
        pass_k_count = 0
        for result in result_list:
            item_results = result.get('results', [])
            if any(r.get('status') == 'pass' for r in item_results[:1]):
                pass_1_count += 1
            if any(r.get('status') == 'pass' for r in item_results[:k]):
                pass_k_count += 1
        pass_1 = pass_1_count / len(result_list) if result_list else 0
        pass_k = pass_k_count / len(result_list) if result_list else 0
        return pass_1, pass_k

    pass_1, pass_k = calculate_pass(results, k)
    print("Pass@1:", pass_1)
    print(f"Pass@{k}:", pass_k)

    output_file = f"pass_rate_results/{output_path}"
    # Create the directory if it doesn't exist.
    os.makedirs(os.path.dirname(output_file), exist_ok=True)

    with open(f"{output_file}", 'w') as f:
        json.dump({'results': results, 'pass_1': pass_1, f"pass_{k}": pass_k}, f, indent=2, ensure_ascii=False)
207
+
208
import re

# Trailing Lean attribute of the form `@[simp ...]` anchored at end of string.
_SIMP_AT_END = re.compile(r'@\[simp\s*.*?\]$')

def remove_simp_pattern_from_end(s):
    """Strip a trailing `@[simp ...]` attribute from *s*, if one is present."""
    return _SIMP_AT_END.sub('', s)
212
+
213
def main(args):
    """Assemble MiniF2F check commands — shared import header + per-item
    environment header + (formal statement := generated proof) — then multi()."""
    command_list = []
    file_pattern = os.path.join(args.input_path, '[0-1]*.json')
    # Shared import header prepended to every candidate file.
    head_list = ''
    with open("MiniF2F/Minif2fImport.lean", 'r', encoding='utf8') as rf:
        for line in rf.readlines():
            head_list += line
    for file_path in glob.glob(file_pattern):
        with open(file_path, 'r', encoding='utf-8') as rf:
            for line in rf.readlines():
                try:
                    json_item = json.loads(line)
                    working_env = json_item['content']['header']
                    json_item['cmd'] = []
                    # Only check the first k generations (or fewer if not enough).
                    for output in json_item['total output'][:min(args.k, len(json_item['total output']))]:
                        # Drop everything from '#align' on; keep only the proof body.
                        proof = output.split("#align")[0]
                        # Statement up to ':=' is ground truth; the proof comes from the model.
                        atp = json_item['content']['formal_statement'].split(":=")[0] + ":=" + proof

                        json_item['cmd'].append('\n\n'.join( [head_list, working_env, atp]))
                except:
                    # NOTE(review): bare except drops into the debugger on any
                    # malformed record; this hangs non-interactive runs.
                    import pdb
                    pdb.set_trace()
                command_list.append(json_item)
    multi(command_list, args.output_path, args.k)
248
+
249
if __name__ == '__main__':
    # CLI entry point; main() only consumes --input_path, --output_path and --k.
    arg_parser = ArgumentParser()
    arg_parser.add_argument('--data_path', type=str,
                            default='/opt/tiger/CLIP/theorem_proving/generate_result/zero_shot/minif2f_test/generation/lean5_random_15k_all_mathrft/2/5/')
    arg_parser.add_argument('--input_path', type=str, default='/opt/tiger/CLIP/theorem_proving/generate_result/zero_shot/minif2f_test/generation/lean5_random_15k_all_mathrft/2/5/')
    arg_parser.add_argument('--cuda_num', type=int, default=8)
    arg_parser.add_argument('--k', type=int, default=5)
    arg_parser.add_argument('--output_path', type=str, default='total.json')
    arg_parser.add_argument('--generate_method', type=str,
                            choices=['single', 'sft', 'comp', 'self_consistency', 'single_consistency'])
    arg_parser.add_argument('--method', type=str, choices=['main', 'test', 'get_data'])
    args = arg_parser.parse_args()
    main(args)
262
+
263
+
264
+
pass_rate_found_item.py ADDED
@@ -0,0 +1,175 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import subprocess
3
+ from argparse import ArgumentParser
4
+ import json
5
+ from concurrent.futures import ThreadPoolExecutor
6
+ from tqdm import tqdm
7
+ import tempfile
8
+
9
def wrapped_function(item):
    """Check a single Lean snippet (item['cmd']) with `lake exe repl`.

    Writes the snippet to a temp file, asks the REPL to elaborate it, and
    classifies the run as 'pass' when the REPL's JSON output contains no
    "messages" key (i.e. no errors or warnings were reported).

    Returns a dict {'results': [...], 'pass_rate': float}; for this single
    item the pass rate is either 100.0 or 0.0.
    """
    results = []
    passed = 0
    total = 0

    temp_dir = tempfile.gettempdir()
    # NOTE(review): the filename is not unique, so concurrent callers would
    # clobber each other's snippet — confirm this is only invoked serially.
    temp_file = os.path.join(temp_dir, "test.lean")

    with open(temp_file, "w") as f:
        f.write(item['cmd'])

    data = '{"path": "%s", "allTactics": true}' % (temp_file,)
    command = 'echo \'%s\' | lake exe repl' % data

    try:
        result = subprocess.run(command, shell=True, check=True,
                                stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        stdout = result.stdout.decode('utf-8')
        stderr = result.stderr.decode('utf-8')
        json_stdout = json.loads(stdout)
        # "messages" holds compiler errors/warnings; its absence means success.
        if "messages" not in json_stdout:
            passed += 1
            results.append({'stdout': stdout, 'stderr': stderr, 'status': 'pass'})
        else:
            # BUG FIX: the original appended status 'pass' unconditionally,
            # even when the REPL reported error messages.
            results.append({'stdout': stdout, 'stderr': stderr, 'status': 'nopass'})
    except subprocess.CalledProcessError as e:
        # Non-zero exit (missing lake, REPL crash, ...) counts as a failure.
        results.append({'error': str(e), 'status': 'nopass'})
    total += 1

    # BUG FIX: `total` already counts every item, so the denominator is just
    # `total`; the original `passed / (passed + total)` double-counted passes.
    pass_rate = passed / total * 100

    return {'results': results, 'pass_rate': pass_rate}
47
+
48
+ # Set the directory where your .lean files are located
49
+
50
+ # Get a list of all .lean files in the directory
51
+ # lean_files = [f for f in os.listdir(directory) if f.endswith(".lean")]
52
+ # lean_files = ["test/file.lean"]
53
def single(command_list):
    """Sequentially check each item's Lean snippet via `lake exe repl`.

    Each item['cmd'] is written to test/test.lean and fed to the REPL; an
    item passes when the REPL's JSON output has no "messages" key.  Prints
    the overall pass rate and dumps per-item results to results.json.
    """
    results = []
    passed = 0
    total = 0
    for item in tqdm(command_list):
        with open("test/test.lean", "w", encoding='utf-8') as f:
            f.write(item['cmd'])
        data = '{"path": "test/test.lean", "allTactics": true}'
        command = 'echo \'%s\' | lake exe repl' % data
        try:
            result = subprocess.run(command, shell=True, check=True,
                                    stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            stdout = result.stdout.decode('utf-8')
            json_stdout = json.loads(stdout)
            stderr = result.stderr.decode('utf-8')
            # "messages" carries compiler errors/warnings; absence means success.
            if "messages" not in json_stdout:
                passed += 1
                status = 'pass'
            else:
                # BUG FIX: the original labelled every non-crashing run 'pass',
                # even when the REPL reported error messages.
                status = 'nopass'
            results.append({'stdout': stdout, 'stderr': stderr, 'status': status})
        except subprocess.CalledProcessError as e:
            results.append({'error': str(e), 'status': 'nopass'})
        total += 1

    # BUG FIX: guard against an empty command list (ZeroDivisionError).
    pass_rate = passed / total * 100 if total else 0.0
    print(pass_rate)

    # Save results to a JSON file
    with open('results.json', 'w') as f:
        json.dump({'results': results, 'pass_rate': pass_rate}, f, indent=2, ensure_ascii=False)
95
+
96
+
97
+
98
+
99
def multi(command_list):
    """Check all items concurrently (32 worker threads) via `lake exe repl`.

    Each item's snippet is written to a uniquely named temp file under
    /data/tmp, verified by the REPL, and classified 'pass'/'nopass'.
    Prints the overall pass rate and writes results.json.
    """
    results = []
    passed = 0
    total = 0

    def execute_command(item):
        temp_dir = '/data/tmp'  # assumes this scratch directory exists — TODO confirm
        temp_file = os.path.join(temp_dir, f"test_{item['index']}.lean")  # Ensure unique filenames
        with open(temp_file, "w") as f:
            f.write(item['cmd'])

        data = '{"path": "%s", "allTactics": true}' % temp_file
        command = f'echo \'{data}\' | lake exe repl'

        try:
            result = subprocess.run(command, shell=True, check=True,
                                    stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            stdout = result.stdout.decode('utf-8')
            stderr = result.stderr.decode('utf-8')

            if "messages" not in json.loads(stdout):
                return {'stdout': stdout, 'stderr': stderr, 'status': 'pass'}
            else:
                return {'stdout': stdout, 'stderr': stderr, 'status': 'nopass'}

        except subprocess.CalledProcessError as e:
            return {'error': str(e), 'status': 'nopass'}
        finally:
            # BUG FIX: the original `os.remove(temp_file)` sat *after* the
            # return statements and never ran, leaking one file per item.
            if os.path.exists(temp_file):
                os.remove(temp_file)

    total = len(command_list)

    with ThreadPoolExecutor(max_workers=32) as executor:
        futures = [executor.submit(execute_command, {'index': i, 'cmd': cmd['cmd']})
                   for i, cmd in enumerate(command_list)]
        for future in tqdm(futures, total=total, desc="Processing Commands"):
            result = future.result()
            results.append(result)
            if result['status'] == 'pass':
                passed += 1

    # BUG FIX: avoid ZeroDivisionError when the command list is empty.
    pass_rate = (passed / total) * 100 if total else 0.0
    print(f"Pass rate: {pass_rate}%")

    with open('results.json', 'w') as f:
        json.dump({'results': results, 'pass_rate': pass_rate}, f, indent=2, ensure_ascii=False)
142
+
143
import re

def remove_simp_pattern_from_end(s):
    """Strip a trailing ``@[simp ...]`` attribute from *s*, if one is present."""
    return re.sub(r'@\[simp\s*.*?\]$', '', s)
147
+
148
def main():
    """Build `cmd` strings from the all_basic.jsonl dump and write the
    filtered records (working file shorter than 10k chars) to 1k_test.jsonl.
    """
    input_file = f"/data/haiming/multilevel_isabelle-main/lean4/repl/self_autoformalization/data/mma_filepath/all_basic.jsonl"

    # BUG FIX: the original leaked the input file handle (bare open()).
    with open(input_file, 'r', encoding='utf-8') as rf:
        command_list = json.load(rf)
    new_list = []
    for json_item in command_list:
        try:
            working_env = json_item['working_file']
            statement = json_item['statement_poof']
            json_item['cmd'] = '\n'.join([working_env, statement])
            assert len(statement) > 0
            # Very long working files blow up the REPL; keep short ones only.
            if len(working_env) < 10000:
                new_list.append(json_item)

        except (KeyError, AssertionError) as e:
            # BUG FIX: the original dropped into pdb.set_trace() here, which
            # hangs unattended runs; skip malformed records instead.
            print(f"skipping malformed item: {e!r}")

    output_file = "/data/haiming/multilevel_isabelle-main/data/lean4_basic/1k_test.jsonl"
    with open(output_file, 'w', encoding='utf-8') as file:
        json.dump(new_list, file, indent=4, ensure_ascii=False)


    # multi(new_list)
172
+
173
if __name__ == '__main__':
    # Script entry point: filter the basic dataset and write the test split.
    main()
175
+
pass_rate_multi.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pdb
2
+ import subprocess
3
+ import re
4
+
5
+ # Output file
6
+ output_file = "pass_rate_output.txt"
7
+
8
+ # Clearing the output file before appending new content
9
+ with open(output_file, "w") as file:
10
+ file.write("")
11
+
12
+ # List of input paths
13
+ input_path_lists = [
14
+ "test/zero_shot/wild_test/generation/lean4_random_15k_all/2/1/",
15
+ ]
16
+
17
+ def get_output(input_string):
18
+ pattern = r"zero_shot/(\w+)/(.+?)/(\w+)"
19
+ match = re.search(pattern, input_string)
20
+ if match:
21
+ part1 = match.group(1)
22
+ part2 = match.group(3) + ".jsonl"
23
+ result = "/".join([part1, part2])
24
+ print(result)
25
+ else:
26
+ print("No match found.")
27
+ assert True
28
+ return result
29
+
30
+ # List of input paths
31
+ input_path_lists = [
32
+ "/opt/tiger/auto-info/generate_result/zero_shot/lean4_basic_test/generation/lean4_random_5k/2/1/",
33
+ # "/opt/tiger/mariana/auto-info/generate_result/zero_shot/lean4_random_test/generation/lean4_random_5k/2/1/",
34
+ # "test/zero_shot/lean4_random_test/generation/lean4_random_15k_all/3/1/",
35
+ # "/opt/tiger/mariana/auto-info/generate_result/zero_shot/lean4_basic_test/generation/lean4_random_15k_all/2/1/",
36
+ # "/opt/tiger/mariana/auto-info/generate_result/zero_shot/lean4_random_test/generation/lean4_random_15k_all/2/1/",
37
+ # "test/zero_shot/lean4_random_test/generation/lean4_random_15k_all/3/1/",
38
+ # "test/zero_shot/lean4_basic_test/generation/lean4_random_15k_all/3/1/",
39
+ # Add more input paths as needed
40
+ ]
41
+
42
+ # Iterate through the input paths and run the command
43
+ for input_path in input_path_lists:
44
+ print(f"Running for input path: {input_path}", file=open(output_file, "a"))
45
+ command = f"python3 pass_rate_new.py --input_path {input_path} --output_path {get_output(input_path)}"
46
+ subprocess.run(command, shell=True, stdout=open(output_file, "a"), stderr=subprocess.STDOUT)
47
+ print("\n\n",file=open(output_file, "a"))
48
+
pass_rate_multi_notlean.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import subprocess
2
+ import re
3
+
4
+ # Output file
5
+ output_file = "pass_rate_output_lean.txt"
6
+
7
+ # Clearing the output file before appending new content
8
+ with open(output_file, "w") as file:
9
+ file.write("")
10
+
11
+ # List of input paths
12
+ input_path_lists = [
13
+ "/opt/tiger/mariana/auto-info/generate_result/zero_shot/math_train/generation/lean4_random_5k/2/1/",
14
+ "/opt/tiger/mariana/auto-info/generate_result/zero_shot/math_train/generation/lean4_random_15k_all/2/1/",
15
+ "/opt/tiger/mariana/auto-info/generate_result/zero_shot/gsm8k_train/generation/lean4_random_5k/2/1/",
16
+ "/opt/tiger/mariana/auto-info/generate_result/zero_shot/gsm8k_train/generation/lean4_random_15k_all/2/1/",
17
+ # "test/zero_shot/wild_test/generation/lean4_random_15k_all/2/1/",
18
+ # "test/zero_shot/math_train/generation/lean4_random_15k_all/2/1/",
19
+ # "test/zero_shot/gsm8k_train/generation/lean4_random_15k_all/2/1/",
20
+ ]
21
+
22
def get_output(input_string):
    """Map a path like ``.../zero_shot/<dataset>/.../<model>/...`` to the
    per-dataset output filename ``<dataset>/<model>.jsonl``.

    Raises ValueError when the path does not match the expected layout.
    """
    pattern = r"zero_shot/(\w+)/(.+?)/(\w+)"
    match = re.search(pattern, input_string)
    if match:
        part1 = match.group(1)
        part2 = match.group(3) + ".jsonl"
        result = "/".join([part1, part2])
        print(result)
    else:
        # BUG FIX: the original did `assert True` (a no-op) and then returned
        # the unbound `result`, raising UnboundLocalError; fail loudly instead.
        raise ValueError(f"No match found in input path: {input_string}")
    return result
34
+
35
# Iterate through the input paths and run the command
# Each run shells out to pass_rate_notlean.py, appending output to output_file.
# NOTE(review): each `open(output_file, "a")` leaks a handle (never closed) —
# a single `with open(...)` around the loop would be safer.
for input_path in input_path_lists:
    print(f"Running for input path: {input_path}", file=open(output_file, "a"))
    command = f"python3 pass_rate_notlean.py --input_path {input_path} --output_path {get_output(input_path)}"
    subprocess.run(command, shell=True, stdout=open(output_file, "a"), stderr=subprocess.STDOUT)
    print("\n\n",file=open(output_file, "a"))
pass_rate_multi_notlean_pass.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import subprocess
2
+ import re
3
+
4
+ # Output file
5
+ output_file = "pass_rate_output.txt"
6
+
7
+ # Clearing the output file before appending new content
8
+ with open(output_file, "w") as file:
9
+ file.write("")
10
+
11
+ # List of input paths
12
+ input_path_lists = [
13
+ "/opt/tiger/auto-info/generate_result/zero_shot/math_train/generation/mistral-qa-gsm8kmath-autoform-forml4-rft-math/1/10",
14
+ "/opt/tiger/auto-info/generate_result/zero_shot/math_train/generation/mistral-qa-gsm8kmath-autoform-forml4-rft-math/2/10",
15
+ "/opt/tiger/auto-info/generate_result/zero_shot/math_train/generation/mistral-qa-gsm8kmath-autoform-forml4-rft-math/3/10",
16
+ # "test/zero_shot/wild_test/generation/lean4_random_15k_all/2/1/",
17
+ # "test/zero_shot/math_train/generation/lean4_random_15k_all/2/1/",
18
+ # "test/zero_shot/gsm8k_train/generation/lean4_random_15k_all/2/1/",
19
+ ]
20
+
21
def get_output(input_string, k):
    """Map ``.../zero_shot/<dataset>/<model.../>/<round>/<n>`` to the output
    filename ``<dataset>/<model...>/<round>/<n>pass<k>.jsonl``.

    Raises ValueError when the path does not match the expected layout.
    """
    pattern = r"zero_shot/(\w+)/(.+?)/(\w+)/(\w+)"
    match = re.search(pattern, input_string)
    if match:
        part1 = match.group(1)
        part_model = match.group(2)
        part2 = match.group(3)
        part3 = match.group(4) + f"pass{k}.jsonl"
        result = "/".join([part1, part_model, part2, part3])
        print(result)
    else:
        # BUG FIX: the original did `assert True` (a no-op) and then returned
        # the unbound `result`, raising UnboundLocalError; fail loudly instead.
        raise ValueError(f"No match found in input path: {input_string}")
    return result
35
+
36
+
37
# Iterate through the input paths and run the command
# Each run shells out to pass_rate_notlean_test.py with a fixed pass@k of 10,
# appending output to output_file.
# NOTE(review): each `open(output_file, "a")` leaks a handle (never closed).
for input_path in input_path_lists:
    k = 10  # pass@k cutoff forwarded to the child script
    print(f"Running for input path: {input_path}", file=open(output_file, "a"))
    command = f"python3 pass_rate_notlean_test.py --input_path {input_path} --output_path {get_output(input_path, k)} --k {k}"
    subprocess.run(command, shell=True, stdout=open(output_file, "a"), stderr=subprocess.STDOUT)
    print("\n\n",file=open(output_file, "a"))
pass_rate_multi_pass.py ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pdb
2
+ import subprocess
3
+ import re
4
+
5
+ # Output file
6
+ output_file = "pass_rate_output.txt"
7
+
8
+ # Clearing the output file before appending new content
9
+ with open(output_file, "w") as file:
10
+ file.write("")
11
+
12
+ # List of input paths
13
+ input_path_lists = [
14
+ "test/zero_shot/wild_test/generation/lean4_random_15k_all/2/1/",
15
+ ]
16
+
17
+ def get_output(input_string, k):
18
+ pattern = r"zero_shot/(\w+)/(.+?)/(\w+)"
19
+ match = re.search(pattern, input_string)
20
+ if match:
21
+ part1 = match.group(1)
22
+ part2 = match.group(3) + f"pass{k}.jsonl"
23
+ result = "/".join([part1, part2])
24
+ print(result)
25
+ else:
26
+ print("No match found.")
27
+ assert True
28
+ return result
29
+
30
+ # List of input paths
31
+ input_path_lists = [
32
+ # "../auto-info/generate_result/zero_shot/gsm8k_train/generation/lean4_random_15k_all/2/10/",
33
+ # "../auto-info/generate_result/zero_shot/math_train/generation/lean4_random_15k_all/2/10/",
34
+ # "../auto-info/generate_result/zero_shot/wild_test/generation/lean4_rft/1/1",
35
+ # "../auto-info/generate_result/zero_shot/wild_test/generation/lean4_rft/2/1",
36
+ # "../auto-info/generate_result/zero_shot/wild_test/generation/lean4_rft/3/1",
37
+ # "../auto-info/generate_result/zero_shot/wild_test/generation/lean4_verifier/1/1",
38
+ # "../auto-info/generate_result/zero_shot/wild_test/generation/lean4_verifier/2/1",
39
+ # "../auto-info/generate_result/zero_shot/wild_test/generation/lean4_verifier/3/1",
40
+ # "../auto-info/generate_result/zero_shot/wild_test/generation/lean4_verifier_rft/1/1",
41
+ # "../auto-info/generate_result/zero_shot/wild_test/generation/lean4_verifier_rft/2/1",
42
+ # "../auto-info/generate_result/zero_shot/wild_test/generation/lean4_verifier_rft/3/1",
43
+ # "/opt/tiger/auto-info/generate_result/zero_shot/lean4_basic_test/generation/lean4_rft/1/1/",
44
+ # "/opt/tiger/auto-info/generate_result/zero_shot/lean4_basic_test/generation/lean4_rft/2/1/",
45
+ # "/opt/tiger/auto-info/generate_result/zero_shot/lean4_basic_test/generation/lean4_rft/3/1/",
46
+ # "/opt/tiger/auto-info/generate_result/zero_shot/lean4_random_test/generation/lean4_rft/1/1/",
47
+ # "/opt/tiger/auto-info/generate_result/zero_shot/lean4_random_test/generation/lean4_rft/2/1/",
48
+ # "/opt/tiger/auto-info/generate_result/zero_shot/lean4_random_test/generation/lean4_rft/3/1/",
49
+ # "/opt/tiger/auto-info/generate_result/zero_shot/lean4_basic_test/generation/lean4_verifier/1/1/",
50
+ # "/opt/tiger/auto-info/generate_result/zero_shot/lean4_basic_test/generation/lean4_verifier/2/1/",
51
+ # "/opt/tiger/auto-info/generate_result/zero_shot/lean4_basic_test/generation/lean4_verifier/3/1/",
52
+ # "/opt/tiger/auto-info/generate_result/zero_shot/lean4_random_test/generation/lean4_verifier/1/1/",
53
+ # "/opt/tiger/auto-info/generate_result/zero_shot/lean4_random_test/generation/lean4_verifier/2/1/",
54
+ # "/opt/tiger/auto-info/generate_result/zero_shot/lean4_random_test/generation/lean4_verifier/3/1/",
55
+ # "/opt/tiger/auto-info/generate_result/zero_shot/lean4_basic_test/generation/lean4_verifier_rft/1/1/",
56
+ # "/opt/tiger/auto-info/generate_result/zero_shot/lean4_basic_test/generation/lean4_verifier_rft/2/1/",
57
+ # "/opt/tiger/auto-info/generate_result/zero_shot/lean4_basic_test/generation/lean4_verifier_rft/3/1/",
58
+ # "/opt/tiger/auto-info/generate_result/zero_shot/lean4_random_test/generation/lean4_verifier_rft/1/1/",
59
+ # "/opt/tiger/auto-info/generate_result/zero_shot/lean4_random_test/generation/lean4_verifier_rft/2/1/",
60
+ # "/opt/tiger/auto-info/generate_result/zero_shot/lean4_random_test/generation/lean4_verifier_rft/3/1/",
61
+ # "/opt/tiger/auto-info/generate_result/zero_shot/wild_test/generation/lean4_rft/1/1/",
62
+ # "/opt/tiger/auto-info/generate_result/zero_shot/wild_test/generation/lean4_rft/2/1/",
63
+ # "/opt/tiger/auto-info/generate_result/zero_shot/wild_test/generation/lean4_rft/3/1/",
64
+ # "/opt/tiger/auto-info/generate_result/zero_shot/wild_test/generation/lean4_verifier/1/1/",
65
+ # "/opt/tiger/auto-info/generate_result/zero_shot/wild_test/generation/lean4_verifier/2/1/",
66
+ # "/opt/tiger/auto-info/generate_result/zero_shot/wild_test/generation/lean4_verifier/3/1/",
67
+ # "/opt/tiger/auto-info/generate_result/zero_shot/wild_test/generation/lean4_verifier_rft/1/1/",
68
+ # "/opt/tiger/auto-info/generate_result/zero_shot/wild_test/generation/lean4_verifier_rft/2/1/",
69
+ # "/opt/tiger/auto-info/generate_result/zero_shot/wild_test/generation/lean4_verifier_rft/3/1/",
70
+ # "/opt/tiger/auto-info/generate_result/zero_shot/lean4_15k_train/generation/lean4_random_15k_all/2/20/",
71
+ # "/opt/tiger/auto-info/generate_result/zero_shot/lean4_basic_test/generation/lean4_random_15k_all/2/5/",
72
+ # "/opt/tiger/auto-info/generate_result/zero_shot/lean4_random_test/generation/lean4_random_15k_all/2/5/",
73
+ # "/opt/tiger/mariana/auto-info/generate_result/zero_shot/lean4_random_test/generation/lean4_random_5k/2/1/",
74
+ # "test/zero_shot/lean4_random_test/generation/lean4_random_15k_all/3/1/",
75
+ # "/opt/tiger/mariana/auto-info/generate_result/zero_shot/lean4_basic_test/generation/lean4_random_15k_all/2/1/",
76
+ # "/opt/tiger/mariana/auto-info/generate_result/zero_shot/lean4_random_test/generation/lean4_random_15k_all/2/1/",
77
+ # "test/zero_shot/lean4_random_test/generation/lean4_random_15k_all/3/1/",
78
+ # "test/zero_shot/lean4_basic_test/generation/lean4_random_15k_all/3/1/",
79
+ # "/opt/tiger/auto-info/generate_result/zero_shot/lean4_basic_test/generation/lean4_random_15k_all_mathrft/1/1/",
80
+ # "/opt/tiger/auto-info/generate_result/zero_shot/lean4_random_test/generation/lean4_random_15k_all_mathrft/1/1/",
81
+ # "/opt/tiger/auto-info/generate_result/zero_shot/wild_test/generation/lean4_random_15k_all_mathrft/1/1/",
82
+ # "/opt/tiger/auto-info/generate_result/zero_shot/lean4_basic_test/generation/lean4_random_15k_all_mathrft/2/1/",
83
+ # "/opt/tiger/auto-info/generate_result/zero_shot/lean4_random_test/generation/lean4_random_15k_all_mathrft/2/1/",
84
+ # "/opt/tiger/auto-info/generate_result/zero_shot/wild_test/generation/lean4_random_15k_all_mathrft/2/1/",
85
+ # "/opt/tiger/auto-info/generate_result/zero_shot/lean4_basic_test/generation/lean4_random_15k_all_mathrft/3/1/",
86
+ # "/opt/tiger/auto-info/generate_result/zero_shot/lean4_random_test/generation/lean4_random_15k_all_mathrft/3/1/",
87
+ # "/opt/tiger/auto-info/generate_result/zero_shot/wild_test/generation/lean4_random_15k_all_mathrft/3/1/",
88
+ # "/opt/tiger/auto-info/generate_result/zero_shot/gsm8k_train/generation/lean4_random_15k_all_mathrft/2/10/",
89
+ # "/opt/tiger/auto-info/generate_result/zero_shot/math_train/generation/lean4_random_15k_all_mathrft/2/10/",
90
+ # "/opt/tiger/auto-info/generate_result/zero_shot/lean4_15k_train/generation/lean4_random_15k_all_mathrft/2/10/",
91
+ # Add more input paths as needed
92
+ "/opt/tiger/auto-info/generate_result/zero_shot/lean4_basic_test/generation/lean4_random_15k_all_mathrft/2/5/",
93
+ "/opt/tiger/auto-info/generate_result/zero_shot/lean4_random_test/generation/lean4_random_15k_all_mathrft/2/5/",
94
+ "/opt/tiger/auto-info/generate_result/zero_shot/wild_test/generation/lean4_random_15k_all_mathrft/2/5/",
95
+ ]
96
+
97
+ # Iterate through the input paths and run the command
98
+ for input_path in input_path_lists:
99
+ k = 5
100
+ if "wild_test" in input_path or "gsm8k_train" in input_path or "math_train" in input_path:
101
+ print(f"wild")
102
+ print(f"Running for input path: {input_path}", file=open(output_file, "a"))
103
+ command = f"python3 pass_rate_notlean_test.py --input_path {input_path} --output_path {get_output(input_path,k)} --k {k}"
104
+ subprocess.run(command, shell=True, stdout=open(output_file, "a"), stderr=subprocess.STDOUT)
105
+ print("\n\n",file=open(output_file, "a"))
106
+
107
+ else:
108
+ print(f"lean")
109
+ print(f"Running for input path: {input_path}", file=open(output_file, "a"))
110
+ command = f"python3 pass_rate_new_test.py --input_path {input_path} --output_path {get_output(input_path, k)} --k {k}"
111
+ subprocess.run(command, shell=True, stdout=open(output_file, "a"), stderr=subprocess.STDOUT)
112
+ print("\n\n",file=open(output_file, "a"))
pass_rate_new.py ADDED
@@ -0,0 +1,196 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import subprocess
3
+ from argparse import ArgumentParser
4
+ import json
5
+ from concurrent.futures import ThreadPoolExecutor
6
+ from tqdm import tqdm
7
+ import glob
8
+ import tempfile
9
+
10
def wrapped_function(item):
    """Check a single Lean snippet (item['cmd']) with `lake exe repl`.

    Writes the snippet to a temp file, asks the REPL to elaborate it, and
    classifies the run as 'pass' when the REPL's JSON output contains no
    "messages" key (i.e. no errors or warnings were reported).

    Returns a dict {'results': [...], 'pass_rate': float}; for this single
    item the pass rate is either 100.0 or 0.0.
    """
    results = []
    passed = 0
    total = 0

    temp_dir = tempfile.gettempdir()
    # NOTE(review): the filename is not unique, so concurrent callers would
    # clobber each other's snippet — confirm this is only invoked serially.
    temp_file = os.path.join(temp_dir, "test.lean")

    with open(temp_file, "w") as f:
        f.write(item['cmd'])

    data = '{"path": "%s", "allTactics": true}' % (temp_file,)
    command = 'echo \'%s\' | lake exe repl' % data

    try:
        result = subprocess.run(command, shell=True, check=True,
                                stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        stdout = result.stdout.decode('utf-8')
        stderr = result.stderr.decode('utf-8')
        json_stdout = json.loads(stdout)
        # "messages" holds compiler errors/warnings; its absence means success.
        if "messages" not in json_stdout:
            passed += 1
            results.append({'stdout': stdout, 'stderr': stderr, 'status': 'pass'})
        else:
            # BUG FIX: the original appended status 'pass' unconditionally,
            # even when the REPL reported error messages.
            results.append({'stdout': stdout, 'stderr': stderr, 'status': 'nopass'})
    except subprocess.CalledProcessError as e:
        # Non-zero exit (missing lake, REPL crash, ...) counts as a failure.
        results.append({'error': str(e), 'status': 'nopass'})
    total += 1

    # BUG FIX: `total` already counts every item, so the denominator is just
    # `total`; the original `passed / (passed + total)` double-counted passes.
    pass_rate = passed / total * 100

    return {'results': results, 'pass_rate': pass_rate}
48
+
49
+ # Set the directory where your .lean files are located
50
+
51
+ # Get a list of all .lean files in the directory
52
+ # lean_files = [f for f in os.listdir(directory) if f.endswith(".lean")]
53
+ # lean_files = ["test/file.lean"]
54
def single(command_list, args):
    """Sequentially check each item's Lean snippet via `lake exe repl`.

    Each item['cmd'] is written to test/test.lean and fed to the REPL; an
    item passes when the REPL's JSON output has no "messages" key.  Prints
    the overall pass rate and dumps per-item results to results.json.
    `args` is accepted for interface compatibility and currently unused.
    """
    results = []
    passed = 0
    total = 0
    for item in tqdm(command_list):
        with open("test/test.lean", "w", encoding='utf-8') as f:
            f.write(item['cmd'])
        data = '{"path": "test/test.lean", "allTactics": true}'
        command = 'echo \'%s\' | lake exe repl' % data
        try:
            result = subprocess.run(command, shell=True, check=True,
                                    stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            stdout = result.stdout.decode('utf-8')
            json_stdout = json.loads(stdout)
            stderr = result.stderr.decode('utf-8')
            # "messages" carries compiler errors/warnings; absence means success.
            if "messages" not in json_stdout:
                passed += 1
                status = 'pass'
            else:
                # BUG FIX: the original labelled every non-crashing run 'pass',
                # even when the REPL reported error messages.
                status = 'nopass'
            results.append({'stdout': stdout, 'stderr': stderr, 'status': status})
        except subprocess.CalledProcessError as e:
            results.append({'error': str(e), 'status': 'nopass'})
        total += 1

    # BUG FIX: guard against an empty command list (ZeroDivisionError).
    pass_rate = passed / total * 100 if total else 0.0
    print(pass_rate)

    # Save results to a JSON file
    with open('results.json', 'w') as f:
        json.dump({'results': results, 'pass_rate': pass_rate}, f, indent=2, ensure_ascii=False)
96
+
97
+
98
def multi(command_list, output_path):
    """Check all items concurrently (32 threads) via `lake exe repl` and
    write {'results', 'pass_rate'} to pass_rate_results/<output_path>.

    An item passes when the REPL's JSON output has no "messages" key *and*
    stderr is empty; timeouts and non-zero exits are recorded as
    'nopass_limit' / 'nopass_error' respectively.
    """
    results = []
    passed = 0
    total = 0

    def execute_command(item):
        temp_dir = '/opt/jianqiao'  # assumes this scratch directory exists — TODO confirm
        temp_file = os.path.join(temp_dir, f"test_{item['index']}.lean")  # Ensure unique filenames
        with open(temp_file, "w") as f:
            f.write(item['cmd'])

        data = '{"path": "%s", "allTactics": true}' % temp_file
        command = f'echo \'{data}\' | lake exe repl'

        try:
            result = subprocess.run(command, shell=True, check=True, timeout=600,
                                    stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            stdout = result.stdout.decode('utf-8')
            stderr = result.stderr.decode('utf-8')

            if "messages" not in json.loads(stdout) and not len(stderr):
                return {'stdout': stdout, 'stderr': stderr, 'status': 'pass',
                        'statement': item['statement'], 'content': item['content']}
            else:
                return {'stdout': stdout, 'stderr': stderr, 'status': 'nopass',
                        'statement': item['statement'], 'content': item['content']}

        except subprocess.TimeoutExpired as e:
            return {'error': str(e), 'status': 'nopass_limit',
                    'statement': item['statement'], 'content': item['content']}

        except subprocess.CalledProcessError as e:
            return {'error': str(e), 'status': 'nopass_error',
                    'statement': item['statement'], 'content': item['content']}
        finally:
            # BUG FIX: the original `os.remove(temp_file)` sat *after* the
            # return statements and never ran, leaking one file per item.
            if os.path.exists(temp_file):
                os.remove(temp_file)

    total = len(command_list)

    with ThreadPoolExecutor(max_workers=32) as executor:
        futures = [executor.submit(execute_command,
                                   {'index': i, 'cmd': cmd['cmd'],
                                    'statement': cmd['statement'],
                                    'content': cmd['content']})
                   for i, cmd in enumerate(command_list)]
        for future in tqdm(futures, total=total, desc="Processing Commands"):
            result = future.result()
            results.append(result)
            if result['status'] == 'pass':
                passed += 1

    # BUG FIX: avoid ZeroDivisionError when the command list is empty.
    pass_rate = (passed / total) * 100 if total else 0.0
    print(f"total test: {total}")
    print(f"Pass rate: {pass_rate}%")

    output_file = f"pass_rate_results/{output_path}"
    # Create the directory if it doesn't exist
    os.makedirs(os.path.dirname(output_file), exist_ok=True)

    with open(f"{output_file}", 'w') as f:
        json.dump({'results': results, 'pass_rate': pass_rate}, f, indent=2, ensure_ascii=False)
149
+
150
import re

def remove_simp_pattern_from_end(s):
    """Strip a trailing ``@[simp ...]`` attribute from *s*, if one is present."""
    return re.sub(r'@\[simp\s*.*?\]$', '', s)
154
+
155
def main(args):
    """Collect generated proofs from <input_path>/[0-9]*.json files (one JSON
    record per line), build each record's `cmd` (working file + first
    generated statement), and check them all with `multi`.
    """
    command_list = []
    file_pattern = os.path.join(args.input_path, '[0-9]*.json')
    for file_path in glob.glob(file_pattern):
        with open(file_path, 'r', encoding='utf-8') as rf:
            for line in rf.readlines():
                try:
                    json_item = json.loads(line)
                    working_env = json_item['content']['working_file']
                    # Drop everything from the first #align marker onwards;
                    # only the Lean 4 statement before it is checked.
                    statement = json_item['total output'][0].split("#align")[0]
                    json_item['statement'] = statement
                    json_item['cmd'] = '\n\n'.join([working_env, statement])
                    assert len(statement) > 0
                except (json.JSONDecodeError, KeyError, IndexError, AssertionError) as e:
                    # BUG FIX: the original opened pdb.set_trace() here (hangs
                    # unattended runs) and still appended the broken item;
                    # skip malformed lines instead.
                    print(f"skipping malformed line in {file_path}: {e!r}")
                    continue
                command_list.append(json_item)
    multi(command_list, args.output_path)
181
+
182
if __name__ == '__main__':
    # CLI entry point: configure input/output locations, then run the
    # pass-rate evaluation over the generated proofs.
    arg_parser = ArgumentParser()
    arg_parser.add_argument('--data_path', type=str,
                            default='data/grade-school-math-master/grade_school_math/data/test.jsonl')
    arg_parser.add_argument('--input_path', type=str, default='')
    arg_parser.add_argument('--cuda_num', type=int, default=8)
    arg_parser.add_argument('--output_path', type=str, default='total.json')
    arg_parser.add_argument('--generate_method', type=str,
                            choices=['single', 'sft', 'comp', 'self_consistency', 'single_consistency'])
    arg_parser.add_argument('--method', type=str, choices=['main', 'test', 'get_data'])
    args = arg_parser.parse_args()
    main(args)
194
+
195
+
196
+
pass_rate_new_test.py ADDED
@@ -0,0 +1,255 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import subprocess
3
+ from argparse import ArgumentParser
4
+ import json
5
+ from concurrent.futures import ThreadPoolExecutor
6
+ from tqdm import tqdm
7
+ import glob
8
+ import tempfile
9
+ import random
10
+
11
def wrapped_function(item):
    """Check a single Lean snippet (item['cmd']) with `lake exe repl`.

    Writes the snippet to a temp file, asks the REPL to elaborate it, and
    classifies the run as 'pass' when the REPL's JSON output contains no
    "messages" key (i.e. no errors or warnings were reported).

    Returns a dict {'results': [...], 'pass_rate': float}; for this single
    item the pass rate is either 100.0 or 0.0.
    """
    results = []
    passed = 0
    total = 0

    temp_dir = tempfile.gettempdir()
    # NOTE(review): the filename is not unique, so concurrent callers would
    # clobber each other's snippet — confirm this is only invoked serially.
    temp_file = os.path.join(temp_dir, "test.lean")

    with open(temp_file, "w") as f:
        f.write(item['cmd'])

    data = '{"path": "%s", "allTactics": true}' % (temp_file,)
    command = 'echo \'%s\' | lake exe repl' % data

    try:
        result = subprocess.run(command, shell=True, check=True,
                                stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        stdout = result.stdout.decode('utf-8')
        stderr = result.stderr.decode('utf-8')
        json_stdout = json.loads(stdout)
        # "messages" holds compiler errors/warnings; its absence means success.
        if "messages" not in json_stdout:
            passed += 1
            results.append({'stdout': stdout, 'stderr': stderr, 'status': 'pass'})
        else:
            # BUG FIX: the original appended status 'pass' unconditionally,
            # even when the REPL reported error messages.
            results.append({'stdout': stdout, 'stderr': stderr, 'status': 'nopass'})
    except subprocess.CalledProcessError as e:
        # Non-zero exit (missing lake, REPL crash, ...) counts as a failure.
        results.append({'error': str(e), 'status': 'nopass'})
    total += 1

    # BUG FIX: `total` already counts every item, so the denominator is just
    # `total`; the original `passed / (passed + total)` double-counted passes.
    pass_rate = passed / total * 100

    return {'results': results, 'pass_rate': pass_rate}
49
+
50
+ # Set the directory where your .lean files are located
51
+
52
+ # Get a list of all .lean files in the directory
53
+ # lean_files = [f for f in os.listdir(directory) if f.endswith(".lean")]
54
+ # lean_files = ["test/file.lean"]
55
def single(command_list, args):
    """Sequentially check each item's Lean snippet via `lake exe repl`.

    Each item['cmd'] is written to test/test.lean and fed to the REPL; an
    item passes when the REPL's JSON output has no "messages" key.  Prints
    the overall pass rate and dumps per-item results to results.json.
    `args` is accepted for interface compatibility and currently unused.
    """
    results = []
    passed = 0
    total = 0
    for item in tqdm(command_list):
        with open("test/test.lean", "w", encoding='utf-8') as f:
            f.write(item['cmd'])
        data = '{"path": "test/test.lean", "allTactics": true}'
        command = 'echo \'%s\' | lake exe repl' % data
        try:
            result = subprocess.run(command, shell=True, check=True,
                                    stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            stdout = result.stdout.decode('utf-8')
            json_stdout = json.loads(stdout)
            stderr = result.stderr.decode('utf-8')
            # "messages" carries compiler errors/warnings; absence means success.
            if "messages" not in json_stdout:
                passed += 1
                status = 'pass'
            else:
                # BUG FIX: the original labelled every non-crashing run 'pass',
                # even when the REPL reported error messages.
                status = 'nopass'
            results.append({'stdout': stdout, 'stderr': stderr, 'status': status})
        except subprocess.CalledProcessError as e:
            results.append({'error': str(e), 'status': 'nopass'})
        total += 1

    # BUG FIX: guard against an empty command list (ZeroDivisionError).
    pass_rate = passed / total * 100 if total else 0.0
    print(pass_rate)

    # Save results to a JSON file
    with open('results.json', 'w') as f:
        json.dump({'results': results, 'pass_rate': pass_rate}, f, indent=2, ensure_ascii=False)
97
+
98
+
99
def multi(command_list, output_path, k):
    """Check up to k candidate Lean proofs per item and report Pass@1 / Pass@k.

    Every candidate in item['cmd'] is written to its own .lean file under
    /opt/jianqiao, elaborated with `lake exe repl`, and classified as
    'pass', 'nopass' (with the flat character offset of the first error),
    'nopass_limit' (timeout) or 'nopass_error' (subprocess failure).
    Results are written to pass_rate_results/<output_path>.

    Args:
        command_list: list of record dicts, each with a list of candidate
            snippets under 'cmd'.
        output_path: file name (relative to pass_rate_results/) for the dump.
        k: number of candidates considered for Pass@k.
    """
    results = []

    def execute_command(item, index):
        temp_dir = '/opt/jianqiao'

        def filter_json(json_data):
            # Keep only the metadata fields worth persisting in the output.
            filtered = {}
            for key in json_data.keys():
                if key in ['question', 'answer', 'total output', 'results']:
                    filtered[key] = json_data[key]
            return filtered

        result_dict = filter_json(item)
        result_dict['results'] = []

        for i, cmd in enumerate(item['cmd']):
            # Unique filename per (item, candidate) so parallel workers
            # cannot clobber each other.
            temp_file = os.path.join(temp_dir, f"{index}_test_{i}.lean")
            with open(temp_file, "w") as f:
                f.write(cmd)

            data = '{"path": "%s", "allTactics": true}' % temp_file
            command = f'echo \'{data}\' | lake exe repl'

            try:
                result = subprocess.run(command, shell=True, check=True, timeout=600,
                                        stdout=subprocess.PIPE, stderr=subprocess.PIPE)
                stdout = json.loads(result.stdout.decode('utf-8'))
                stderr = result.stderr.decode('utf-8')
            except subprocess.TimeoutExpired as e:
                result_item = {'error': str(e), 'status': 'nopass_limit'}
            except subprocess.CalledProcessError as e:
                result_item = {'error': str(e), 'status': 'nopass_error'}
            else:
                if "messages" not in stdout and not len(stderr):
                    result_item = {'stdout': stdout, 'stderr': stderr, 'status': 'pass'}
                elif not len(stderr) and "messages" in stdout:
                    # BUG FIX: removed a leftover `import pdb; pdb.set_trace()`
                    # that suspended every run entering this branch.
                    flag = 0
                    lines = cmd.split('\n')  # hoisted out of the inner loop
                    for me in stdout['messages']:
                        if me['severity'] == 'error':
                            flag = 1
                            # Convert the REPL's 1-based (line, column) error
                            # position into a flat character offset in cmd.
                            start_line = me['pos']['line'] - 1
                            current_column = me['pos']['column'] - 1
                            for line_n in range(start_line - 1, 0, -1):
                                line_len = len(lines[line_n])
                                current_column += line_len + 1
                                if not line_len:
                                    break
                            result_item = {'stdout': stdout, 'stderr': stderr,
                                           'status': 'nopass', 'string_pos': current_column}
                            break
                    if not flag:
                        # Only non-error messages (warnings etc.): still a pass.
                        result_item = {'stdout': stdout, 'stderr': stderr, 'status': 'pass'}
                else:
                    assert len(stderr)
                    result_item = {'stdout': stdout, 'stderr': stderr,
                                   'status': 'nopass', 'string_pos': 0}

            result_dict['results'].append(result_item)
        return result_dict

    total = len(command_list)

    with ThreadPoolExecutor(max_workers=1) as executor:
        futures = [executor.submit(execute_command, cmd, i)
                   for i, cmd in enumerate(command_list)]
        for future in tqdm(futures, total=total, desc="Processing Commands"):
            results.append(future.result())

    def calculate_pass(result_list, k):
        # An item counts toward Pass@n when any of its first n candidates passed.
        pass_1_count = 0
        pass_k_count = 0
        for result in result_list:
            item_results = result.get('results', [])
            if any(r.get('status') == 'pass' for r in item_results[:1]):
                pass_1_count += 1
            if any(r.get('status') == 'pass' for r in item_results[:k]):
                pass_k_count += 1
        denom = len(result_list)
        return ((pass_1_count / denom if denom else 0),
                (pass_k_count / denom if denom else 0))

    pass_1, pass_k = calculate_pass(results, k)
    print("Pass@1:", pass_1)
    print(f"Pass@{k}:", pass_k)

    output_file = f"pass_rate_results/{output_path}"
    # Create the directory if it doesn't exist.
    os.makedirs(os.path.dirname(output_file), exist_ok=True)

    with open(output_file, 'w') as f:
        json.dump({'results': results, 'pass_1': pass_1, f"pass_{k}": pass_k},
                  f, indent=2, ensure_ascii=False)
208
+
209
+ import re
210
def remove_simp_pattern_from_end(s):
    """Return *s* with a trailing `@[simp ...]` attribute removed, if any."""
    return re.sub(r'@\[simp\s*.*?\]$', '', s)
213
+
214
def main(args):
    """Collect generated Lean statements from input JSON files and check them.

    Each line of every matching file is a JSON record; the first k generated
    outputs are truncated at '#align', joined with the record's working file,
    and handed to multi() for evaluation.
    """
    command_list = []
    file_pattern = os.path.join(args.input_path, '[0-1]*.json')
    for file_path in glob.glob(file_pattern):
        with open(file_path, 'r', encoding='utf-8') as rf:
            for line in rf.readlines():
                # BUG FIX: the original bare `except:` dropped into
                # pdb.set_trace(), hanging unattended runs on the first
                # malformed record; such records are now reported and skipped.
                try:
                    json_item = json.loads(line)
                    working_env = json_item['content']['working_file']
                    json_item['cmd'] = []
                    for output in json_item['total output'][:min(args.k, len(json_item['total output']))]:
                        statement = output.split("#align")[0]
                        # Every candidate must be non-empty (the original only
                        # asserted the last one after the loop).
                        assert len(statement) > 0
                        json_item['cmd'].append('\n\n'.join([working_env, statement]))
                    json_item['answer'] = json_item['content']['statement_poof']
                except (KeyError, IndexError, ValueError, AssertionError) as e:
                    print(f"skipping malformed record in {file_path}: {e}")
                    continue
                command_list.append(json_item)
    multi(command_list, args.output_path, args.k)
239
+
240
if __name__ == '__main__':
    # CLI entry point: parse arguments and launch the evaluation.
    parser = ArgumentParser()
    parser.add_argument('--data_path', type=str,
                        default='data/grade-school-math-master/grade_school_math/data/test.jsonl')
    parser.add_argument('--input_path', type=str, default='')
    parser.add_argument('--cuda_num', type=int, default=8)
    parser.add_argument('--k', type=int, default=1)
    parser.add_argument('--output_path', type=str, default='total.json')
    parser.add_argument('--generate_method', type=str,
                        choices=['single', 'sft', 'comp', 'self_consistency', 'single_consistency'])
    parser.add_argument('--method', type=str, choices=['main', 'test', 'get_data'])
    main(parser.parse_args())
253
+
254
+
255
+
pass_rate_new_test_allcontent.py ADDED
@@ -0,0 +1,255 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import subprocess
3
+ from argparse import ArgumentParser
4
+ import json
5
+ from concurrent.futures import ThreadPoolExecutor
6
+ from tqdm import tqdm
7
+ import glob
8
+ import tempfile
9
+ import random
10
+ import random;random.seed(42)
11
+
12
+
13
def wrapped_function(item):
    """Check a single Lean snippet with `lake exe repl` and report pass/fail.

    Writes item['cmd'] to a uniquely named temp .lean file, asks the REPL to
    elaborate it, and counts the run as passing when the REPL returns no
    "messages" (i.e. no diagnostics).

    Returns:
        dict with 'results' (per-run records) and 'pass_rate' (percentage).
    """
    results = []
    passed = 0
    total = 0

    # NamedTemporaryFile yields a unique path, so concurrent callers no
    # longer clobber each other's fixed "test.lean" (the original's bug).
    with tempfile.NamedTemporaryFile("w", suffix=".lean", delete=False) as f:
        f.write(item['cmd'])
        temp_file = f.name

    data = '{"path": "%s", "allTactics": true}' % temp_file
    command = 'echo \'%s\' | lake exe repl' % data

    try:
        result = subprocess.run(command, shell=True, check=True,
                                stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        stdout = result.stdout.decode('utf-8')
        stderr = result.stderr.decode('utf-8')
        json_stdout = json.loads(stdout)
        # "messages" holds compiler diagnostics; absence means a clean run.
        if "messages" not in json_stdout:
            passed += 1
        results.append({'stdout': stdout, 'stderr': stderr, 'status': 'pass'})
    except subprocess.CalledProcessError as e:
        results.append({'error': str(e), 'status': 'nopass'})
    finally:
        total += 1
        # Actually clean up the temp file (the original only commented on it).
        os.remove(temp_file)

    # BUG FIX: the original divided by (passed + total), counting passing
    # items twice in the denominator; the rate is passed out of total.
    pass_rate = passed / total * 100

    return {'results': results, 'pass_rate': pass_rate}
51
+
52
+ # Set the directory where your .lean files are located
53
+
54
+ # Get a list of all .lean files in the directory
55
+ # lean_files = [f for f in os.listdir(directory) if f.endswith(".lean")]
56
+ # lean_files = ["test/file.lean"]
57
def single(command_list, args):
    """Sequentially check each item's Lean snippet with `lake exe repl`.

    Each item['cmd'] is written to test/test.lean, elaborated by the REPL,
    and recorded as pass/nopass.  The overall pass rate is printed and all
    results are dumped to results.json.

    Args:
        command_list: iterable of dicts, each with a 'cmd' string of Lean code.
        args: parsed CLI arguments (currently unused; kept for interface
            compatibility with callers).
    """
    results = []
    passed = 0
    total = 0
    for item in tqdm(command_list):
        with open("test/test.lean", "w", encoding='utf-8') as f:
            f.write(item['cmd'])
        data = '{"path": "test/test.lean", "allTactics": true}'
        command = 'echo \'%s\' | lake exe repl' % data
        try:
            result = subprocess.run(command, shell=True, check=True,
                                    stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            stdout = result.stdout.decode('utf-8')
            stderr = result.stderr.decode('utf-8')
            json_stdout = json.loads(stdout)
            # BUG FIX: the original appended status 'pass' unconditionally,
            # even when the REPL reported diagnostics, so the saved records
            # disagreed with the `passed` counter.
            if "messages" not in json_stdout:
                passed += 1
                status = 'pass'
            else:
                status = 'nopass'
            results.append({'stdout': stdout, 'stderr': stderr, 'status': status})
        except subprocess.CalledProcessError as e:
            results.append({'error': str(e), 'status': 'nopass'})
        total += 1

    # Guard against an empty command list (the original raised
    # ZeroDivisionError in that case).
    pass_rate = passed / total * 100 if total else 0.0
    print(pass_rate)

    # Persist per-item results alongside the aggregate rate.
    with open('results.json', 'w') as f:
        json.dump({'results': results, 'pass_rate': pass_rate}, f,
                  indent=2, ensure_ascii=False)
99
+
100
+
101
def multi(command_list, output_path, k):
    """Check up to k candidate Lean proofs per item and report Pass@1 / Pass@k.

    Every candidate in item['cmd'] is written to its own .lean file under
    /opt/jianqiao, elaborated with `lake exe repl`, and classified as
    'pass', 'nopass' (with the flat character offset of the first error),
    'nopass_limit' (timeout) or 'nopass_error' (subprocess failure).
    Results are written to pass_rate_results/<output_path>.
    """
    results = []

    def execute_command(item, index):
        temp_dir = '/opt/jianqiao'

        def filter_json(json_data):
            # Keep only the fields worth persisting in the output dump.
            kept = {}
            for key in json_data.keys():
                if key in ['question', 'answer', 'total output', 'results', 'cmd']:
                    kept[key] = json_data[key]
            return kept

        record = filter_json(item)
        record['results'] = []

        for cand_idx, cmd in enumerate(item['cmd']):
            # Unique filename per (item, candidate) so workers don't collide.
            temp_file = os.path.join(temp_dir, f"{index}_test_{cand_idx}.lean")
            with open(temp_file, "w") as f:
                f.write(cmd)

            data = '{"path": "%s", "allTactics": true}' % temp_file
            command = f'echo \'{data}\' | lake exe repl'

            try:
                proc = subprocess.run(command, shell=True, check=True, timeout=600,
                                      stdout=subprocess.PIPE, stderr=subprocess.PIPE)
                stdout = json.loads(proc.stdout.decode('utf-8'))
                stderr = proc.stderr.decode('utf-8')
            except subprocess.TimeoutExpired as e:
                outcome = {'error': str(e), 'status': 'nopass_limit'}
            except subprocess.CalledProcessError as e:
                outcome = {'error': str(e), 'status': 'nopass_error'}
            else:
                if "messages" not in stdout and not len(stderr):
                    outcome = {'stdout': stdout, 'stderr': stderr, 'status': 'pass'}
                elif not len(stderr) and "messages" in stdout:
                    found_error = 0
                    src_lines = cmd.split('\n')
                    for msg in stdout['messages']:
                        if msg['severity'] == 'error':
                            found_error = 1
                            # Fold the 1-based (line, column) position into a
                            # flat character offset within cmd.
                            start_line = msg['pos']['line'] - 1
                            offset = msg['pos']['column'] - 1
                            for ln in range(start_line - 1, 0, -1):
                                length = len(src_lines[ln])
                                offset += length + 1
                                if not length:
                                    break
                            outcome = {'stdout': stdout, 'stderr': stderr,
                                       'status': 'nopass', 'string_pos': offset}
                            break
                    if not found_error:
                        # Only warnings/info messages: still counts as a pass.
                        outcome = {'stdout': stdout, 'stderr': stderr, 'status': 'pass'}
                else:
                    assert len(stderr)
                    outcome = {'stdout': stdout, 'stderr': stderr,
                               'status': 'nopass', 'string_pos': 0}

            record['results'].append(outcome)
        return record

    total = len(command_list)

    with ThreadPoolExecutor(max_workers=128) as executor:
        futures = [executor.submit(execute_command, cmd, i)
                   for i, cmd in enumerate(command_list)]
        for future in tqdm(futures, total=total, desc="Processing Commands"):
            results.append(future.result())

    def calculate_pass(result_list, k):
        # An item counts toward Pass@n when any of its first n candidates passed.
        pass_1_count = 0
        pass_k_count = 0
        for record in result_list:
            item_results = record.get('results', [])
            if any(r.get('status') == 'pass' for r in item_results[:1]):
                pass_1_count += 1
            if any(r.get('status') == 'pass' for r in item_results[:k]):
                pass_k_count += 1
        denom = len(result_list)
        return ((pass_1_count / denom if denom else 0),
                (pass_k_count / denom if denom else 0))

    pass_1, pass_k = calculate_pass(results, k)
    print("Pass@1:", pass_1)
    print(f"Pass@{k}:", pass_k)

    output_file = f"pass_rate_results/{output_path}"
    # Create the directory if it doesn't exist.
    os.makedirs(os.path.dirname(output_file), exist_ok=True)

    with open(output_file, 'w') as f:
        json.dump({'results': results, 'pass_1': pass_1, f"pass_{k}": pass_k},
                  f, indent=2, ensure_ascii=False)
208
+
209
+ import re
210
def remove_simp_pattern_from_end(s):
    """Return *s* with a trailing `@[simp ...]` attribute removed, if any."""
    return re.sub(r'@\[simp\s*.*?\]$', '', s)
213
+
214
def main(args):
    """Collect generated Lean statements from input JSON files and check a sample.

    Each line of every matching file is a JSON record; the first k generated
    outputs are truncated at '#align', joined with the record's working file,
    and a random sample of the records is handed to multi() for evaluation.
    """
    command_list = []
    file_pattern = os.path.join(args.input_path, '[0-9]*.json')
    for file_path in glob.glob(file_pattern):
        with open(file_path, 'r', encoding='utf-8') as rf:
            for line in rf.readlines():
                # BUG FIX: the original bare `except:` dropped into
                # pdb.set_trace(), hanging unattended runs on the first
                # malformed record; such records are now reported and skipped.
                try:
                    json_item = json.loads(line)
                    working_env = json_item['content']['working_file']
                    json_item['cmd'] = []
                    for output in json_item['total output'][:min(args.k, len(json_item['total output']))]:
                        statement = output.split("#align")[0]
                        assert len(statement) > 0
                        json_item['cmd'].append('\n\n'.join([working_env, statement]))
                    json_item['answer'] = json_item['content']['statement_poof']
                except (KeyError, IndexError, ValueError, AssertionError) as e:
                    print(f"skipping malformed record in {file_path}: {e}")
                    continue
                command_list.append(json_item)
    # BUG FIX: random.sample raises ValueError when fewer than 1000 records
    # were collected; cap the sample size at the population size.
    sample_size = min(1000, len(command_list))
    multi(random.sample(command_list, sample_size), args.output_path, args.k)
239
+
240
if __name__ == '__main__':
    # CLI entry point: parse arguments and launch the evaluation.
    parser = ArgumentParser()
    parser.add_argument('--data_path', type=str,
                        default='data/grade-school-math-master/grade_school_math/data/test.jsonl')
    parser.add_argument('--input_path', type=str, default='')
    parser.add_argument('--cuda_num', type=int, default=8)
    parser.add_argument('--k', type=int, default=1)
    parser.add_argument('--output_path', type=str, default='total.json')
    parser.add_argument('--generate_method', type=str,
                        choices=['single', 'sft', 'comp', 'self_consistency', 'single_consistency'])
    parser.add_argument('--method', type=str, choices=['main', 'test', 'get_data'])
    main(parser.parse_args())
253
+
254
+
255
+
pass_rate_notlean.py ADDED
@@ -0,0 +1,202 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import subprocess
3
+ from argparse import ArgumentParser
4
+ import json
5
+ from concurrent.futures import ThreadPoolExecutor
6
+ from tqdm import tqdm
7
+ import tempfile
8
+ import glob
9
+ import pdb
10
+
11
def wrapped_function(item):
    """Check a single Lean snippet with `lake exe repl` and report pass/fail.

    Writes item['cmd'] to a uniquely named temp .lean file, asks the REPL to
    elaborate it, and counts the run as passing when the REPL returns no
    "messages" (i.e. no diagnostics).

    Returns:
        dict with 'results' (per-run records) and 'pass_rate' (percentage).
    """
    results = []
    passed = 0
    total = 0

    # NamedTemporaryFile yields a unique path, so concurrent callers no
    # longer clobber each other's fixed "test.lean" (the original's bug).
    with tempfile.NamedTemporaryFile("w", suffix=".lean", delete=False) as f:
        f.write(item['cmd'])
        temp_file = f.name

    data = '{"path": "%s", "allTactics": true}' % temp_file
    command = 'echo \'%s\' | lake exe repl' % data

    try:
        result = subprocess.run(command, shell=True, check=True,
                                stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        stdout = result.stdout.decode('utf-8')
        stderr = result.stderr.decode('utf-8')
        json_stdout = json.loads(stdout)
        # "messages" holds compiler diagnostics; absence means a clean run.
        if "messages" not in json_stdout:
            passed += 1
        results.append({'stdout': stdout, 'stderr': stderr, 'status': 'pass'})
    except subprocess.CalledProcessError as e:
        results.append({'error': str(e), 'status': 'nopass'})
    finally:
        total += 1
        # Actually clean up the temp file (the original only commented on it).
        os.remove(temp_file)

    # BUG FIX: the original divided by (passed + total), counting passing
    # items twice in the denominator; the rate is passed out of total.
    pass_rate = passed / total * 100

    return {'results': results, 'pass_rate': pass_rate}
49
+
50
+ # Set the directory where your .lean files are located
51
+
52
+ # Get a list of all .lean files in the directory
53
+ # lean_files = [f for f in os.listdir(directory) if f.endswith(".lean")]
54
+ # lean_files = ["test/file.lean"]
55
def single(command_list, output_path):
    """Sequentially check each item's Lean snippet with `lake exe repl`.

    Each item['cmd'] is written to test/test.lean, elaborated by the REPL,
    and recorded as pass/nopass.  The overall pass rate is printed and all
    results are dumped to results.json.

    Args:
        command_list: iterable of dicts, each with a 'cmd' string of Lean code.
        output_path: unused; kept for interface compatibility with callers.
    """
    results = []
    passed = 0
    total = 0
    for item in tqdm(command_list):
        with open("test/test.lean", "w", encoding='utf-8') as f:
            f.write(item['cmd'])
        data = '{"path": "test/test.lean", "allTactics": true}'
        command = 'echo \'%s\' | lake exe repl' % data

        try:
            result = subprocess.run(command, shell=True, check=True,
                                    stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            stdout = result.stdout.decode('utf-8')
            stderr = result.stderr.decode('utf-8')
            json_stdout = json.loads(stdout)
            # BUG FIX: the original appended status 'pass' unconditionally,
            # even when the REPL reported diagnostics, so the saved records
            # disagreed with the `passed` counter.
            if "messages" not in json_stdout:
                passed += 1
                status = 'pass'
            else:
                status = 'nopass'
            results.append({'stdout': stdout, 'stderr': stderr, 'status': status})
        except subprocess.CalledProcessError as e:
            results.append({'error': str(e), 'status': 'nopass'})
        total += 1

    # Guard against an empty command list (the original raised
    # ZeroDivisionError in that case).
    pass_rate = passed / total * 100 if total else 0.0
    print(pass_rate)

    # Persist per-item results alongside the aggregate rate.
    with open('results.json', 'w') as f:
        json.dump({'results': results, 'pass_rate': pass_rate}, f,
                  indent=2, ensure_ascii=False)
99
+
100
+
101
+
102
+
103
def multi(command_list, output_path):
    """Check one Lean candidate per item in parallel and report the pass rate.

    Each item's 'cmd' is written to its own .lean file under /opt/jianqiao,
    elaborated with `lake exe repl`, and classified as 'pass', 'nopass',
    'nopass_limit' (timeout) or 'nopass_error' (subprocess failure).
    Results are written to pass_rate_results/<output_path>.
    """
    results = []
    passed = 0

    def execute_command(item):
        temp_dir = '/opt/jianqiao'
        # Unique filename per item so parallel workers cannot collide.
        temp_file = os.path.join(temp_dir, f"test_{item['index']}.lean")
        with open(temp_file, "w") as f:
            f.write(item['cmd'])

        data = '{"path": "%s", "allTactics": true}' % temp_file
        command = f'echo \'{data}\' | lake exe repl'

        try:
            result = subprocess.run(command, shell=True, check=True, timeout=600,
                                    stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            stdout = result.stdout.decode('utf-8')
            stderr = result.stderr.decode('utf-8')

            # "messages" absent and empty stderr means a clean elaboration.
            if "messages" not in json.loads(stdout) and not len(stderr):
                return {'stdout': stdout, 'stderr': stderr, 'status': 'pass',
                        'statement': item['statement'], 'content': item['content']}
            else:
                return {'stdout': stdout, 'stderr': stderr, 'status': 'nopass',
                        'statement': item['statement'], 'content': item['content']}
        except subprocess.TimeoutExpired as e:
            return {'error': str(e), 'status': 'nopass_limit',
                    'statement': item['statement'], 'content': item['content']}
        except subprocess.CalledProcessError as e:
            return {'error': str(e), 'status': 'nopass_error',
                    'statement': item['statement'], 'content': item['content']}
        finally:
            # BUG FIX: the original placed os.remove after the return
            # statements, so it never ran and leaked one .lean file per item;
            # `finally` guarantees cleanup on every path.
            os.remove(temp_file)

    total = len(command_list)

    with ThreadPoolExecutor(max_workers=32) as executor:
        futures = [executor.submit(execute_command,
                                   {'index': i, 'cmd': cmd['cmd'],
                                    'statement': cmd['statement'],
                                    'content': cmd['content']})
                   for i, cmd in enumerate(command_list)]
        for future in tqdm(futures, total=total, desc="Processing Commands"):
            result = future.result()
            results.append(result)
            if result['status'] == 'pass':
                passed += 1

    # Guard against an empty command list (ZeroDivisionError otherwise).
    pass_rate = (passed / total) * 100 if total else 0.0
    print(f"Pass rate: {pass_rate}%")

    output_file = f"pass_rate_results/{output_path}"
    # Create the directory if it doesn't exist.
    os.makedirs(os.path.dirname(output_file), exist_ok=True)

    with open(output_file, 'w') as f:
        json.dump({'results': results, 'pass_rate': pass_rate}, f,
                  indent=2, ensure_ascii=False)
153
+
154
+ import re
155
def remove_simp_pattern_from_end(s):
    """Return *s* with a trailing `@[simp ...]` attribute removed, if any."""
    return re.sub(r'@\[simp\s*.*?\]$', '', s)
158
+
159
def main(args):
    """Build single-candidate Lean check commands and evaluate them.

    The shared working environment is loaded once from data/basic_working.json
    and prepended to the first generated statement (truncated at '#align') of
    every record in the input JSON files; the results go to multi().
    """
    command_list = []
    # json_filename = 'data/notlean_dependency.json'
    json_filename = 'data/basic_working.json'
    json_item = json.load(open(json_filename, encoding='utf-8'))
    working_env = json_item['working_file']

    file_pattern = os.path.join(args.input_path, '[0-9]*.json')
    for file_path in glob.glob(file_pattern):
        with open(file_path, 'r', encoding='utf-8') as rf:
            for line in rf.readlines():
                # BUG FIX: the original bare `except:` dropped into
                # pdb.set_trace(), hanging unattended runs on the first
                # malformed record; such records are now reported and skipped.
                try:
                    json_item = json.loads(line)
                    statement = json_item['total output'][0].split("#align")[0]
                    assert len(statement) > 0
                    json_item['statement'] = statement
                    json_item['cmd'] = '\n\n'.join([working_env, statement])
                except (KeyError, IndexError, ValueError, AssertionError) as e:
                    print(f"skipping malformed record in {file_path}: {e}")
                    continue
                command_list.append(json_item)

    multi(command_list, args.output_path)
187
+
188
if __name__ == '__main__':
    # CLI entry point: parse arguments and launch the evaluation.
    parser = ArgumentParser()
    parser.add_argument('--data_path', type=str,
                        default='data/grade-school-math-master/grade_school_math/data/test.jsonl')
    parser.add_argument('--input_path', type=str, default='')
    parser.add_argument('--cuda_num', type=int, default=8)
    parser.add_argument('--output_path', type=str, default='total.json')
    parser.add_argument('--generate_method', type=str,
                        choices=['single', 'sft', 'comp', 'self_consistency', 'single_consistency'])
    parser.add_argument('--method', type=str, choices=['main', 'test', 'get_data'])
    main(parser.parse_args())
+
201
+
202
+
pass_rate_notlean_test.py ADDED
@@ -0,0 +1,261 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import subprocess
3
+ from argparse import ArgumentParser
4
+ import json
5
+ from concurrent.futures import ThreadPoolExecutor
6
+ from tqdm import tqdm
7
+ import tempfile
8
+ import glob
9
+ import pdb
10
+
11
def wrapped_function(item):
    """Check a single Lean snippet with `lake exe repl` and report pass/fail.

    Writes item['cmd'] to a uniquely named temp .lean file, asks the REPL to
    elaborate it, and counts the run as passing when the REPL returns no
    "messages" (i.e. no diagnostics).

    Returns:
        dict with 'results' (per-run records) and 'pass_rate' (percentage).
    """
    results = []
    passed = 0
    total = 0

    # NamedTemporaryFile yields a unique path, so concurrent callers no
    # longer clobber each other's fixed "test.lean" (the original's bug).
    with tempfile.NamedTemporaryFile("w", suffix=".lean", delete=False) as f:
        f.write(item['cmd'])
        temp_file = f.name

    data = '{"path": "%s", "allTactics": true}' % temp_file
    command = 'echo \'%s\' | lake exe repl' % data

    try:
        result = subprocess.run(command, shell=True, check=True,
                                stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        stdout = result.stdout.decode('utf-8')
        stderr = result.stderr.decode('utf-8')
        json_stdout = json.loads(stdout)
        # "messages" holds compiler diagnostics; absence means a clean run.
        if "messages" not in json_stdout:
            passed += 1
        results.append({'stdout': stdout, 'stderr': stderr, 'status': 'pass'})
    except subprocess.CalledProcessError as e:
        results.append({'error': str(e), 'status': 'nopass'})
    finally:
        total += 1
        # Actually clean up the temp file (the original only commented on it).
        os.remove(temp_file)

    # BUG FIX: the original divided by (passed + total), counting passing
    # items twice in the denominator; the rate is passed out of total.
    pass_rate = passed / total * 100

    return {'results': results, 'pass_rate': pass_rate}
49
+
50
+ # Set the directory where your .lean files are located
51
+
52
+ # Get a list of all .lean files in the directory
53
+ # lean_files = [f for f in os.listdir(directory) if f.endswith(".lean")]
54
+ # lean_files = ["test/file.lean"]
55
def single(command_list, output_path):
    """Sequentially check each item's Lean snippet with `lake exe repl`.

    Each item['cmd'] is written to test/test.lean, elaborated by the REPL,
    and recorded as pass/nopass.  The overall pass rate is printed and all
    results are dumped to results.json.

    Args:
        command_list: iterable of dicts, each with a 'cmd' string of Lean code.
        output_path: unused; kept for interface compatibility with callers.
    """
    results = []
    passed = 0
    total = 0
    for item in tqdm(command_list):
        with open("test/test.lean", "w", encoding='utf-8') as f:
            f.write(item['cmd'])
        data = '{"path": "test/test.lean", "allTactics": true}'
        command = 'echo \'%s\' | lake exe repl' % data

        try:
            result = subprocess.run(command, shell=True, check=True,
                                    stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            stdout = result.stdout.decode('utf-8')
            stderr = result.stderr.decode('utf-8')
            json_stdout = json.loads(stdout)
            # BUG FIX: the original appended status 'pass' unconditionally,
            # even when the REPL reported diagnostics, so the saved records
            # disagreed with the `passed` counter.
            if "messages" not in json_stdout:
                passed += 1
                status = 'pass'
            else:
                status = 'nopass'
            results.append({'stdout': stdout, 'stderr': stderr, 'status': status})
        except subprocess.CalledProcessError as e:
            results.append({'error': str(e), 'status': 'nopass'})
        total += 1

    # Guard against an empty command list (the original raised
    # ZeroDivisionError in that case).
    pass_rate = passed / total * 100 if total else 0.0
    print(pass_rate)

    # Persist per-item results alongside the aggregate rate.
    with open('results.json', 'w') as f:
        json.dump({'results': results, 'pass_rate': pass_rate}, f,
                  indent=2, ensure_ascii=False)
99
+
100
+
101
+
102
+
103
+
104
+
105
def multi(command_list, output_path, k):
    """Check up to k candidate Lean proofs per item and report Pass@1 / Pass@k.

    Every candidate in item['cmd'] is written to its own .lean file under
    /opt/jianqiao, elaborated with `lake exe repl` (480 s timeout), and
    classified as 'pass', 'nopass' (with the flat character offset of the
    first error), 'nopass_limit' or 'nopass_error'.  Results are written to
    pass_rate_results/<output_path>.
    """
    results = []

    def execute_command(item, index):
        temp_dir = '/opt/jianqiao'

        def filter_json(json_data):
            # Keep only the fields worth persisting in the output dump.
            kept = {}
            for key in json_data.keys():
                if key in ['question', 'answer', 'total output', 'results']:
                    kept[key] = json_data[key]
            return kept

        record = filter_json(item)
        record['results'] = []

        for cand_idx, cmd in enumerate(item['cmd']):
            # Unique filename per (item, candidate) so workers don't collide.
            temp_file = os.path.join(temp_dir, f"{index}_test_{cand_idx}.lean")
            with open(temp_file, "w") as f:
                f.write(cmd)

            data = '{"path": "%s", "allTactics": true}' % temp_file
            command = f'echo \'{data}\' | lake exe repl'

            try:
                proc = subprocess.run(command, shell=True, check=True, timeout=480,
                                      stdout=subprocess.PIPE, stderr=subprocess.PIPE)
                stdout = json.loads(proc.stdout.decode('utf-8'))
                stderr = proc.stderr.decode('utf-8')
            except subprocess.TimeoutExpired as e:
                outcome = {'error': str(e), 'status': 'nopass_limit'}
            except subprocess.CalledProcessError as e:
                outcome = {'error': str(e), 'status': 'nopass_error'}
            else:
                if "messages" not in stdout and not len(stderr):
                    outcome = {'stdout': stdout, 'stderr': stderr, 'status': 'pass'}
                elif not len(stderr) and "messages" in stdout:
                    found_error = 0
                    src_lines = cmd.split('\n')
                    for msg in stdout['messages']:
                        if msg['severity'] == 'error':
                            found_error = 1
                            # Fold the 1-based (line, column) position into a
                            # flat character offset within cmd.
                            start_line = msg['pos']['line'] - 1
                            offset = msg['pos']['column'] - 1
                            for ln in range(start_line - 1, 0, -1):
                                length = len(src_lines[ln])
                                offset += length + 1
                                if not length:
                                    break
                            outcome = {'stdout': stdout, 'stderr': stderr,
                                       'status': 'nopass', 'string_pos': offset}
                            break
                    if not found_error:
                        # Only warnings/info messages: still counts as a pass.
                        outcome = {'stdout': stdout, 'stderr': stderr, 'status': 'pass'}
                else:
                    assert len(stderr)
                    outcome = {'stdout': stdout, 'stderr': stderr,
                               'status': 'nopass', 'string_pos': 0}

            record['results'].append(outcome)
        return record

    total = len(command_list)

    with ThreadPoolExecutor(max_workers=128) as executor:
        futures = [executor.submit(execute_command, cmd, i)
                   for i, cmd in enumerate(command_list)]
        for future in tqdm(futures, total=total, desc="Processing Commands"):
            results.append(future.result())

    def calculate_pass(result_list, k):
        # An item counts toward Pass@n when any of its first n candidates passed.
        pass_1_count = 0
        pass_k_count = 0
        for record in result_list:
            item_results = record.get('results', [])
            if any(r.get('status') == 'pass' for r in item_results[:1]):
                pass_1_count += 1
            if any(r.get('status') == 'pass' for r in item_results[:k]):
                pass_k_count += 1
        denom = len(result_list)
        return ((pass_1_count / denom if denom else 0),
                (pass_k_count / denom if denom else 0))

    pass_1, pass_k = calculate_pass(results, k)
    print("Pass@1:", pass_1)
    print(f"Pass@{k}:", pass_k)

    output_file = f"pass_rate_results/{output_path}"
    # Create the directory if it doesn't exist.
    os.makedirs(os.path.dirname(output_file), exist_ok=True)

    with open(output_file, 'w') as f:
        json.dump({'results': results, 'pass_1': pass_1, f"pass_{k}": pass_k},
                  f, indent=2, ensure_ascii=False)
213
+
214
+ import re
215
def remove_simp_pattern_from_end(s):
    """Strip a trailing ``@[simp ...]`` attribute annotation from *s*.

    Returns *s* unchanged when no such annotation terminates the string.
    """
    trailing_simp_attr = re.compile(r'@\[simp\s*.*?\]$')
    return trailing_simp_attr.sub('', s)
218
+
219
def main(args):
    """Build the per-candidate Lean command list from the input JSON files
    and launch the pass-rate evaluation via ``multi``.

    For every item, up to ``args.k`` generated outputs are truncated at the
    first ``#align`` marker and prefixed with the shared Lean working
    environment so each candidate can be checked standalone.

    Args:
        args: parsed CLI namespace; uses ``input_path``, ``output_path``
            and ``k``.
    """
    # Shared Lean preamble prepended to every candidate statement.
    with open('data/leandojo.txt', 'r', encoding='utf-8') as rf:
        working_env = rf.read()

    command_list = []
    file_pattern = os.path.join(args.input_path, '[0-9]*.json')
    for file_path in glob.glob(file_pattern):
        with open(file_path, 'r', encoding='utf-8') as rf:
            for line in rf:
                json_item = json.loads(line)
                json_item['cmd'] = []
                # Slicing already clamps to the available length, so no
                # explicit min(k, len(...)) is needed.
                for output in json_item['total output'][:args.k]:
                    # Keep only the statement text before the first #align.
                    statement = output.split("#align")[0]
                    json_item['cmd'].append('\n\n'.join([working_env, statement]))
                command_list.append(json_item)

    multi(command_list, args.output_path, args.k)
245
+
246
if __name__ == '__main__':
    # Command-line entry point: declare the CLI surface and dispatch to main().
    parser = ArgumentParser()
    parser.add_argument('--data_path', type=str,
                        default='data/grade-school-math-master/grade_school_math/data/test.jsonl')
    parser.add_argument('--input_path', type=str, default='')
    parser.add_argument('--cuda_num', type=int, default=8)
    parser.add_argument('--output_path', type=str, default='total.json')
    parser.add_argument('--k', type=int, default=1)
    parser.add_argument('--generate_method', type=str,
                        choices=['single', 'sft', 'comp', 'self_consistency', 'single_consistency'])
    parser.add_argument('--method', type=str, choices=['main', 'test', 'get_data'])
    main(parser.parse_args())
259
+
260
+
261
+