|
import json |
|
import os |
|
import tqdm |
|
|
|
def get_statement_proof(text): |
|
import re |
|
|
|
|
|
|
|
statement_pattern = r"statement:\n(.*?)(?=\n\nproof:)" |
|
proof_pattern = r"proof:\n(.*)" |
|
|
|
statement_match = re.search(statement_pattern, text, re.DOTALL) |
|
proof_match = re.search(proof_pattern, text, re.DOTALL) |
|
|
|
statement_content = statement_match.group(1).strip() |
|
proof_content = proof_match.group(1).strip() |
|
|
|
return statement_content, proof_content |
|
|
|
|
|
def save_passed_results(input_list): |
|
for input_file in input_list: |
|
save_dir = os.path.dirname(input_file) |
|
if "rft" not in input_file: |
|
save_file = os.path.join(save_dir, 'pda_annotation.jsonl') |
|
else: |
|
save_file = os.path.join(save_dir, 'rft_pda_annotation.jsonl') |
|
|
|
with open(input_file, 'r') as file: |
|
data = json.load(file) |
|
|
|
pda_valid_true_count = 0 |
|
pda_valid_false_count = 0 |
|
|
|
with open(save_file, 'w') as output_file: |
|
for item in tqdm.tqdm(data['results']): |
|
statement, proof = get_statement_proof(item['question']) |
|
|
|
|
|
output = item['total output'][0] |
|
result_statementproof = item['results'][0] |
|
result_statement = item['results'][1] |
|
|
|
|
|
|
|
|
|
if result_statement['status'] == 'pass' and result_statementproof['status'] == 'nopass': |
|
pda_valid = True |
|
pda_valid_true_count += 1 |
|
elif result_statement['status'] == 'pass' and result_statementproof['status'] == 'pass': |
|
pda_valid = False |
|
pda_valid_false_count += 1 |
|
else: |
|
continue |
|
|
|
result_dict = { |
|
'nl_statement': statement, |
|
'nl_proof': proof, |
|
'fl_statementproof': output, |
|
'pda_valid': pda_valid |
|
} |
|
output_file.write(json.dumps(result_dict) + '\n') |
|
|
|
print(f"Number of items with pda_valid = True: {pda_valid_true_count}") |
|
print(f"Number of items with pda_valid = False: {pda_valid_false_count}") |
|
|
|
|
|
input_list = [ |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"compile_result/lean4_random_test/lean4_random_15k_rftpass1_woproof.jsonl", |
|
"compile_result/lean4_random_test/lean4_random_15kpass1_woproof.jsonl", |
|
] |
|
|
|
save_passed_results(input_list) |
|
|