|
import heapq |
|
import math |
|
import random |
|
import re |
|
import json |
|
from typing import List, Tuple, Dict, Any, Optional |
|
import itertools |
|
from transformers import AutoTokenizer |
|
import asyncio |
|
from openai import AsyncOpenAI |
|
import numpy as np |
|
from openai import OpenAI |
|
import openai |
|
import json |
|
|
|
import re |
|
def read_jsonl(file_path):
    """Load a JSON Lines file into a list of decoded objects.

    Args:
        file_path: Path of a UTF-8 encoded file holding one JSON document
            per line.

    Returns:
        A list with one decoded object per non-blank line, in file order.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    records = []
    with open(file_path, 'r', encoding='utf-8') as file:
        for line in file:
            line = line.strip()
            # Blank lines (commonly a trailing newline at end of file) hold
            # no record; json.loads('') would raise, so skip them.
            if not line:
                continue
            records.append(json.loads(line))
    return records
|
|
|
def extract_answer_judge(solution_text: str):
    r"""Return the content of the last ``\boxed{...}`` in *solution_text*.

    Braces are matched by nesting depth, so ``\boxed{\frac{1}{2}}`` yields
    ``\frac{1}{2}`` rather than being cut at the first ``}``.

    Args:
        solution_text: Text that may contain one or more ``\boxed{...}``
            expressions.

    Returns:
        The whitespace-stripped content of the last ``\boxed{...}`` found,
        or ``None`` when the text contains no closed ``\boxed{`` expression.
    """
    marker = '\\boxed{'
    answer = None
    cursor = 0
    while True:
        start = solution_text.find(marker, cursor)
        if start == -1:
            return answer
        body_start = start + len(marker)

        # Walk forward tracking brace depth until the opening brace of
        # \boxed{ is closed.
        depth = 1
        end = None
        for pos in range(body_start, len(solution_text)):
            char = solution_text[pos]
            if char == '{':
                depth += 1
            elif char == '}':
                depth -= 1
                if depth == 0:
                    end = pos
                    break

        if end is None:
            # Unbalanced braces: fall back to the first closing brace so that
            # anything a simple "up to the first }" match would extract is
            # still extracted.
            end = solution_text.find('}', body_start)
            if end == -1:
                # No closing brace remains, so no later \boxed{ can close.
                return answer

        answer = solution_text[body_start:end].strip()
        cursor = end + 1
|
|
|
def separate_steps(steps: List[str], mode: str = 'join') -> Any:
    """Convert between a list of steps and one blank-line-separated string.

    Args:
        steps: A list of strings when ``mode`` is ``'join'``; a single string
            when ``mode`` is ``'split'``.
        mode: ``'join'`` to concatenate the list with ``"\\n\\n"``, or
            ``'split'`` to break the string on ``"\\n\\n"``.

    Returns:
        The joined string (``'join'``) or the list of pieces (``'split'``).

    Raises:
        ValueError: If ``mode`` is neither ``'join'`` nor ``'split'``.
        TypeError: If ``steps`` has the wrong type for the chosen mode.
    """
    sep = "\n\n"

    # Reject unknown modes first; the type of `steps` is only checked once
    # the mode is known to be valid.
    if mode not in ('join', 'split'):
        raise ValueError("Mode should be either 'join' or 'split'.")

    if mode == 'join':
        if isinstance(steps, list):
            return sep.join(steps)
        raise TypeError("For 'join' mode, 'steps' must be a list of strings.")

    if isinstance(steps, str):
        return steps.split(sep)
    raise TypeError("For 'split' mode, 'steps' must be a string.")
|
|
|
|
|
|
|
# Cached judge client so repeated calls reuse a single OpenAI client instead
# of constructing a new one for every evaluated record.
_JUDGE_CLIENT = None


def _get_judge_client():
    """Return the shared OpenAI client for the local judge endpoint."""
    global _JUDGE_CLIENT
    if _JUDGE_CLIENT is None:
        _JUDGE_CLIENT = OpenAI(
            base_url="http://localhost:8014/v1",
            api_key="token-abc123"
        )
    return _JUDGE_CLIENT


def _parse_verdict(response):
    """Return "Yes", "No" or None from the last three words of *response*."""
    if not response:
        # The API may return no text content; treat that as "no verdict".
        return None
    tail = ' '.join(response.strip().split()[-3:])
    # Compare whole alphabetic words so that e.g. "Note" or "None" is not
    # read as "No"; surrounding punctuation/markup such as "**Yes**." or
    # "\boxed{No}" is ignored.
    words = re.findall(r"[A-Za-z]+", tail)
    if "Yes" in words:
        return "Yes"
    if "No" in words:
        return "No"
    return None


def evaluate_llm_as_judge(problem: str, steps: list, final_answer, output_type: str = 'bool') -> Dict[str, Any]:
    """Ask the judge model whether a solution reaches the given final answer.

    Sends one chat-completion request (model "DeepSeek-R1-Distill-Qwen-14B",
    temperature 0.6, up to 8192 tokens) to the OpenAI-compatible endpoint at
    http://localhost:8014/v1 and reads a Yes/No verdict from the last three
    words of the reply.

    Args:
        problem: The math problem statement.
        steps: The solution steps; interpolated into the prompt as-is.
        final_answer: The reference final answer.
        output_type: Accepted for compatibility; not used by this function.

    Returns:
        A dict with keys 'question', 'final_answer', 'reasining_steps',
        'yes_or_no' ("Yes", "No", or None when no verdict was found) and
        'response' (the raw reply text).
    """
    # The module-level name `client` is rebound on every call, as before.
    global client
    client = _get_judge_client()

    model_name = "DeepSeek-R1-Distill-Qwen-14B"

    # NOTE(review): `steps` is formatted with str(); when a list is passed the
    # prompt shows its Python repr — confirm this is the intended rendering.
    judge_prompt = f"""
I will show you a [Math Problem], the [Answer], and an [AI's Solution] generated by an AI assistant. Your task is to determine if the **final answer** in the [AI's Solution] matches the answer in the [Answer].

--------------------------------------------------

[Math Problem]

{problem}

[Answer]

{final_answer}

[AI's Solution]

{steps}

--------------------------------------------------

Please evaluate whether the **Answer:** in the [AI's Solution] is correct, based solely on whether it matches the **final answer** in the [Answer].

Note that the [AI's Solution] does not need to replicate same reasoning steps of the [Answer]; it only needs to reach the same **final answer** to be considered correct.

Reply with only "Yes" or "No" in the end of your response.

"""

    completion = client.chat.completions.create(
        model=model_name,
        messages=[{'role': 'user', 'content': judge_prompt}],
        n=1,
        temperature=0.6,
        max_tokens=8192,
    )
    response = completion.choices[0].message.content

    print("*****Verification*****:", response)

    yes_or_no = _parse_verdict(response)

    print('yes_or_no',yes_or_no)

    # The misspelled 'reasining_steps' key is part of the output schema and is
    # kept unchanged for downstream consumers.
    merged_data = {
        'question': problem,
        'final_answer': final_answer,
        'reasining_steps': steps,
        'yes_or_no': yes_or_no,
        'response': response
    }

    return merged_data
|
|
|
|
|
|
|
# Script body: load the records, obtain a judge verdict for each one, and
# write the results as JSON Lines.
new_file_path = '/data/zeju/O1_data/0311_test_training_new_processed.jsonl'
data_all = read_jsonl(new_file_path)
print(len(data_all))

output = []
zero = 0
# enumerate() gives the true position of each record; list.index() is O(n)
# per row and returns the first match when records are duplicated.
for idx, data in enumerate(data_all):
    print(idx)
    problem = data['question']
    steps_ori = data['process']
    labels = data['label']
    final_answer = data['answer']

    steps = steps_ori.split('\n\n')
    steps[0] = problem + ' ' + steps[0]

    # Drop the final chunk of the split process text.
    # NOTE(review): presumably the last chunk is not a reasoning step — confirm.
    steps_updated = steps[:-1]

    if zero in labels:
        # Records whose labels contain 0 are sent to the judge model.
        merged_data = evaluate_llm_as_judge(problem=problem, steps=steps_updated, final_answer=final_answer, output_type='bool')
        if merged_data is not None:
            output.append(merged_data)
    else:
        # Records without a 0 label get a fixed "No" entry and no judge call.
        merged_data = {
            'question': problem,
            'final_answer': final_answer,
            'reasining_steps': steps_updated,
            'yes_or_no': "No",
            'response': '<think>\n\n</think>-1'
        }
        output.append(merged_data)

output_file = '/data/zeju/O1_data/0312_test_80_washdata.jsonl'
# Use a separate name for the file handle so `output_file` keeps holding the
# path and the final message reports the path rather than a file object.
with open(output_file, 'w', encoding='utf-8') as out_fh:
    for entry in output:
        out_fh.write(json.dumps(entry, ensure_ascii=False) + '\n')

# Message text means "Data successfully written to <path>".
print(f"数据已成功写入 {output_file}")
|
|
|
|
|
|
|
|
|
|