# Page banner captured with the source (not part of the program):
# "Spaces: Running on Zero"
from transformers import AutoModelForCausalLM, AutoTokenizer | |
import torch | |
import re | |
import gradio as gr | |
import time | |
import spaces | |
import copy | |
# Load the tokenizer and the model once, at import time, so every request
# handled by this module shares the same instances.
model_name = "Qwen/Qwen2.5-7B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)
class TicTacToeBoard:
    """Mutable 3x3 tic-tac-toe position.

    Cells are kept row-major in a flat list of nine ints:
    0 = empty, 1 = X (moves first), 2 = O (moves second).
    """

    def __init__(self):
        self.board = [0 for _ in range(9)]  # cell contents, indices 0-8
        self.turn = 1                       # side to move: 1 = X, 2 = O
        self.moves = []                     # indices played so far, in order
        self.last_move = None               # index of the most recent move

    def is_game_over(self):
        """Return True when a side has won or no empty cell remains."""
        has_winner = self.get_winner() != 0
        board_full = all(cell != 0 for cell in self.board)
        return has_winner or board_full

    def get_winner(self):
        """Return 1 if X has a line, 2 if O has a line, otherwise 0."""
        lines = (
            (0, 1, 2), (3, 4, 5), (6, 7, 8),  # rows
            (0, 3, 6), (1, 4, 7), (2, 5, 8),  # columns
            (0, 4, 8), (2, 4, 6),             # diagonals
        )
        cells = self.board
        for a, b, c in lines:
            mark = cells[a]
            if mark != 0 and mark == cells[b] and mark == cells[c]:
                return mark
        return 0

    def get_legal_moves(self):
        """Return the indices (0-8) of empty cells; empty once the game is over."""
        if self.is_game_over():
            return []
        return [idx for idx, cell in enumerate(self.board) if cell == 0]

    def make_move(self, position):
        """Play `position` (0-8) for the side to move and pass the turn.

        Raises:
            ValueError: if `position` is not currently a legal move.
        """
        legal = self.get_legal_moves()
        if position not in legal:
            raise ValueError(f"無効な手: {position}")
        mover = self.turn
        self.board[position] = mover
        self.moves.append(position)
        self.last_move = position
        self.turn = 3 - mover  # 1 -> 2, 2 -> 1

    def to_string(self):
        """Render the board as a Markdown table with A-C columns and 1-3 rows."""
        glyphs = {0: "・", 1: "X", 2: "O"}
        lines = ["| | A | B | C |", "|---|---|---|---|"]
        for r in range(3):
            cells = "".join(
                f"| {glyphs[value]} " for value in self.board[r * 3:r * 3 + 3]
            )
            lines.append(f"| {r + 1} {cells}|")
        return "\n".join(lines)

    def index_to_coord(self, index):
        """Convert an index (0-8) to a coordinate such as "A1"; None maps to None."""
        if index is None:
            return None
        row_idx, col_idx = divmod(index, 3)
        return f"{chr(ord('A') + col_idx)}{row_idx + 1}"
def create_user_prompt(board, player_sym):
    """Build the user-turn prompt describing the current position.

    The prompt contains, in order: an optional line announcing the human's
    last move, the board rendered by `board.to_string()`, a plain-text list
    of the squares held by X and O, and the list of legal moves.

    Args:
        board: board object exposing `board`, `last_move`,
            `get_legal_moves()`, `to_string()` and `index_to_coord()`.
        player_sym: the human player's mark (1 = X, 2 = O).

    Returns:
        The prompt text.
    """

    def _coord(idx):
        # Column letter A-C followed by row number 1-3.
        return f"{chr(ord('A') + idx % 3)}{idx // 3 + 1}"

    valid_moves = [_coord(move) for move in board.get_legal_moves()]

    # Any non-empty cell that is not X is reported under O.
    x_positions = [_coord(i) for i, piece in enumerate(board.board) if piece == 1]
    o_positions = [
        _coord(i) for i, piece in enumerate(board.board) if piece not in (0, 1)
    ]

    description_lines = ["盤面の説明:"]
    if x_positions:
        description_lines.append(f"X: {', '.join(x_positions)}")
    if o_positions:
        description_lines.append(f"O: {', '.join(o_positions)}")
    if len(description_lines) == 1:
        description_lines.append("空の盤面")
    board_description = "\n".join(description_lines)

    # Announce the last move only when that square holds the human's mark.
    last = board.last_move
    if last is not None and board.board[last] == player_sym:
        user_move_info = f"ユーザーが{board.index_to_coord(last)}を選びました。\n"
    else:
        user_move_info = ""

    sections = [
        user_move_info,
        f"### 現在の盤面\n{board.to_string()}\n",
        f"{board_description}\n\n",
        f"### 有効な手\n{', '.join(valid_moves)}\n\n",
    ]
    return "".join(sections)
def extract_move(response):
    """Return the stripped content of the last <move>...</move> tag, or None.

    When the response holds several <move> tags, the final one wins.
    """
    found = None
    for hit in re.finditer(r"<move>(.*?)</move>", response, re.DOTALL):
        found = hit.group(1)
    if found is None:
        return None
    return found.strip()
def extract_thinking(response):
    """Return the stripped content of the first <think>...</think> tag.

    An empty string is returned when the response has no such tag.
    """
    pattern = re.compile(r"<think>(.*?)</think>", re.DOTALL)
    hit = pattern.search(response)
    if hit is None:
        return ""
    return hit.group(1).strip()
def coord_to_index(coord):
    """Convert a coordinate such as "A1" or "b2" to a board index (0-8).

    The first character is the column (A-C, case-insensitive) and the second
    is the row (1-3). Indices are row-major: "A1" -> 0, "C1" -> 2, "C3" -> 8.

    Args:
        coord: the coordinate text; anything that is not a two-character
            string is rejected.

    Returns:
        The index, or None when `coord` is not a valid coordinate.
    """
    # Reject non-strings up front so len() cannot raise on e.g. an int.
    if not isinstance(coord, str) or len(coord) != 2:
        return None
    try:
        # ord() raises TypeError when upper() expands to several characters
        # (e.g. "ß" -> "SS"); int() raises ValueError on a non-digit row.
        col = ord(coord[0].upper()) - ord("A")
        row = int(coord[1]) - 1
    except (TypeError, ValueError):
        return None
    if not (0 <= col <= 2 and 0 <= row <= 2):
        return None
    return row * 3 + col
def get_ai_move(board, player, conversation_history, player_sym):
    """Ask the language model for its next move.

    Args:
        board: the current board; used to build the user prompt.
        player: not referenced in the body; kept for the existing callers.
        conversation_history: chat messages so far. It is deep-copied, so the
            caller's list is left untouched.
        player_sym: the human player's mark, forwarded to the prompt builder.

    Returns:
        A tuple `(move_index, thinking, messages)`: the parsed board index
        (None when no usable move was found), the extracted <think> text, and
        the copied history extended with the new user and assistant turns.
    """
    prompt_for_turn = create_user_prompt(board, player_sym)

    # Work on a private copy and add this turn's prompt to it.
    messages = copy.deepcopy(conversation_history)
    messages.append({"role": "user", "content": prompt_for_turn})

    # Render the chat into one prompt string and run generation.
    prompt_text = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    model_inputs = tokenizer([prompt_text], return_tensors="pt").to(model.device)
    output_batch = model.generate(**model_inputs, max_new_tokens=512)

    # Drop the leading prompt tokens so only the newly generated ones remain.
    completions = [
        full_ids[len(prompt_ids):]
        for prompt_ids, full_ids in zip(model_inputs.input_ids, output_batch)
    ]
    response = tokenizer.batch_decode(completions, skip_special_tokens=True)[0]

    messages.append({"role": "assistant", "content": response})

    thinking = extract_thinking(response)
    move_coord = extract_move(response)
    if move_coord:
        move_index = coord_to_index(move_coord)
    else:
        move_index = None

    # Debug output: current length of the conversation history.
    print(f"会話履歴の現在の長さ: {len(messages)}")
    return move_index, thinking, messages
def board_to_display(board, game_state="", ai_thinking=""):
    """Convert a board into a 3x3 grid of display strings.

    Args:
        board: object whose `board` attribute holds nine cell values
            (0 = empty, 1 = X, 2 = O) in row-major order.
        game_state: returned unchanged as the second tuple element.
        ai_thinking: returned unchanged as the third tuple element.

    Returns:
        `(grid, game_state, ai_thinking)` where `grid` is a list of three
        rows of three strings.
    """
    glyphs = {0: " ", 1: "X", 2: "O"}
    cells = board.board
    grid = [[glyphs[cells[r * 3 + c]] for c in range(3)] for r in range(3)]
    return grid, game_state, ai_thinking
def format_thinking_multi_turn(move_history):
    """Render the move log as Markdown, one section per turn.

    Args:
        move_history: list of dicts with a 'type' key ('ai' or 'user') and a
            'move' key; 'ai' entries also carry 'thinking'. Entries of any
            other type are skipped.

    Returns:
        The Markdown text, or a placeholder when the history is empty.
    """
    if not move_history:
        return "### AIの思考プロセス\nまだ思考プロセスがありません。"

    lines = []
    for entry in move_history:
        kind = entry['type']
        if kind == 'ai':
            lines.extend([
                "### LLMのターン",
                f"{entry['thinking']}",
                f"~~~\nLLMは {entry['move']}を えらんだ!\n~~~",
            ])
        elif kind == 'user':
            lines.extend([
                "### ユーザーのターン",
                f"~~~\nユーザーは {entry['move']}を えらんだ!\n~~~",
            ])
    return "\n".join(lines)
def create_tictactoe_ui():
    """Build the Gradio Blocks app for playing tic-tac-toe against the LLM.

    The UI holds five pieces of per-session state: the board object, the
    human's mark, the latest AI "thinking" text, the chat history sent to the
    model, and a move log used to render the thinking panel.

    Returns:
        The `gr.Blocks` instance; the caller is expected to launch it.
    """

    def _blank_grid():
        # A fresh list each time so no handler shares a grid with another.
        return [[" ", " ", " "] for _ in range(3)]

    def _result_text(game, player_sym):
        # Status line for a finished game: a draw, or the winner and who it was.
        winner = game.get_winner()
        if winner == 0:
            return "引き分け!"
        symbol = "X" if winner == 1 else "O"
        return f"{symbol}の勝ち! ({'あなた' if winner == player_sym else 'AI'})"

    with gr.Blocks(title="三目並べ vs LLM") as demo:
        gr.Markdown("# 三目並べ vs LLM")
        gr.Markdown("LLMとの三目並べゲームです。盤面をクリックして手を指してください。")

        # Per-session state.
        game_state = gr.State(None)
        player_symbol = gr.State(1)  # default: the human plays X (first)
        ai_thinking_state = gr.State("")
        conversation_history = gr.State([])  # chat messages sent to the model
        move_history = gr.State([])  # move log shown in the thinking panel

        with gr.Row():
            with gr.Column(scale=1):
                player_choice = gr.Radio(
                    ["X (先手)", "O (後手)"],
                    label="あなたの選択",
                    value="X (先手)",
                    interactive=True
                )
                start_button = gr.Button("ゲーム開始")
                reset_button = gr.Button("リセット")
                status = gr.Textbox(label="ゲーム状態", value="ゲームを開始してください")
            with gr.Column(scale=2):
                # Board display.
                board_output = gr.Dataframe(
                    headers=["A", "B", "C"],
                    row_count=3,
                    col_count=3,
                    value=_blank_grid(),
                    interactive=False
                )

        # The AI's thinking is always rendered as Markdown.
        ai_thinking_md = gr.Markdown("### AIの思考プロセス\nまだ思考プロセスがありません。")

        def handle_click(evt: gr.SelectData, game, player_sym, thinking, messages, history):
            """Play the clicked cell for the human, then let the AI answer.

            Returns the values for (board_output, status, ai_thinking_md,
            conversation_history, move_history). The board object is mutated
            in place; game_state is not among this handler's outputs.
            """
            if game is None or game.is_game_over():
                return (
                    _blank_grid(),
                    "ゲームを開始してください",
                    format_thinking_multi_turn(history),
                    messages,
                    history
                )

            # Translate the clicked (row, col) into a board index.
            row, col = evt.index
            move_index = row * 3 + col

            # Reject clicks when it is not the human's turn or the cell is taken.
            if game.turn != player_sym or move_index not in game.get_legal_moves():
                return (
                    board_to_display(game, "無効な手です", thinking)[0],
                    "無効な手です",
                    format_thinking_multi_turn(history),
                    messages,
                    history
                )

            try:
                # Apply the human's move and log it.
                game.make_move(move_index)
                user_move = game.index_to_coord(move_index)
                updated_history = history + [{'type': 'user', 'move': user_move}]

                # The human's move may already have ended the game.
                if game.is_game_over():
                    status_text = _result_text(game, player_sym)
                    return (
                        board_to_display(game, status_text, thinking)[0],
                        status_text,
                        format_thinking_multi_turn(updated_history),
                        messages,
                        updated_history
                    )

                # AI's turn.
                ai_move, new_thinking, new_messages = get_ai_move(game, game.turn, messages, player_sym)
                if ai_move is not None and ai_move in game.get_legal_moves():
                    game.make_move(ai_move)
                    ai_move_coord = game.index_to_coord(ai_move)
                    updated_history = updated_history + [
                        {'type': 'ai', 'move': ai_move_coord, 'thinking': new_thinking}
                    ]
                    if game.is_game_over():
                        status_text = _result_text(game, player_sym)
                    else:
                        status_text = "あなたの番です"
                else:
                    # The human's move is already on the board, so it stays in
                    # the log; only the AI's reply is missing.
                    status_text = "AIが有効な手を選択できませんでした。ゲームをリセットしてください。"

                return (
                    board_to_display(game, status_text, new_thinking)[0],
                    status_text,
                    format_thinking_multi_turn(updated_history),
                    new_messages,
                    updated_history
                )
            except ValueError:
                return (
                    board_to_display(game, "無効な手です", thinking)[0],
                    "無効な手です",
                    format_thinking_multi_turn(history),
                    messages,
                    history
                )

        def start_game(choice):
            """Create a new game; when the human picked O, the AI moves first.

            Returns the values for (game_state, player_symbol, board_output,
            status, ai_thinking_md, ai_thinking_state, conversation_history,
            move_history).
            """
            game = TicTacToeBoard()
            player_sym = 1 if choice == "X (先手)" else 2
            ai_sym = 3 - player_sym  # the AI takes the other mark

            # System prompt telling the model which mark it plays and how to answer.
            symbol = "X" if ai_sym == 1 else "O"
            system_prompt = (
                f"あなたは{symbol}としてユーザーと対戦する三目並べのプロです。\n"
                "まず、<think></think>タグ内であなたの思考過程を考えてください。"
                "次に、有効な手の中から1つを選び、<move></move>タグ内に座標形式(例:A1、B2、C3)で示してください。\n"
                "以下の形式で回答してください:\n"
                "<think>\n...\n</think>\n<move>\n...\n</move>"
            )

            # Fresh chat history and move log.
            initial_messages = [{"role": "system", "content": system_prompt}]
            thinking = ""
            game_history = []

            if player_sym == 2:
                # Reuse the shared inference path instead of duplicating it here.
                ai_move, thinking, initial_messages = get_ai_move(
                    game, game.turn, initial_messages, player_sym
                )
                if ai_move is not None and ai_move in game.get_legal_moves():
                    game.make_move(ai_move)
                    # Log the normalized coordinate, not the raw model text.
                    game_history.append({
                        'type': 'ai',
                        'move': game.index_to_coord(ai_move),
                        'thinking': thinking
                    })
                    status_text = "あなたの番です"
                else:
                    status_text = "AIが手を選択できませんでした。リセットしてください。"
            else:
                status_text = "あなたの番です"

            board_display = board_to_display(game, status_text, thinking)[0]
            return (
                game,
                player_sym,
                board_display,
                status_text,
                format_thinking_multi_turn(game_history),
                thinking,
                initial_messages,
                game_history
            )

        def reset_game():
            """Return the initial value for every output: no game, blank board."""
            return (
                None,
                1,
                _blank_grid(),
                "ゲームをリセットしました。開始するには「ゲーム開始」を押してください。",
                "### AIの思考プロセス\nまだ思考プロセスがありません。",
                "",
                [],
                []
            )

        # Event wiring.
        board_output.select(
            handle_click,
            inputs=[game_state, player_symbol, ai_thinking_state, conversation_history, move_history],
            outputs=[board_output, status, ai_thinking_md, conversation_history, move_history]
        )
        start_button.click(
            start_game,
            inputs=[player_choice],
            outputs=[
                game_state,
                player_symbol,
                board_output,
                status,
                ai_thinking_md,
                ai_thinking_state,
                conversation_history,
                move_history
            ]
        )
        reset_button.click(
            reset_game,
            outputs=[
                game_state,
                player_symbol,
                board_output,
                status,
                ai_thinking_md,
                ai_thinking_state,
                conversation_history,
                move_history
            ]
        )
    return demo
# Build and launch the Gradio app only when this file is run as a script.
if __name__ == "__main__":
    demo = create_tictactoe_ui()
    demo.launch()