Spaces:

misdelivery
/

LLM-Tic-Tac-Toe

Running on Zero

App Files Files Community

LLM-Tic-Tac-Toe / app.py

misdelivery

Update app.py

2a60d34 verified 6 days ago

raw

history blame contribute delete

19.7 kB

	from transformers import AutoModelForCausalLM, AutoTokenizer
	import torch
	import re
	import gradio as gr
	import time
	import spaces
	import copy

	# Load the model and tokenizer once at module level so every inference
	# call in this file shares the same objects.
	model_name = "Qwen/Qwen2.5-7B-Instruct"
	tokenizer = AutoTokenizer.from_pretrained(model_name)
	model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16, device_map="auto")

	class TicTacToeBoard:
	    """State of one tic-tac-toe game on a 3x3 board.

	    Cells are stored row-major in a flat list of nine ints
	    (0 = empty, 1 = X, 2 = O). X (player 1) moves first.
	    """

	    # Index triples that form a winning line: rows, columns, diagonals.
	    WIN_PATTERNS = (
	        (0, 1, 2), (3, 4, 5), (6, 7, 8),
	        (0, 3, 6), (1, 4, 7), (2, 5, 8),
	        (0, 4, 8), (2, 4, 6),
	    )

	    def __init__(self):
	        self.board = [0] * 9    # 0: empty, 1: X (first player), 2: O (second player)
	        self.turn = 1           # whose move it is: 1 = X, 2 = O
	        self.moves = []         # indices played so far, in order
	        self.last_move = None   # index of the most recent move, or None

	    def is_game_over(self):
	        """Return True when someone has won or no empty cell remains."""
	        return self.get_winner() != 0 or 0 not in self.board

	    def get_winner(self):
	        """Return the winning mark (1 = X, 2 = O), or 0 for no winner / draw."""
	        for a, b, c in self.WIN_PATTERNS:
	            mark = self.board[a]
	            if mark != 0 and mark == self.board[b] == self.board[c]:
	                return mark
	        return 0

	    def get_legal_moves(self):
	        """Return the indices (0-8) of empty cells; empty list once the game is over."""
	        if self.is_game_over():
	            return []
	        return [i for i, cell in enumerate(self.board) if cell == 0]

	    def make_move(self, position):
	        """Place the current player's mark at ``position`` and pass the turn.

	        Raises:
	            ValueError: if ``position`` is not currently a legal move.
	        """
	        if position not in self.get_legal_moves():
	            raise ValueError(f"無効な手: {position}")

	        self.board[position] = self.turn
	        self.moves.append(position)
	        self.last_move = position
	        self.turn = 3 - self.turn  # swap 1 <-> 2

	    def to_string(self):
	        """Render the board as a markdown table with A-C columns and 1-3 rows."""
	        symbols = {0: "・", 1: "X", 2: "O"}

	        # Plain "|" delimiters: "\|" is not a valid Python escape sequence and
	        # would put literal backslashes into the table.
	        lines = ["| | A | B | C |", "|---|---|---|---|"]
	        for row in range(3):
	            cells = " | ".join(
	                symbols[self.board[row * 3 + col]] for col in range(3)
	            )
	            lines.append(f"| {row + 1} | {cells} |")

	        return "\n".join(lines)

	    def index_to_coord(self, index):
	        """Convert an index (0-8) to a coordinate such as "A1"; None maps to None."""
	        if index is None:
	            return None

	        row = index // 3 + 1
	        col = chr(ord('A') + (index % 3))
	        return f"{col}{row}"

	def create_user_prompt(board, player_sym):
	    """Build the per-turn user message that is sent to the LLM.

	    The message contains, in order: an optional note naming the user's last
	    move, the board rendered by ``board.to_string()``, a plain-text list of
	    X and O positions, and the list of currently legal moves.

	    Args:
	        board: game object providing ``board``, ``last_move``,
	            ``get_legal_moves()``, ``to_string()`` and ``index_to_coord()``.
	        player_sym: mark of the human player (1 = X, 2 = O).

	    Returns:
	        The prompt text.
	    """
	    # Use the board's own converter everywhere so coordinates are always
	    # formatted the same way.
	    valid_moves = [board.index_to_coord(move) for move in board.get_legal_moves()]

	    x_positions = []
	    o_positions = []
	    for index, piece in enumerate(board.board):
	        if piece == 0:
	            continue
	        target = x_positions if piece == 1 else o_positions
	        target.append(board.index_to_coord(index))

	    description_lines = ["盤面の説明:"]
	    if x_positions:
	        description_lines.append(f"X: {', '.join(x_positions)}")
	    if o_positions:
	        description_lines.append(f"O: {', '.join(o_positions)}")
	    if len(description_lines) == 1:
	        description_lines.append("空の盤面")
	    board_description = "\n".join(description_lines)

	    # Mention the last move only when it was played by the human player.
	    user_move_info = ""
	    if board.last_move is not None and board.board[board.last_move] == player_sym:
	        last_move_coord = board.index_to_coord(board.last_move)
	        user_move_info = f"ユーザーが{last_move_coord}を選びました。\n"

	    return (
	        f"{user_move_info}"
	        f"### 現在の盤面\n{board.to_string()}\n"
	        f"{board_description}\n\n"
	        f"### 有効な手\n{', '.join(valid_moves)}\n\n"
	    )

	def extract_move(response):
	    """Return the stripped content of the last <move>...</move> tag, or None if absent."""
	    found = re.findall(r"<move>(.*?)</move>", response, re.DOTALL)
	    if not found:
	        return None
	    # The last tag wins when the model emits more than one.
	    return found[-1].strip()

	def extract_thinking(response):
	    """Return the stripped content of the first <think>...</think> tag, or "" if absent."""
	    found = re.search(r"<think>(.*?)</think>", response, re.DOTALL)
	    if found is None:
	        return ""
	    return found.group(1).strip()

	def coord_to_index(coord):
	    """Convert a coordinate such as "A1" or "b2" to a board index (0-8).

	    The column letter is case-insensitive. Returns None when ``coord`` is
	    not a two-character string, the column is outside A-C, or the row is
	    outside 1-3.
	    """
	    if not isinstance(coord, str) or len(coord) != 2:
	        return None

	    try:
	        col = ord(coord[0].upper()) - ord('A')
	        row = int(coord[1]) - 1
	    except (ValueError, TypeError):
	        # ValueError: row character is not a digit.
	        # TypeError: upper() expanded to several characters (e.g. "ß").
	        return None

	    if not (0 <= col <= 2 and 0 <= row <= 2):
	        return None

	    return row * 3 + col

	@spaces.GPU(duration=120)
	def get_ai_move(board, player, conversation_history, player_sym):
	    """Ask the model for its next move.

	    Args:
	        board: current game board, used to build the user prompt.
	        player: not used by this function.
	        conversation_history: chat messages so far; it is deep-copied, so
	            the caller's list is left untouched.
	        player_sym: mark of the human player, forwarded to the prompt builder.

	    Returns:
	        A tuple ``(move_index, thinking, messages)``: the parsed board index
	        (or None if no usable move was found), the extracted reasoning text,
	        and the copied history extended with the new user prompt and the
	        model's reply.
	    """
	    # Extend a private copy of the history with this turn's prompt.
	    messages = copy.deepcopy(conversation_history)
	    messages.append({"role": "user", "content": create_user_prompt(board, player_sym)})

	    # Run generation on the chat-formatted conversation.
	    chat_text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
	    model_inputs = tokenizer([chat_text], return_tensors="pt").to(model.device)
	    full_sequences = model.generate(**model_inputs, max_new_tokens=512)

	    # Drop the prompt tokens so only the newly generated part is decoded.
	    completions = [
	        sequence[len(prompt_ids):]
	        for prompt_ids, sequence in zip(model_inputs.input_ids, full_sequences)
	    ]
	    response = tokenizer.batch_decode(completions, skip_special_tokens=True)[0]

	    messages.append({"role": "assistant", "content": response})

	    # Pull the reasoning and the chosen coordinate out of the reply.
	    thinking = extract_thinking(response)
	    move_coord = extract_move(response)
	    move_index = coord_to_index(move_coord) if move_coord else None

	    # Debug output: current length of the conversation history.
	    print(f"会話履歴の現在の長さ: {len(messages)}")

	    return move_index, thinking, messages

	def board_to_display(board, game_state="", ai_thinking=""):
	    """Convert the board into a 3x3 grid of display strings.

	    Returns ``(grid, game_state, ai_thinking)``; the last two arguments are
	    passed through unchanged.
	    """
	    symbols = {0: " ", 1: "X", 2: "O"}
	    grid = [
	        [symbols[board.board[r * 3 + c]] for c in range(3)]
	        for r in range(3)
	    ]
	    return grid, game_state, ai_thinking

	def format_thinking_multi_turn(move_history):
	    """Render the move log as markdown, one section per turn.

	    AI entries show the model's reasoning followed by its move; user entries
	    show only the move. Entries of any other type are skipped. An empty log
	    yields a placeholder message.
	    """
	    if not move_history:
	        return "### AIの思考プロセス\nまだ思考プロセスがありません。"

	    lines = []
	    for entry in move_history:
	        kind = entry['type']
	        if kind == 'ai':
	            lines += [
	                "### LLMのターン",
	                f"{entry['thinking']}",
	                f"~~~\nLLMは　{entry['move']}を　えらんだ！\n~~~",
	            ]
	        elif kind == 'user':
	            lines += [
	                "### ユーザーのターン",
	                f"~~~\nユーザーは　{entry['move']}を　えらんだ！\n~~~",
	            ]

	    return "\n".join(lines)

	def _blank_board_rows():
	    """Return a fresh 3x3 grid of blank cells for the board Dataframe."""
	    return [[" ", " ", " "] for _ in range(3)]


	def _game_over_status(game, player_sym):
	    """Return the end-of-game status line: a draw, or the winner and who it was."""
	    winner = game.get_winner()
	    if winner == 0:
	        return "引き分け！"
	    symbol = "X" if winner == 1 else "O"
	    who = "あなた" if winner == player_sym else "AI"
	    return f"{symbol}の勝ち！ ({who})"


	def create_tictactoe_ui():
	    """Build the Gradio UI for playing tic-tac-toe against the LLM.

	    The page has a side panel (mark choice, start/reset buttons, status box),
	    a clickable 3x3 board, and a markdown log of both players' turns.
	    Per-session data (game object, player mark, last AI reasoning, chat
	    history, move log) is held in ``gr.State`` components.

	    Returns:
	        The ``gr.Blocks`` app, not yet launched.
	    """
	    with gr.Blocks(title="三目並べ vs LLM") as demo:
	        gr.Markdown("# 三目並べ vs LLM")
	        gr.Markdown("LLMとの三目並べゲームです。盤面をクリックして手を指してください。")

	        # Per-session state.
	        game_state = gr.State(None)
	        player_symbol = gr.State(1)  # default: the player is X (moves first)
	        ai_thinking_state = gr.State("")
	        conversation_history = gr.State([])  # chat messages exchanged with the model
	        move_history = gr.State([])  # log of moves shown in the markdown panel

	        with gr.Row():
	            with gr.Column(scale=1):
	                player_choice = gr.Radio(
	                    ["X (先手)", "O (後手)"],
	                    label="あなたの選択",
	                    value="X (先手)",
	                    interactive=True
	                )

	                start_button = gr.Button("ゲーム開始")
	                reset_button = gr.Button("リセット")

	                status = gr.Textbox(label="ゲーム状態", value="ゲームを開始してください")

	            with gr.Column(scale=2):
	                # Board display.
	                board_output = gr.Dataframe(
	                    headers=["A", "B", "C"],
	                    row_count=3,
	                    col_count=3,
	                    value=_blank_board_rows(),
	                    interactive=False
	                )

	        # The turn log is always rendered as markdown.
	        # NOTE(review): placed below the row; confirm the intended nesting.
	        ai_thinking_md = gr.Markdown("### AIの思考プロセス\nまだ思考プロセスがありません。")

	        def handle_click(evt: gr.SelectData, game, player_sym, thinking, messages, history):
	            """Play the clicked cell for the user, then let the AI answer.

	            Returns the new values for (board_output, status, ai_thinking_md,
	            conversation_history, move_history).
	            """
	            if game is None or game.is_game_over():
	                return (
	                    _blank_board_rows(),
	                    "ゲームを開始してください",
	                    format_thinking_multi_turn(history),
	                    messages,
	                    history
	                )

	            # Map the clicked cell to a board index.
	            row, col = evt.index
	            move_index = row * 3 + col

	            # Reject clicks made out of turn or on an occupied cell.
	            if game.turn != player_sym or move_index not in game.get_legal_moves():
	                return (
	                    board_to_display(game)[0],
	                    "無効な手です",
	                    format_thinking_multi_turn(history),
	                    messages,
	                    history
	                )

	            try:
	                # Apply the player's move and log it.
	                game.make_move(move_index)
	                updated_history = history + [
	                    {'type': 'user', 'move': game.index_to_coord(move_index)}
	                ]

	                if game.is_game_over():
	                    status_text = _game_over_status(game, player_sym)
	                    return (
	                        board_to_display(game)[0],
	                        status_text,
	                        format_thinking_multi_turn(updated_history),
	                        messages,
	                        updated_history
	                    )

	                # AI's turn.
	                ai_move, new_thinking, new_messages = get_ai_move(game, game.turn, messages, player_sym)

	                if ai_move is not None and ai_move in game.get_legal_moves():
	                    game.make_move(ai_move)
	                    updated_history = updated_history + [{
	                        'type': 'ai',
	                        'move': game.index_to_coord(ai_move),
	                        'thinking': new_thinking
	                    }]

	                    if game.is_game_over():
	                        status_text = _game_over_status(game, player_sym)
	                    else:
	                        status_text = "あなたの番です"
	                else:
	                    # The player's move is already on the board, so it stays
	                    # in the log even though the AI produced no usable move.
	                    status_text = "AIが有効な手を選択できませんでした。ゲームをリセットしてください。"

	                return (
	                    board_to_display(game)[0],
	                    status_text,
	                    format_thinking_multi_turn(updated_history),
	                    new_messages,
	                    updated_history
	                )

	            except ValueError:
	                # make_move raises ValueError for an illegal move.
	                return (
	                    board_to_display(game)[0],
	                    "無効な手です",
	                    format_thinking_multi_turn(history),
	                    messages,
	                    history
	                )

	        def start_game(choice):
	            """Start a new game; when the player picked O, the AI opens.

	            Returns the new values for (game_state, player_symbol,
	            board_output, status, ai_thinking_md, ai_thinking_state,
	            conversation_history, move_history).
	            """
	            game = TicTacToeBoard()
	            player_sym = 1 if choice == "X (先手)" else 2
	            ai_sym = 3 - player_sym  # the AI plays the other mark

	            # Build the system prompt.
	            symbol = "X" if ai_sym == 1 else "O"
	            system_prompt = (
	                f"あなたは{symbol}としてユーザーと対戦する三目並べのプロです。\n"
	                "まず、<think></think>タグ内であなたの思考過程を考えてください。"
	                "次に、有効な手の中から1つを選び、<move></move>タグ内に座標形式（例：A1、B2、C3）で示してください。\n"
	                "以下の形式で回答してください：\n"
	                "<think>\n...\n</think>\n<move>\n...\n</move>"
	            )

	            messages = [{"role": "system", "content": system_prompt}]
	            thinking = ""
	            game_history = []
	            status_text = "あなたの番です"

	            # When the player is O, the AI makes the opening move.
	            if player_sym == 2:
	                # Go through get_ai_move so generation runs inside the
	                # @spaces.GPU-decorated function, as it does for later turns.
	                ai_move, thinking, messages = get_ai_move(game, game.turn, messages, player_sym)

	                if ai_move is not None and ai_move in game.get_legal_moves():
	                    game.make_move(ai_move)
	                    game_history.append({
	                        'type': 'ai',
	                        'move': game.index_to_coord(ai_move),
	                        'thinking': thinking
	                    })
	                else:
	                    status_text = "AIが手を選択できませんでした。リセットしてください。"

	            return (
	                game,
	                player_sym,
	                board_to_display(game)[0],
	                status_text,
	                format_thinking_multi_turn(game_history),
	                thinking,
	                messages,
	                game_history
	            )

	        def reset_game():
	            """Clear every state component and restore the initial display."""
	            return (
	                None,
	                1,
	                _blank_board_rows(),
	                "ゲームをリセットしました。開始するには「ゲーム開始」を押してください。",
	                "### AIの思考プロセス\nまだ思考プロセスがありません。",
	                "",
	                [],
	                []
	            )

	        # Event wiring.
	        board_output.select(
	            handle_click,
	            inputs=[game_state, player_symbol, ai_thinking_state, conversation_history, move_history],
	            outputs=[board_output, status, ai_thinking_md, conversation_history, move_history]
	        )

	        start_button.click(
	            start_game,
	            inputs=[player_choice],
	            outputs=[
	                game_state,
	                player_symbol,
	                board_output,
	                status,
	                ai_thinking_md,
	                ai_thinking_state,
	                conversation_history,
	                move_history
	            ]
	        )

	        reset_button.click(
	            reset_game,
	            outputs=[
	                game_state,
	                player_symbol,
	                board_output,
	                status,
	                ai_thinking_md,
	                ai_thinking_state,
	                conversation_history,
	                move_history
	            ]
	        )

	    return demo

	# Build and launch the Gradio app only when this file is run as a script.
	if __name__ == "__main__":
	    demo = create_tictactoe_ui()
	    demo.launch()