from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
import re
import gradio as gr
import time
import spaces
import copy

# Load the tokenizer and model once, at module import time, so every
# request handled by this process shares the same instances.
model_name = "Qwen/Qwen2.5-7B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)

class TicTacToeBoard:
    """Mutable state of a single 3x3 tic-tac-toe game.

    The grid is a flat list of nine integers in row-major order
    (index = row * 3 + column). Cell values: 0 = empty, 1 = X (moves
    first), 2 = O (moves second).
    """

    def __init__(self):
        self.board = [0 for _ in range(9)]  # nine cells, indices 0-8
        self.turn = 1  # side to move: 1 = X, 2 = O
        self.moves = []  # indices played so far, in order
        self.last_move = None  # index of the most recent move, if any

    def is_game_over(self):
        """Return True when a side has won or no empty cell remains."""
        return self.get_winner() != 0 or 0 not in self.board

    def get_winner(self):
        """Return the winning side (1 = X, 2 = O), or 0 when nobody has won.

        A drawn or unfinished game also yields 0.
        """
        lines = (
            (0, 1, 2), (3, 4, 5), (6, 7, 8),  # rows
            (0, 3, 6), (1, 4, 7), (2, 5, 8),  # columns
            (0, 4, 8), (2, 4, 6),             # diagonals
        )

        for first, second, third in lines:
            mark = self.board[first]
            if mark != 0 and mark == self.board[second] == self.board[third]:
                return mark

        return 0

    def get_legal_moves(self):
        """Return the indices (0-8) of empty cells, or [] once the game is over."""
        if self.is_game_over():
            return []

        return [cell for cell, mark in enumerate(self.board) if mark == 0]

    def make_move(self, position):
        """Place the current side's mark at ``position`` and pass the turn.

        Raises:
            ValueError: if ``position`` is not currently a legal move.
        """
        if position not in self.get_legal_moves():
            raise ValueError(f"無効な手: {position}")

        mover = self.turn
        self.board[position] = mover
        self.moves.append(position)
        self.last_move = position
        self.turn = 3 - mover  # 1 -> 2, 2 -> 1

    def to_string(self):
        """Render the board as a Markdown table with A-C columns and 1-3 rows."""
        glyphs = {0: "・", 1: "X", 2: "O"}
        lines = [
            "|   | A | B | C |",
            "|---|---|---|---|",
        ]

        for r in range(3):
            cells = "".join(
                f"| {glyphs[mark]} " for mark in self.board[3 * r:3 * r + 3]
            )
            lines.append(f"| {r + 1} {cells}|")

        return "\n".join(lines)

    def index_to_coord(self, index):
        """Convert a cell index (0-8) to a coordinate such as "A1" or "B2".

        ``None`` is passed through unchanged.
        """
        if index is None:
            return None

        row_idx, col_idx = divmod(index, 3)
        return f"{chr(ord('A') + col_idx)}{row_idx + 1}"

def create_user_prompt(board, player_sym):
    """Build the per-turn user message describing the position to the model.

    The message contains, in order: an optional line naming the human
    player's last move (only when the most recent move on the board belongs
    to ``player_sym``), the Markdown board, a textual list of X and O
    positions, and the list of currently legal moves as coordinates.

    Args:
        board: the game board; its ``get_legal_moves``, ``board``,
            ``last_move``, ``index_to_coord`` and ``to_string`` members are used.
        player_sym: the human player's mark (1 = X, 2 = O).

    Returns:
        The prompt text.
    """
    def _coord(cell):
        # Column letter A-C followed by row number 1-3.
        return f"{chr(ord('A') + cell % 3)}{cell // 3 + 1}"

    # Coordinates the model is allowed to choose from.
    valid_moves = [_coord(cell) for cell in board.get_legal_moves()]

    # Occupied squares grouped by owner; any non-empty, non-X mark counts as O.
    x_positions = [_coord(i) for i, piece in enumerate(board.board) if piece == 1]
    o_positions = [
        _coord(i) for i, piece in enumerate(board.board) if piece not in (0, 1)
    ]

    description_lines = ["盤面の説明:"]
    if x_positions:
        description_lines.append(f"X: {', '.join(x_positions)}")
    if o_positions:
        description_lines.append(f"O: {', '.join(o_positions)}")
    if len(description_lines) == 1:
        description_lines.append("空の盤面")
    board_description = "\n".join(description_lines)

    # Mention the last move only when it was played by the human player.
    last = board.last_move
    if last is not None and board.board[last] == player_sym:
        user_move_info = f"ユーザーが{board.index_to_coord(last)}を選びました。\n"
    else:
        user_move_info = ""

    sections = [
        user_move_info,
        f"### 現在の盤面\n{board.to_string()}\n",
        f"{board_description}\n\n",
        f"### 有効な手\n{', '.join(valid_moves)}\n\n",
    ]
    return "".join(sections)

def extract_move(response):
    """Return the stripped text of the last ``<move>...</move>`` tag, or None.

    When the response contains several tags, the final one wins.
    """
    last = None
    for found in re.finditer(r"<move>(.*?)</move>", response, re.DOTALL):
        last = found.group(1)
    return None if last is None else last.strip()

def extract_thinking(response):
    """Return the stripped text of the first ``<think>...</think>`` tag.

    An empty string is returned when the response has no such tag.
    """
    found = re.search(r"<think>(.*?)</think>", response, re.DOTALL)
    if found is None:
        return ""
    return found.group(1).strip()

def coord_to_index(coord):
    """Convert a coordinate such as "A1" or "b2" into a board index (0-8).

    The first character is the column letter (A-C, case-insensitive) and the
    second is the row digit (1-3); the index is ``row * 3 + column``.

    Args:
        coord: the coordinate text, typically extracted from model output.

    Returns:
        The cell index, or None when ``coord`` is not a two-character string
        naming a square on the 3x3 board.
    """
    # Model output is untrusted: reject anything that is not a 2-char string
    # up front instead of letting len()/indexing raise on other types.
    if not isinstance(coord, str) or len(coord) != 2:
        return None

    try:
        # ord() raises TypeError when upper() expands to several characters
        # (e.g. "ß" -> "SS"); int() raises ValueError on a non-digit.
        col = ord(coord[0].upper()) - ord('A')
        row = int(coord[1]) - 1
    except (TypeError, ValueError):
        return None

    if not (0 <= col <= 2 and 0 <= row <= 2):
        return None

    return row * 3 + col

@spaces.GPU(duration=120)
def get_ai_move(board, player, conversation_history, player_sym):
    """Ask the model for its next move.

    A fresh user prompt describing ``board`` is appended to a deep copy of
    ``conversation_history`` (the caller's list is left untouched), the model
    generates a reply, and the reply is appended to that copy as well.

    Args:
        board: the current game board.
        player: unused by this function.
        conversation_history: chat messages exchanged so far.
        player_sym: the human player's mark, forwarded to the prompt builder.

    Returns:
        A tuple ``(move_index, thinking, messages)``: the parsed cell index
        (None when no usable move was found), the extracted reasoning text,
        and the extended message list.
    """
    # Work on a copy so the caller's history is only replaced on return.
    messages = copy.deepcopy(conversation_history)
    messages.append(
        {"role": "user", "content": create_user_prompt(board, player_sym)}
    )

    # Render the chat into a single prompt and run generation.
    prompt_text = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    model_inputs = tokenizer([prompt_text], return_tensors="pt").to(model.device)
    output_ids = model.generate(**model_inputs, max_new_tokens=512)

    # Keep only the newly generated tokens (drop the echoed prompt).
    prompt_len = model_inputs.input_ids.shape[1]
    completion_ids = output_ids[:, prompt_len:]
    response = tokenizer.batch_decode(completion_ids, skip_special_tokens=True)[0]

    messages.append({"role": "assistant", "content": response})

    # Pull the reasoning and the chosen square out of the reply.
    thinking = extract_thinking(response)
    move_coord = extract_move(response)
    if move_coord:
        move_index = coord_to_index(move_coord)
    else:
        move_index = None

    # Debug output: current length of the conversation history.
    print(f"会話履歴の現在の長さ: {len(messages)}")

    return move_index, thinking, messages

def board_to_display(board, game_state="", ai_thinking=""):
    """Convert the board into a 3x3 grid of display strings.

    Returns:
        A tuple ``(grid, game_state, ai_thinking)`` where ``grid`` is a list
        of three rows of " ", "X" or "O"; the other two values are passed
        through unchanged.
    """
    symbols = {0: " ", 1: "X", 2: "O"}
    grid = [
        [symbols[board.board[3 * r + c]] for c in range(3)]
        for r in range(3)
    ]
    return grid, game_state, ai_thinking

def format_thinking_multi_turn(move_history):
    """Render the move history as Markdown, one section per turn.

    Each entry is a dict with a ``'type'`` of ``'ai'`` or ``'user'`` and a
    ``'move'``; AI entries also carry ``'thinking'``. Entries of any other
    type are skipped. An empty history yields a placeholder message.
    """
    if not move_history:
        return "### AIの思考プロセス\nまだ思考プロセスがありません。"

    lines = []
    for entry in move_history:
        kind = entry['type']
        if kind == 'ai':
            lines += [
                "### LLMのターン",
                f"{entry['thinking']}",
                f"~~~\nLLMは　{entry['move']}を　えらんだ！\n~~~",
            ]
        elif kind == 'user':
            lines += [
                "### ユーザーのターン",
                f"~~~\nユーザーは　{entry['move']}を　えらんだ！\n~~~",
            ]

    return "\n".join(lines)

def create_tictactoe_ui():
    """Build the Gradio Blocks app for playing tic-tac-toe against the model.

    The layout has a side panel (mark selection, start/reset buttons, status
    box), a 3x3 Dataframe used as the clickable board, and a Markdown area
    showing the turn-by-turn history with the model's reasoning.

    Returns:
        The constructed ``gr.Blocks`` instance (not yet launched).
    """
    no_thinking_md = "### AIの思考プロセス\nまだ思考プロセスがありません。"

    def empty_grid():
        # A fresh list every time so callers never share row objects.
        return [[" "] * 3 for _ in range(3)]

    def describe_result(game, player_sym):
        # Status text for a finished game: draw, or who won and with which mark.
        winner = game.get_winner()
        if winner == 0:
            return "引き分け！"
        symbol = "X" if winner == 1 else "O"
        owner = "あなた" if winner == player_sym else "AI"
        return f"{symbol}の勝ち！ ({owner})"

    with gr.Blocks(title="三目並べ vs LLM") as demo:
        gr.Markdown("# 三目並べ vs LLM")
        gr.Markdown("LLMとの三目並べゲームです。盤面をクリックして手を指してください。")

        # Per-session state
        game_state = gr.State(None)
        player_symbol = gr.State(1)  # default: the human plays X (moves first)
        ai_thinking_state = gr.State("")
        conversation_history = gr.State([])  # chat messages sent to the model
        move_history = gr.State([])  # moves shown in the history panel

        with gr.Row():
            with gr.Column(scale=1):
                player_choice = gr.Radio(
                    ["X (先手)", "O (後手)"],
                    label="あなたの選択",
                    value="X (先手)",
                    interactive=True
                )

                start_button = gr.Button("ゲーム開始")
                reset_button = gr.Button("リセット")

                status = gr.Textbox(label="ゲーム状態", value="ゲームを開始してください")

            with gr.Column(scale=2):
                # Board display
                board_output = gr.Dataframe(
                    headers=["A", "B", "C"],
                    row_count=3,
                    col_count=3,
                    value=empty_grid(),
                    interactive=False
                )

        # The model's reasoning is always rendered as Markdown.
        ai_thinking_md = gr.Markdown(no_thinking_md)

        def handle_click(evt: gr.SelectData, game, player_sym, thinking, messages, history):
            """Apply the human's clicked move, then let the model reply.

            Returns the new values for (board grid, status text, history
            Markdown, conversation messages, move history).
            """
            if game is None:
                return (
                    empty_grid(),
                    "ゲームを開始してください",
                    format_thinking_multi_turn(history),
                    messages,
                    history
                )

            if game.is_game_over():
                # Keep the finished position on screen instead of blanking it.
                return (
                    board_to_display(game, "ゲームを開始してください", thinking)[0],
                    "ゲームを開始してください",
                    format_thinking_multi_turn(history),
                    messages,
                    history
                )

            # Translate the clicked cell into a board index.
            row, col = evt.index
            move_index = row * 3 + col

            def invalid_move():
                return (
                    board_to_display(game, "無効な手です", thinking)[0],
                    "無効な手です",
                    format_thinking_multi_turn(history),
                    messages,
                    history
                )

            # Reject clicks made out of turn or on an occupied cell.
            if game.turn != player_sym or move_index not in game.get_legal_moves():
                return invalid_move()

            # Only the player's own move is guarded: an error raised later by
            # the model call must not be reported as an invalid move after
            # the board has already changed.
            try:
                game.make_move(move_index)
            except ValueError:
                return invalid_move()

            updated_history = history + [
                {'type': 'user', 'move': game.index_to_coord(move_index)}
            ]

            # The player's move may have ended the game.
            if game.is_game_over():
                status_text = describe_result(game, player_sym)
                return (
                    board_to_display(game, status_text, thinking)[0],
                    status_text,
                    format_thinking_multi_turn(updated_history),
                    messages,
                    updated_history
                )

            # Model's turn
            ai_move, new_thinking, new_messages = get_ai_move(game, game.turn, messages, player_sym)

            if ai_move is not None and ai_move in game.get_legal_moves():
                game.make_move(ai_move)
                updated_history = updated_history + [{
                    'type': 'ai',
                    'move': game.index_to_coord(ai_move),
                    'thinking': new_thinking
                }]
                if game.is_game_over():
                    status_text = describe_result(game, player_sym)
                else:
                    status_text = "あなたの番です"
            else:
                # The player's move is already on the board, so it stays in
                # the history; only the model's turn is missing.
                status_text = "AIが有効な手を選択できませんでした。ゲームをリセットしてください。"

            return (
                board_to_display(game, status_text, new_thinking)[0],
                status_text,
                format_thinking_multi_turn(updated_history),
                new_messages,
                updated_history
            )

        def start_game(choice):
            """Create a new game; when the human picked O, the model opens.

            Returns the new values for (game, player mark, board grid, status
            text, history Markdown, latest reasoning, conversation messages,
            move history).
            """
            game = TicTacToeBoard()
            player_sym = 1 if choice == "X (先手)" else 2
            ai_sym = 3 - player_sym  # the model plays the other mark

            # System prompt telling the model which mark it plays and the
            # required <think>/<move> answer format.
            symbol = "X" if ai_sym == 1 else "O"
            system_prompt = (
                f"あなたは{symbol}としてユーザーと対戦する三目並べのプロです。\n"
                "まず、<think></think>タグ内であなたの思考過程を考えてください。"
                "次に、有効な手の中から1つを選び、<move></move>タグ内に座標形式（例：A1、B2、C3）で示してください。\n"
                "以下の形式で回答してください：\n"
                "<think>\n...\n</think>\n<move>\n...\n</move>"
            )

            messages = [{"role": "system", "content": system_prompt}]
            thinking = ""
            game_history = []
            status_text = "あなたの番です"

            # The human plays O, so the model moves first. Go through
            # get_ai_move so the opening inference uses the same
            # GPU-decorated code path as every later turn.
            if player_sym == 2:
                ai_move, thinking, messages = get_ai_move(game, game.turn, messages, player_sym)

                if ai_move is not None and ai_move in game.get_legal_moves():
                    game.make_move(ai_move)
                    game_history.append({
                        'type': 'ai',
                        'move': game.index_to_coord(ai_move),
                        'thinking': thinking
                    })
                else:
                    status_text = "AIが手を選択できませんでした。リセットしてください。"

            board_display = board_to_display(game, status_text, thinking)[0]

            return (
                game,
                player_sym,
                board_display,
                status_text,
                format_thinking_multi_turn(game_history),
                thinking,
                messages,
                game_history
            )

        def reset_game():
            """Clear every piece of session state and blank the board."""
            return (
                None,
                1,
                empty_grid(),
                "ゲームをリセットしました。開始するには「ゲーム開始」を押してください。",
                no_thinking_md,
                "",
                [],
                []
            )

        # Event wiring
        board_output.select(
            handle_click,
            inputs=[game_state, player_symbol, ai_thinking_state, conversation_history, move_history],
            outputs=[board_output, status, ai_thinking_md, conversation_history, move_history]
        )

        start_button.click(
            start_game,
            inputs=[player_choice],
            outputs=[
                game_state,
                player_symbol,
                board_output,
                status,
                ai_thinking_md,
                ai_thinking_state,
                conversation_history,
                move_history
            ]
        )

        reset_button.click(
            reset_game,
            outputs=[
                game_state,
                player_symbol,
                board_output,
                status,
                ai_thinking_md,
                ai_thinking_state,
                conversation_history,
                move_history
            ]
        )

    return demo

# Build the UI and start the Gradio server only when run as a script.
if __name__ == "__main__":
    demo = create_tictactoe_ui()
    demo.launch()