"""Gradio chat UI that streams replies from Phi-3-mini-128k-instruct.

The default system prompt instructs the model to translate natural-language
requests into single-line PTZ camera commands.
"""

import gradio as gr
import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    TextIteratorStreamer,
)
import os
from threading import Thread
import spaces
import time
import subprocess

# Install flash-attn at start-up.  The flag is *added* to the inherited
# environment: passing a one-entry dict as ``env`` would replace the whole
# environment and drop PATH, HOME, proxy settings, etc. for the pip child.
subprocess.run(
    "pip install flash-attn --no-build-isolation",
    env={**os.environ, "FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
    shell=True,
)

MODEL_ID = "microsoft/Phi-3-mini-128k-instruct"

hf_token = os.getenv("HF_TOKEN")
token = hf_token

model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    token=token,
    trust_remote_code=True,
)
tok = AutoTokenizer.from_pretrained(MODEL_ID, token=token)

# Token ids that end generation.
terminators = [
    tok.eos_token_id,
]

if torch.cuda.is_available():
    device = torch.device("cuda")
    print(f"Using GPU: {torch.cuda.get_device_name(device)}")
else:
    device = torch.device("cpu")
    print("Using CPU")

model = model.to(device)

# Default text of the "System message" box in the UI.
SYSTEM_PROMPT = (
    "You are an assistant for controlling PTZ cameras.\n"
    "When the user gives you a clear command, please JUST respond in the following format:\n"
    "Camera:. Tracking_Target: placement: speed: only_ptz_action: tracking_action:.\n"
    "If multiple cameras are specified, provide separate lines for each camera.\n"
    "Only provide commands for the cameras specified by the user.\n"
    "Do not include additional cameras that the user did not mention.\n"
    "Ensure all field names are spelled correctly.\n\n"
    "The available placements are ONLY: top_left, top_middle, top_right, center_left, center_middle, center_right, bottom_left, bottom_middle, bottom_right.\n"
    "The available speed options are ONLY: slow, medium, fast.\n"
    "The available only_ptz_actions are ONLY: turn_right, turn_left, tilt_up, tilt_down, zoom_in, zoom_out, stop.\n"
    "The available tracking_actions are ONLY: tracking.\n\n"
    "Default Values:\n"
    "- camera_id: default\n"
    "- tracking_target: default\n"
    "- placement: center_middle\n"
    "- speed: medium\n"
    "- only_ptz_action: default\n"
    "- tracking_action: default\n\n"
    "Rules for Defaults:\n"
    "1. If the camera_id is not specified, use the default value `default`.\n"
    "2. If the tracking_target is not specified, use the default value `default`.\n"
    "3. If the position information is incomplete or not specified, default the placement to `center_middle`.\n"
    "4. If only a general direction is specified, interpret it as the middle of that direction.\n"
    " For example, 'top' is interpreted as 'top_middle' and 'left' as 'center_left'.\n"
    "5. If the speed is not specified, default to `medium`.\n"
    "6. If the only_ptz_action is not specified, default to `default`.\n"
    "7. If the tracking_action is not specified, default to `default`.\n"
    "8. Camera IDs are restricted to 1, 2, 3, and 4. If an invalid camera_id is provided, use `default`.\n"
    "9. If the user specifies 'all camera' or 'all cameras', apply the command to all cameras (1-4).\n\n"
    "**Special Action Handling**:\n"
    "- If only `camera_id` and `only_ptz_action` are specified (all other fields are `default`), execute only the specified `only_ptz_action`.\n"
    "- If only `camera_id` and `tracking_action` are specified (all other fields are `default`), execute only the specified `tracking_action`.\n"
    "- When tracking is involved, set `tracking_action` to `tracking`.\n\n"
    "Examples:\n"
    "User: Please set camera 1 to track target Alice at bottom_right with speed fast and action turn_right.\n"
    "Assistant: Camera:1. Tracking_Target:Alice placement:bottom_right speed:fast only_ptz_action:turn_right tracking_action:default.\n\n"
    "User: Please set camera 3 to track target Bob at top with speed slow.\n"
    "Assistant: Camera:3. Tracking_Target:Bob placement:top_middle speed:slow only_ptz_action:default tracking_action:tracking.\n\n"
    "User: Please set camera 2 to track target Carol.\n"
    "Assistant: Camera:2. Tracking_Target:Carol placement:center_middle speed:medium only_ptz_action:default tracking_action:tracking.\n\n"
    "User: Please track target Dave at left.\n"
    "Assistant: Camera:default. Tracking_Target:Dave placement:center_left speed:medium only_ptz_action:default tracking_action:tracking.\n\n"
    "User: Please control camera 4.\n"
    "Assistant: Camera:4. Tracking_Target:default placement:center_middle speed:medium only_ptz_action:default tracking_action:default.\n\n"
    "User: Please start recording.\n"
    "Assistant: Camera:default. Tracking_Target:default placement:center_middle speed:medium only_ptz_action:default tracking_action:default.\n\n"
    "User: Camera 5 action turn_left.\n"
    "Assistant: Camera:default. Tracking_Target:default placement:center_middle speed:medium only_ptz_action:turn_left tracking_action:default.\n\n"
    "User: camera 1 and 2 turn right.\n"
    "Assistant:\n"
    "Camera:1. Tracking_Target:default placement:center_middle speed:medium only_ptz_action:turn_right tracking_action:default.\n"
    "Camera:2. Tracking_Target:default placement:center_middle speed:medium only_ptz_action:turn_right tracking_action:default.\n\n"
    "Respond with ONLY the Assistant's output. Do NOT add any extra text."
)


def _build_conversation(message, history, system_message=None):
    """Return the chat-template message list for one request.

    Args:
        message: The new user message.
        history: Earlier turns as ``(user_text, assistant_text)`` pairs; an
            assistant text of ``None`` is skipped.  ``None`` is treated as no
            history.
        system_message: Optional system prompt placed first when non-empty.
    """
    conversation = []
    if system_message:
        # NOTE(review): relies on the model's chat template rendering the
        # "system" role -- confirm against the tokenizer revision in use.
        conversation.append({"role": "system", "content": system_message})
    for item in history or []:
        conversation.append({"role": "user", "content": item[0]})
        if item[1] is not None:
            conversation.append({"role": "assistant", "content": item[1]})
    conversation.append({"role": "user", "content": message})
    return conversation


def _stream_reply(conversation, temperature, max_tokens, top_p=None):
    """Run ``model.generate`` on a worker thread and yield the growing reply.

    Args:
        conversation: Message dicts accepted by ``tok.apply_chat_template``.
        temperature: Sampling temperature; ``0`` selects greedy decoding.
        max_tokens: Upper bound on newly generated tokens.
        top_p: Optional nucleus-sampling threshold, used only when sampling.

    Yields:
        The reply text accumulated so far, once per streamed chunk, and once
        more after generation has finished.
    """
    prompt = tok.apply_chat_template(
        conversation, tokenize=False, add_generation_prompt=True
    )
    model_inputs = tok([prompt], return_tensors="pt").to(device)
    streamer = TextIteratorStreamer(
        tok, timeout=20.0, skip_prompt=True, skip_special_tokens=True
    )
    generate_kwargs = dict(
        model_inputs,
        streamer=streamer,
        max_new_tokens=int(max_tokens),
        eos_token_id=terminators,
    )
    if temperature == 0:
        # Greedy decoding: sampling-only knobs are left out entirely.
        generate_kwargs["do_sample"] = False
    else:
        generate_kwargs["do_sample"] = True
        generate_kwargs["temperature"] = temperature
        if top_p is not None:
            generate_kwargs["top_p"] = top_p

    # generate() blocks until done, so it runs on a worker thread while this
    # generator drains the streamer.
    worker = Thread(target=model.generate, kwargs=generate_kwargs)
    worker.start()

    partial_text = ""
    for new_text in streamer:
        partial_text += new_text
        yield partial_text
    worker.join()
    # Final yield guarantees at least one value even if nothing was streamed.
    yield partial_text


# Dispatch Errors
@spaces.GPU(duration=60)
def chat(message, history, temperature, do_sample, max_tokens):
    """Stream a reply to ``message`` given the prior ``history``.

    Kept with its original signature for existing callers; the UI itself is
    wired to ``respond``.

    Args:
        message: The new user message.
        history: Earlier turns as ``(user_text, assistant_text)`` pairs.
        temperature: Sampling temperature; ``0`` selects greedy decoding.
        do_sample: Accepted for compatibility but not consulted -- sampling
            is enabled unless ``temperature`` is ``0``.
        max_tokens: Upper bound on newly generated tokens.

    Yields:
        The reply text accumulated so far.
    """
    conversation = _build_conversation(message, history)
    yield from _stream_reply(conversation, temperature, max_tokens)


@spaces.GPU(duration=60)
def respond(message, history, system_message, max_tokens, temperature, top_p):
    """Chat callback used by the Gradio interface.

    The parameters after ``history`` are in the same order as the
    ``additional_inputs`` components declared on ``demo`` below.

    Args:
        message: The new user message.
        history: Earlier turns as ``(user_text, assistant_text)`` pairs.
        system_message: Contents of the "System message" textbox.
        max_tokens: Value of the "Max new tokens" slider.
        temperature: Value of the "Temperature" slider.
        top_p: Value of the "Top-p (nucleus sampling)" slider.

    Yields:
        The reply text accumulated so far.
    """
    conversation = _build_conversation(message, history, system_message)
    yield from _stream_reply(conversation, temperature, max_tokens, top_p)


demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value=SYSTEM_PROMPT, label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)",
        ),
    ],
)

demo.launch()