import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

# Check whether a GPU is available
device = "cuda" if torch.cuda.is_available() else "cpu"

# Model name
model_name = "Qwen/Qwen2.5-3B-Instruct"

# Load the tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)

# Load the LoRA model again: base model first, then attach the adapter
base_model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True).to(device)
lora_model = PeftModel.from_pretrained(base_model, "wangyiqun/qwen25_3b_instruct_lora_vulgarity_finetuned")

# Inference example
input_text = "Hello"
input_ids = tokenizer(input_text, return_tensors="pt").input_ids.to(lora_model.device)
output = lora_model.generate(input_ids, max_new_tokens=50, do_sample=True, top_p=0.95, temperature=0.35)
output_text = tokenizer.decode(output[0], skip_special_tokens=True)
print(output_text)

#!pip install gradio
import gradio as gr

# Assumes tokenizer and lora_model are already defined above

# Chat function
def chatbot(input_text, history):
    if history is None:
        history = []

    # Append the current user input to the history
    history.append({"role": "user", "content": input_text})

    messages = [
        # System prompt (Chinese): respond to the user's friendly greeting by trading
        # insults with them, keeping the exchange coherent with the conversation context
        {"role": "system", "content": "根据用户友好的打招呼,和用户对骂,注意上下文语义连贯"}
    ] + history

    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True
    )

    model = lora_model
    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)

    generated_ids = model.generate(
        **model_inputs,
        max_new_tokens=512
    )
    # Strip the prompt tokens so only the newly generated reply remains
    generated_ids = [
        output_ids[len(input_ids):]
        for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
    ]

    response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]

    # Append the model reply to the history
    history.append({"role": "assistant", "content": response})

    # Convert to the (user, assistant) pair format expected by gr.Chatbot
    chat_history = [
        (msg["content"] if msg["role"] == "user" else None,
         msg["content"] if msg["role"] == "assistant" else None)
        for msg in history if msg["role"] in ["user", "assistant"]
    ]

    return chat_history, history


# Gradio interface
iface = gr.Interface(
    fn=chatbot,
    inputs=[gr.Textbox(label="Enter your question"), gr.State()],
    outputs=[gr.Chatbot(label="Chat history"), gr.State()],
    title="Qwen2.5-finetune-骂人专家",
    description="Chat with Qwen2.5-3B-Instruct using the LoRA 'cursing expert' adapter"
)

iface.launch(share=True, inbrowser=False, debug=True)
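
# Optional follow-up (not part of the original script): a minimal sketch of folding the
# LoRA adapter into the base weights so the model can be deployed without a PEFT runtime
# dependency. It assumes `lora_model` and `tokenizer` from above; the output directory
# name is illustrative, not an existing repo or path.
merged_model = lora_model.merge_and_unload()           # merge LoRA deltas into the base weights
merged_model.save_pretrained("qwen25_3b_vulgarity_merged")   # hypothetical local output dir
tokenizer.save_pretrained("qwen25_3b_vulgarity_merged")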