Hinglish-CoTs Collection
Here are some text-generation models for the Hinglish language.
!pip install unsloth
from unsloth import FastLanguageModel
from transformers import TextStreamer
import torch
# Load your fine-tuned model
model, tokenizer = FastLanguageModel.from_pretrained(
model_name="QuantumInk/Mistral-small-12B-Hinglish-cot",
max_seq_length=2048,
load_in_4bit=True
)
FastLanguageModel.for_inference(model)
# Streamer for real-time decoding
text_streamer = TextStreamer(tokenizer)
# Alpaca prompt template
alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
### Instruction:
{instruction}
### Input:
{input_text}
### Response:
{output}"""
# Chat loop with memory
def chat():
print("💬 Chat with Qwen-2.5-Hindi-Hinglish-COT! Type '\\q' or 'quit' to exit.\n")
chat_history = "" # Full chat history with prompts and responses
while True:
user_input = input("➤ ")
if user_input.lower() in ["\\q", "quit"]:
print("\n👋 Exiting chat. Goodbye!")
break
# Format the current prompt
current_prompt = alpaca_prompt.format(
instruction="Continue the following conversation.",
input_text=user_input,
output=""
)
# Add to full chat history
chat_history += current_prompt + "\n"
# Tokenize the full prompt
inputs = tokenizer([chat_history], return_tensors="pt").to("cuda")
print("\n🤖: ", end="") # Prepare for streaming output
# Generate response using streamer
outputs = model.generate(
**inputs,
max_new_tokens=256,
temperature=0.7,
top_p=0.9,
do_sample=True,
no_repeat_ngram_size=2,
streamer=text_streamer
)
# Decode and capture response for chat history
full_output = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
response = full_output.split("### Response:")[-1].strip()
# Add response to chat history
chat_history += f"{response}\n"
# Run the chat
chat()
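One caveat about the loop above: chat_history grows with every turn, but the model was loaded with max_seq_length=2048, so a long session will eventually overflow the context window. A minimal sketch of one way to guard against that, reusing the same tokenizer; trim_history and the 256-token generation reserve are illustrative assumptions, not part of the model card.

# Illustrative sketch: drop the oldest tokens once the history exceeds the
# context budget (2048 minus the 256 tokens reserved for generation).
def trim_history(history, tokenizer, max_tokens=2048 - 256):
    token_ids = tokenizer(history, return_tensors="pt").input_ids[0]
    if len(token_ids) <= max_tokens:
        return history
    # Keep only the most recent tokens; the cut may land mid-word.
    return tokenizer.decode(token_ids[-max_tokens:], skip_special_tokens=True)

Calling chat_history = trim_history(chat_history, tokenizer) just before tokenizing inside the loop would keep the prompt within the window, at the cost of forgetting the oldest turns.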
Base model: mistralai/Mistral-Small-Instruct-2409
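If Unsloth is not available, the same checkpoint can likely be loaded with plain transformers and bitsandbytes instead, assuming the repo contains merged weights rather than only a LoRA adapter. A hedged sketch, not the loading path shown above:

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

# 4-bit quantized load via bitsandbytes (assumption: the repo holds full
# merged weights, not just adapter files).
bnb_config = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_compute_dtype=torch.float16)
tokenizer = AutoTokenizer.from_pretrained("QuantumInk/Mistral-small-12B-Hinglish-cot")
model = AutoModelForCausalLM.from_pretrained(
    "QuantumInk/Mistral-small-12B-Hinglish-cot",
    quantization_config=bnb_config,
    device_map="auto",
)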