# NOTE(review): this file was scraped from a Hugging Face Space page;
# the original lines "Spaces: / Sleeping / Sleeping" were page-status UI
# text, not code, and have been converted to this comment.
"""Interactive terminal chat with a local GGUF model via llama-cpp-python.

Loads the model once at startup, then reads user prompts in a loop and
prints the model's completion for each. Type 'exit' to quit.
"""
from llama_cpp import Llama

# Path to the GGUF model file
MODEL_PATH = "llama-3.1-8B.gguf"

# Load the model (this is the slow step: the whole weights file is mapped/read)
print("Loading the model...")
try:
    llama = Llama(model_path=MODEL_PATH, n_ctx=1024, n_threads=4)
    print("Model loaded successfully!")
except Exception as e:
    # Broad catch is intentional at this top-level boundary: any load failure
    # (missing file, bad format, OOM) should report and stop the script.
    print(f"Failed to load the model: {e}")
    # raise SystemExit instead of exit(): exit() is injected by the `site`
    # module and is not guaranteed to exist under `python -S` / frozen apps.
    raise SystemExit(1)

# Chat loop
print("Chat with the model! Type 'exit' to end the conversation.")
while True:
    user_input = input("You: ").strip()
    if user_input.lower() == "exit":
        print("Exiting chat. Goodbye!")
        break

    # Query the model
    print("Thinking...")
    response = llama(
        user_input,
        max_tokens=50,    # Limit response length
        temperature=0.7,  # Control randomness
        top_p=0.9,        # Top-p sampling
        stop=["You:"],    # Stop at the next user prompt
    )

    # Extract and clean response text (completion API returns an
    # OpenAI-style dict: {'choices': [{'text': ...}, ...]})
    response_text = response['choices'][0]['text'].strip()
    print(f"Model: {response_text}")