import gc

import torch

from binoculars import Binoculars

# Observer/performer model pairs used by the Binoculars detector.
CHAT_MODEL_PAIR = {
    "observer": "deepseek-ai/deepseek-llm-7b-base",
    "performer": "deepseek-ai/deepseek-llm-7b-chat",
}

CODER_MODEL_PAIR = {
    "observer": "deepseek-ai/deepseek-llm-7b-base",
    "performer": "deepseek-ai/deepseek-coder-7b-instruct-v1.5",
}


def initialize_chat_model():
    """Load the chat Binoculars model pair, logging GPU memory usage."""
    print("Initializing chat Binoculars model...")
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
        print(f"GPU Memory before chat model: {torch.cuda.memory_allocated(0) / 1e9:.2f} GB allocated")
    bino_chat = Binoculars(
        mode="accuracy",
        observer_name_or_path=CHAT_MODEL_PAIR["observer"],
        performer_name_or_path=CHAT_MODEL_PAIR["performer"],
        max_token_observed=2048,
    )
    if torch.cuda.is_available():
        print(f"GPU Memory after chat model: {torch.cuda.memory_allocated(0) / 1e9:.2f} GB allocated")
    return bino_chat


def initialize_coder_model():
    """Load the coder Binoculars model pair, logging GPU memory usage."""
    print("Initializing coder Binoculars model...")
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
        print(f"GPU Memory before coder model: {torch.cuda.memory_allocated(0) / 1e9:.2f} GB allocated")
    bino_coder = Binoculars(
        mode="accuracy",
        observer_name_or_path=CODER_MODEL_PAIR["observer"],
        performer_name_or_path=CODER_MODEL_PAIR["performer"],
        max_token_observed=2048,
    )
    if torch.cuda.is_available():
        print(f"GPU Memory after coder model: {torch.cuda.memory_allocated(0) / 1e9:.2f} GB allocated")
    return bino_coder


def compute_chat_score(text):
    """Score `text` with the chat pair, releasing the model afterwards."""
    print("Computing chat score...")
    bino_chat = initialize_chat_model()
    try:
        score_chat = bino_chat.compute_score(text)
        return {"score_chat": score_chat}
    finally:
        cleanup_model(bino_chat)


def compute_coder_score(text):
    """Score `text` with the coder pair, releasing the model afterwards."""
    print("Computing coder score...")
    bino_coder = initialize_coder_model()
    try:
        score_coder = bino_coder.compute_score(text)
        return {"score_coder": score_coder}
    finally:
        cleanup_model(bino_coder)


def compute_scores(text, use_chat=True, use_coder=True):
    """Run the selected detectors sequentially so only one model pair is resident at a time."""
    scores = {}
    if use_chat:
        scores.update(compute_chat_score(text))
    if use_coder:
        scores.update(compute_coder_score(text))
    return scores


def cleanup_model(model):
    """Free a Binoculars model's GPU memory and force garbage collection."""
    if model:
        try:
            print("Cleaning up model resources...")
            model.free_memory()
            gc.collect()
            if torch.cuda.is_available():
                torch.cuda.empty_cache()
                torch.cuda.synchronize()
                print(f"After cleanup: {torch.cuda.memory_allocated(0) / 1e9:.2f} GB allocated")
        except Exception as e:
            print(f"Error during model cleanup: {str(e)}")


def cleanup_models(bino_chat, bino_coder):
    """Release both models if they were initialized."""
    if bino_chat:
        cleanup_model(bino_chat)
    if bino_coder:
        cleanup_model(bino_coder)
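

# --- Usage sketch (illustrative only, not part of the module above) ---
# A minimal example of how these helpers might be driven, assuming the
# Binoculars package is installed and both DeepSeek model pairs can be
# downloaded. `sample_text` is a hypothetical placeholder; real inputs
# would typically be longer, since very short texts score unreliably.
if __name__ == "__main__":
    sample_text = (
        "def add(a, b):\n"
        "    return a + b\n"
    )
    # Runs chat and coder detectors one after another; each model pair is
    # loaded, scored, and freed before the next, to fit in limited GPU memory.
    results = compute_scores(sample_text, use_chat=True, use_coder=True)
    for name, score in results.items():
        print(f"{name}: {score}")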