import gc

import torch

from binoculars import Binoculars

# Observer/performer model pairs used by the Binoculars detector.
CHAT_MODEL_PAIR = {
    "observer": "deepseek-ai/deepseek-llm-7b-base",
    "performer": "deepseek-ai/deepseek-llm-7b-chat",
}

CODER_MODEL_PAIR = {
    "observer": "deepseek-ai/deepseek-llm-7b-base",
    "performer": "deepseek-ai/deepseek-coder-7b-instruct-v1.5",
}


def initialize_chat_model():
    """Load the chat Binoculars model pair, logging GPU memory usage."""
    print("Initializing chat Binoculars model...")
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
        print(f"GPU Memory before chat model: {torch.cuda.memory_allocated(0) / 1e9:.2f} GB allocated")
    bino_chat = Binoculars(
        mode="accuracy",
        observer_name_or_path=CHAT_MODEL_PAIR["observer"],
        performer_name_or_path=CHAT_MODEL_PAIR["performer"],
        max_token_observed=2048,
    )
    if torch.cuda.is_available():
        print(f"GPU Memory after chat model: {torch.cuda.memory_allocated(0) / 1e9:.2f} GB allocated")
    return bino_chat


def initialize_coder_model():
    """Load the coder Binoculars model pair, logging GPU memory usage."""
    print("Initializing coder Binoculars model...")
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
        print(f"GPU Memory before coder model: {torch.cuda.memory_allocated(0) / 1e9:.2f} GB allocated")
    bino_coder = Binoculars(
        mode="accuracy",
        observer_name_or_path=CODER_MODEL_PAIR["observer"],
        performer_name_or_path=CODER_MODEL_PAIR["performer"],
        max_token_observed=2048,
    )
    if torch.cuda.is_available():
        print(f"GPU Memory after coder model: {torch.cuda.memory_allocated(0) / 1e9:.2f} GB allocated")
    return bino_coder


def compute_chat_score(text):
    """Score `text` with the chat pair, releasing the model afterwards."""
    print("Computing chat score...")
    bino_chat = initialize_chat_model()
    try:
        score_chat = bino_chat.compute_score(text)
        return {"score_chat": score_chat}
    finally:
        cleanup_model(bino_chat)


def compute_coder_score(text):
    """Score `text` with the coder pair, releasing the model afterwards."""
    print("Computing coder score...")
    bino_coder = initialize_coder_model()
    try:
        score_coder = bino_coder.compute_score(text)
        return {"score_coder": score_coder}
    finally:
        cleanup_model(bino_coder)


def compute_scores(text, use_chat=True, use_coder=True):
    """Run the selected detectors sequentially so only one model pair is resident at a time."""
    scores = {}
    if use_chat:
        scores.update(compute_chat_score(text))
    if use_coder:
        scores.update(compute_coder_score(text))
    return scores


def cleanup_model(model):
    """Free a Binoculars model's GPU memory and force garbage collection."""
    if model:
        try:
            print("Cleaning up model resources...")
            model.free_memory()
            gc.collect()
            if torch.cuda.is_available():
                torch.cuda.empty_cache()
                torch.cuda.synchronize()
                print(f"After cleanup: {torch.cuda.memory_allocated(0) / 1e9:.2f} GB allocated")
        except Exception as e:
            print(f"Error during model cleanup: {str(e)}")


def cleanup_models(bino_chat, bino_coder):
    """Release both models if they were initialized."""
    if bino_chat:
        cleanup_model(bino_chat)
    if bino_coder:
        cleanup_model(bino_coder)
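

# --- Usage sketch (illustrative only, not part of the module above) ---
# A minimal example of how these helpers might be driven, assuming the
# Binoculars package is installed and both DeepSeek model pairs can be
# downloaded. `sample_text` is a hypothetical placeholder; real inputs
# would typically be longer, since very short texts score unreliably.
if __name__ == "__main__":
    sample_text = (
        "def add(a, b):\n"
        "    return a + b\n"
    )
    # Runs chat and coder detectors one after another; each model pair is
    # loaded, scored, and freed before the next, to fit in limited GPU memory.
    results = compute_scores(sample_text, use_chat=True, use_coder=True)
    for name, score in results.items():
        print(f"{name}: {score}")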