Hhhhvasasasasdsddsdsxxxxxxxxxxxxx

Running

App Files Files Community

Hjgugugjhuhjggg committed on Dec 4, 2024

Commit

4f047bc

verified ·

1 Parent(s): db25a46

Update app.py

Browse files

Files changed (1) hide show

app.py +136 -101

app.py CHANGED Viewed

@@ -18,7 +18,10 @@ from threading import Thread
 import gptcache
 from sklearn.metrics.pairwise import cosine_similarity
 from sklearn.feature_extraction.text import TfidfVectorizer
-from sklearn.feature_extraction.stop_words import ENGLISH_STOP_WORDS
 load_dotenv()
 HUGGINGFACE_TOKEN = os.getenv("HUGGINGFACE_TOKEN")
@@ -101,118 +104,150 @@ global_data = {
     'ssm_dt_rank': {},
     'ssm_dt_b_c_rms': {},
     'vocab_type': {},
-    'model_type': {}
 }
 model_configs = [
     {"repo_id": "Hjgugugjhuhjggg/testing_semifinal-Q2_K-GGUF", "filename": "testing_semifinal-q2_k.gguf", "name": "testing"},
     {"repo_id": "bartowski/Llama-3.2-3B-Instruct-uncensored-GGUF", "filename": "Llama-3.2-3B-Instruct-uncensored-Q2_K.gguf", "name": "Llama-3.2-3B-Instruct"},
     {"repo_id": "Ffftdtd5dtft/Meta-Llama-3.1-70B-Q2_K-GGUF", "filename": "meta-llama-3.1-70b-q2_k.gguf", "name": "Meta-Llama-3.1-70B"},
-    {"repo_id": "bartowski/QwQ-32B-Preview-GGUF", "filename": "QwQ-32B-Preview-Q2_K.gguf", "name": "QwQ-32B-Preview"},
-    {"repo_id": "Ffftdtd5dtft/Codestral-22B-v0.1-Q2_K-GGUF", "filename": "codestral-22b-v0.1-q2_k.gguf", "name": "Codestral-22B-v0.1"},
-    {"repo_id": "Ffftdtd5dtft/WizardLM-13B-Uncensored-Q2_K-GGUF", "filename": "wizardlm-13b-uncensored-q2_k.gguf", "name": "WizardLM-13B-Uncensored"},
-    {"repo_id": "Ffftdtd5dtft/Qwen2-Math-72B-Instruct-Q2_K-GGUF", "filename": "qwen2-math-72b-instruct-q2_k.gguf", "name": "Qwen2-Math-72B-Instruct"},
-    {"repo_id": "Ffftdtd5dtft/Phi-3-mini-128k-instruct-Q2_K-GGUF", "filename": "phi-3-mini-128k-instruct-q2_k.gguf", "name": "Phi-3-mini-128k"},
-    {"repo_id": "Ffftdtd5dtft/DeepSeek-Coder-V2-Lite-Instruct-Q2_K-GGUF", "filename": "deepseek-coder-v2-lite-instruct-q2_k.gguf", "name": "DeepSeek-Coder-V2-Lite"},
-    {"repo_id": "Ffftdtd5dtft/Mistral-Nemo-Instruct-2407-Q2_K-GGUF", "filename": "mistral-nemo-instruct-2407-q2_k.gguf", "name": "Mistral-Nemo-Instruct-2407"}
 ]
-class ModelManager:
-    def __init__(self):
-        self.models = {}
-    def load_model(self, model_config):
-        if model_config['name'] not in self.models:
-            try:
-                self.models[model_config['name']] = Llama.from_pretrained(
-                    repo_id=model_config['repo_id'],
-                    filename=model_config['filename'],
-                    use_auth_token=HUGGINGFACE_TOKEN,
-                    n_threads=20,
-                    use_gpu=False
-                )
-            except Exception as e:
-                print(f"Error loading model {model_config['name']}: {e}")
-    def load_all_models(self):
-        loop = asyncio.get_event_loop()
-        tasks = [loop.run_in_executor(None, self.load_model, config) for config in model_configs]
-        loop.run_until_complete(asyncio.gather(*tasks))
-        return self.models
-model_manager = ModelManager()
-global_data['models'] = model_manager.load_all_models()
-def release_resources():
-    try:
-        if torch.cuda.is_available():
-            torch.cuda.empty_cache()
-        gc.collect()
-    except Exception as e:
-        print(f"Failed to release resources: {e}")
-def resource_manager():
-    MAX_RAM_PERCENT = 90
-    MAX_CPU_PERCENT = 90
-    MAX_GPU_PERCENT = 90
-    while True:
-        try:
-            virtual_mem = psutil.virtual_memory()
-            current_ram_percent = virtual_mem.percent
-            if current_ram_percent > MAX_RAM_PERCENT:
-                release_resources()
-            current_cpu_percent = psutil.cpu_percent()
-            if current_cpu_percent > MAX_CPU_PERCENT:
-                psutil.Process(os.getpid()).nice()
-            if torch.cuda.is_available():
-                gpu = torch.cuda.current_device()
-                gpu_mem = torch.cuda.memory_allocated(gpu) / (1024 * 1024)
-                total_gpu_mem = torch.cuda.get_device_properties(gpu).total_memory / (1024 * 1024)
-                gpu_mem_percent = (gpu_mem / total_gpu_mem) * 100
-                if gpu_mem_percent > MAX_GPU_PERCENT:
-                    release_resources()
-        except Exception as e:
-            print(f"Error in resource manager: {e}")
-def run_resource_manager():
-    resource_manager()
-Thread(target=run_resource_manager, daemon=True).start()
 def normalize_input(input_text):
-    return input_text.strip()
-def remove_duplicates(text):
-    lines = text.split('\n')
-    unique_lines = []
-    seen_lines = set()
-    for line in lines:
-        if line not in seen_lines:
-            unique_lines.append(line)
-            seen_lines.add(line)
-    return '\n'.join(unique_lines)
-def get_best_response(responses):
-    responses = [response for response in responses if response and not set(response.lower().split()).intersection(ENGLISH_STOP_WORDS)]
-    if not responses:
-        return "No valid content generated."
-    vectorizer = TfidfVectorizer().fit_transform(responses)
-    similarity_matrix = cosine_similarity(vectorizer)
-    total_similarities = similarity_matrix.sum(axis=1)
-    best_response_index = total_similarities.argmax()
-    return responses[best_response_index]
 async def generate_model_response(model, inputs):
     try:
-        response = await model(inputs)
-        return remove_duplicates(response['choices'][0]['text'])
     except Exception as e:
         return ""

 import gptcache
 from sklearn.metrics.pairwise import cosine_similarity
 from sklearn.feature_extraction.text import TfidfVectorizer
+import nltk
+from nltk.corpus import stopwords
+nltk.download('stopwords')
 load_dotenv()
 HUGGINGFACE_TOKEN = os.getenv("HUGGINGFACE_TOKEN")
     'ssm_dt_rank': {},
     'ssm_dt_b_c_rms': {},
     'vocab_type': {},
+    'model_type': {},
+    "general.architecture": {},
+    "general.type": {},
+    "general.name": {},
+    "general.finetune": {},
+    "general.basename": {},
+    "general.size_label": {},
+    "general.license": {},
+    "general.license.link": {},
+    "general.tags": {},
+    "general.languages": {},
+    "general.organization": {},
+    "general.base_model.count": {},
+    'general.file_type': {},
+    "phi3.context_length": {},
+    "phi3.rope.scaling.original_context_length": {},
+    "phi3.embedding_length": {},
+    "phi3.feed_forward_length": {},
+    "phi3.block_count": {},
+    "phi3.attention.head_count": {},
+    "phi3.attention.head_count_kv": {},
+    "phi3.attention.layer_norm_rms_epsilon": {},
+    "phi3.rope.dimension_count": {},
+    "phi3.rope.freq_base": {},
+    "phi3.attention.sliding_window": {},
+    "phi3.rope.scaling.attn_factor": {},
+    "llama.block_count": {},
+    "llama.context_length": {},
+    "llama.embedding_length": {},
+    "llama.feed_forward_length": {},
+    "llama.attention.head_count": {},
+    "llama.attention.head_count_kv": {},
+    "llama.rope.freq_base": {},
+    "llama.attention.layer_norm_rms_epsilon": {},
+    "llama.attention.key_length": {},
+    "llama.attention.value_length": {},
+    "llama.vocab_size": {},
+    "llama.rope.dimension_count": {},
+    "deepseek2.block_count": {},
+    "deepseek2.context_length": {},
+    "deepseek2.embedding_length": {},
+    "deepseek2.feed_forward_length": {},
+    "deepseek2.attention.head_count": {},
+    "deepseek2.attention.head_count_kv": {},
+    "deepseek2.rope.freq_base": {},
+    "deepseek2.attention.layer_norm_rms_epsilon": {},
+    "deepseek2.expert_used_count": {},
+    "deepseek2.leading_dense_block_count": {},
+    "deepseek2.vocab_size": {},
+    "deepseek2.attention.kv_lora_rank": {},
+    "deepseek2.attention.key_length": {},
+    "deepseek2.attention.value_length": {},
+    "deepseek2.expert_feed_forward_length": {},
+    "deepseek2.expert_count": {},
+    "deepseek2.expert_shared_count": {},
+    "deepseek2.expert_weights_scale": {},
+    "deepseek2.rope.dimension_count": {},
+    "deepseek2.rope.scaling.type": {},
+    "deepseek2.rope.scaling.factor": {},
+    "deepseek2.rope.scaling.yarn_log_multiplier": {},
+    "qwen2.block_count": {},
+    "qwen2.context_length": {},
+    "qwen2.embedding_length": {},
+    "qwen2.feed_forward_length": {},
+    "qwen2.attention.head_count": {},
+    "qwen2.attention.head_count_kv": {},
+    "qwen2.rope.freq_base": {},
+    "qwen2.attention.layer_norm_rms_epsilon": {},
+    "general.version": {},
+    "general.datasets": {},
+    "tokenizer.ggml.model": {},
+    "tokenizer.ggml.pre": {},
+    "tokenizer.ggml.tokens": {},
+    "tokenizer.ggml.token_type": {},
+    "tokenizer.ggml.merges": {},
+    "tokenizer.ggml.bos_token_id": {},
+    "tokenizer.ggml.eos_token_id": {},
+    "tokenizer.ggml.unknown_token_id": {},
+    "tokenizer.ggml.padding_token_id": {},
+    "tokenizer.ggml.add_bos_token": {},
+    "tokenizer.ggml.add_eos_token": {},
+    "tokenizer.ggml.add_space_prefix": {},
+    "tokenizer.chat_template": {},
+    "quantize.imatrix.file": {},
+    "quantize.imatrix.dataset": {},
+    "quantize.imatrix.entries_count": {},
+    "quantize.imatrix.chunks_count": {},
+    "general.quantization_version": {},
+    'n_lora_q': {},
+    'n_lora_kv': {},
+    'n_expert_shared': {},
+    'n_ff_exp': {},
+    "n_layer_dense_lead": {},
+    "expert_weights_scale": {},
+    "rope_yarn_log_mul": {},
+    'model_type': {},
+    'eval': {},
+    'time': {},
+    'token': {},
+    'tokens': {},
+    'pads': {},
+    'model': {},
+    'base': {},
+    'model_base': {},
+    'perhaps': {},
+    'word': {},
+    'words': {},
+    'start': {},
+    'stop': {},
+    'run': {},
+    'runs': {},
+    'ms': {},
+    'vocabulary': {},
+    'timeout': {},
+    'load': {},
+    'load_time': {},
+    'bas': {},
+    'tok': {},
+    'second': {},
+    'seconds': {},
+    'graph': {},
+    'load_model': {},
+    'end': {},
+    'llama_perf_context_print': {},
+    'llm_load_print_meta': {}
 }
 model_configs = [
     {"repo_id": "Hjgugugjhuhjggg/testing_semifinal-Q2_K-GGUF", "filename": "testing_semifinal-q2_k.gguf", "name": "testing"},
     {"repo_id": "bartowski/Llama-3.2-3B-Instruct-uncensored-GGUF", "filename": "Llama-3.2-3B-Instruct-uncensored-Q2_K.gguf", "name": "Llama-3.2-3B-Instruct"},
     {"repo_id": "Ffftdtd5dtft/Meta-Llama-3.1-70B-Q2_K-GGUF", "filename": "meta-llama-3.1-70b-q2_k.gguf", "name": "Meta-Llama-3.1-70B"},
+    {"repo_id": "Hhhbvvkgh/Heidi-Llama-v4-Q2_K-GGUF", "filename": "heidi-llama-v4-q2_k.gguf", "name": "Heidi-Llama-V4"}
 ]
 def normalize_input(input_text):
+    stop_words = set(stopwords.words('english'))
+    words = input_text.split()
+    filtered_words = [word for word in words if word.lower() not in stop_words]
+    return " ".join(filtered_words)
 async def generate_model_response(model, inputs):
     try:
+        response = await model.generate(inputs)
+        return response
     except Exception as e:
         return ""