base_model: Pinkstack/llama-3.2-superthoughtslite-expert-chat
gate_mode: hidden # Common gating mechanism using hidden states. Alternatives: 'cheap_embed', 'random'
dtype: float16 # Use float16 to save memory/disk space; common for inference
experts:
  - source_model: Pinkstack/llama-3.2-superthoughtslite-expert-chat
    positive_prompts:
      - "General use"
      - "Conversational"
      - "Question answering"
      - "Multilingual"
      - "Translation"
      - "Roleplay"
  - source_model: Pinkstack/llama-3.2-superthoughts-expert-math
    positive_prompts:
      - "Mathematical"
      - "Algebra"
      - "Shape understanding"
      - "counting problem"
      - "Explain math"
      - "placing objects"
  - source_model: Pinkstack/llama-3.2-superthoughtslite-expert-medical
    positive_prompts:
      - "Medical"
      - "Biology"
      - "Science"
      - "Sickness"
      - "Illness"
      - "emotional reasoning" # Note: may overlap slightly with general chat; choose prompts carefully
  - source_model: Pinkstack/llama-3.2-superthoughts-lite-expert-code
    positive_prompts:
      - "Code generation"
      - "Debugging"
      - "Finish code"
      - "Explain code"
      - "Refine code"
      - "Coding assistance"

# --- MoE Specific Parameters ---
# num_experts_per_tok: how many experts to activate per token during inference.
# Common values are 1 or 2; using 2 often gives better quality.
num_experts_per_tok: 2
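
Below is a minimal usage sketch for the merged model, assuming the config above has been applied with mergekit's MoE tooling and the result saved to a local directory, and that the resulting checkpoint loads through the standard transformers auto classes. The output path and prompt are placeholders, not part of the config.

```python
# Minimal sketch: load and run the merged MoE checkpoint.
# "./superthoughts-lite-moe" is a hypothetical output directory for the merge.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_path = "./superthoughts-lite-moe"

tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    torch_dtype=torch.float16,  # matches the dtype set in the config above
    device_map="auto",
)

# With num_experts_per_tok: 2, the router activates the two best-matching
# experts for each token at inference time.
prompt = "Explain how to solve 3x + 5 = 20."
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_new_tokens=128)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```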