base_model: Pinkstack/llama-3.2-superthoughtslite-expert-chat
gate_mode: hidden # Common gating mechanism using hidden states. Alternatives: 'cheap_embed', 'random'
dtype: float16 # Use float16 to save memory/disk space; common for inference
experts:
  - source_model: Pinkstack/llama-3.2-superthoughtslite-expert-chat
    positive_prompts:
      - "General use"
      - "Conversational"
      - "Question answering"
      - "Multilingual"
      - "Translation"
      - "Roleplay"
  - source_model: Pinkstack/llama-3.2-superthoughts-expert-math
    positive_prompts:
      - "Mathematical"
      - "Algebra"
      - "Shape understanding"
      - "counting problem"
      - "Explain math"
      - "placing objects"
  - source_model: Pinkstack/llama-3.2-superthoughtslite-expert-medical
    positive_prompts:
      - "Medical"
      - "Biology"
      - "Science"
      - "Sickness"
      - "Illness"
      - "emotional reasoning" # Note: may overlap slightly with general chat; choose prompts carefully
  - source_model: Pinkstack/llama-3.2-superthoughts-lite-expert-code
    positive_prompts:
      - "Code generation"
      - "Debugging"
      - "Finish code"
      - "Explain code"
      - "Refine code"
      - "Coding assistance"

# --- MoE Specific Parameters ---
# num_experts_per_tok: how many experts to activate per token during inference.
# Common values are 1 or 2; using 2 often gives better quality.
num_experts_per_tok: 2
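
Below is a minimal usage sketch for the merged model, assuming the config above has been applied with mergekit's MoE tooling and the result saved to a local directory, and that the resulting checkpoint loads through the standard transformers auto classes. The output path and prompt are placeholders, not part of the config.

```python
# Minimal sketch: load and run the merged MoE checkpoint.
# "./superthoughts-lite-moe" is a hypothetical output directory for the merge.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_path = "./superthoughts-lite-moe"

tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    torch_dtype=torch.float16,  # matches the dtype set in the config above
    device_map="auto",
)

# With num_experts_per_tok: 2, the router activates the two best-matching
# experts for each token at inference time.
prompt = "Explain how to solve 3x + 5 = 20."
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_new_tokens=128)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```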