feat(mlc-llm): QwQ-32B speculative eagle decoding

This commit is contained in:
JJGadgets
2025-04-05 22:31:10 +08:00
parent 832e319038
commit ada60a2361

View File

@@ -140,14 +140,14 @@ spec:
env:
<<: *envMain
MODEL: &qwq-model "QwQ-32B-q4f16_1-MLC"
SPECULATIVE_DECODING: "eagle"
SPECULATIVE_DECODING: "small_draft"
SPECULATIVE_DECODING_MODEL: "Qwen2.5-0.5B-Instruct-q0f16-MLC"
resources:
requests:
cpu: "10m"
limits:
cpu: "1000m"
memory: "40Gi"
memory: "30Gi"
gpu.intel.com/i915: "1"
qwq-pull:
<<: *job