mirror of
https://github.com/outbackdingo/Biohazard.git
synced 2026-03-21 19:39:46 +00:00
feat(mlc-llm): QwQ-32B, max seq 16384, enable debug
This commit is contained in:
@@ -29,12 +29,13 @@ spec:
|
||||
image: &img
|
||||
repository: jank.ing/jjgadgets/mlc-llm
|
||||
tag: 0.19.0@sha256:acbe4da65245cdc424eb16de4dd09b6c77fc1dc48f871f04faca4c0365341420
|
||||
args: ["HF://mlc-ai/$(MODEL)", "--enable-debug", "--overrides", "max_num_sequence=1;max_total_seq_length=16384"]
|
||||
args: ["HF://mlc-ai/$(MODEL)", "--enable-debug", "--overrides", "max_num_sequence=1;max_total_seq_length=$(CONTEXT_SIZE)"]
|
||||
env: &envMain
|
||||
TZ: "${CONFIG_TZ}"
|
||||
MLC_JIT_POLICY: "ON"
|
||||
MLC_DOWNLOAD_CACHE_POLICY: "READONLY"
|
||||
MODEL: &llama3-model "Llama-3.2-3B-Instruct-q4f16_1-MLC"
|
||||
CONTEXT_SIZE: "32768" # smaller KV cache needed, thus a larger context size
|
||||
securityContext: &sc
|
||||
readOnlyRootFilesystem: true
|
||||
allowPrivilegeEscalation: false
|
||||
@@ -45,7 +46,7 @@ spec:
|
||||
cpu: "10m"
|
||||
limits:
|
||||
cpu: "1000m"
|
||||
memory: "6Gi"
|
||||
memory: "7.5Gi"
|
||||
gpu.intel.com/i915: "1"
|
||||
probes:
|
||||
liveness:
|
||||
@@ -113,13 +114,14 @@ spec:
|
||||
env:
|
||||
<<: *envMain
|
||||
MODEL: &phi3-model "Phi-3.5-mini-instruct-q4f16_1-MLC"
|
||||
CONTEXT_SIZE: "16384"
|
||||
resources:
|
||||
requests:
|
||||
cpu: "10m"
|
||||
memory: "3Gi"
|
||||
limits:
|
||||
cpu: "1000m"
|
||||
memory: "16Gi"
|
||||
memory: "9Gi" # 2GB params, 0.4GB tmp, rest KV cache
|
||||
gpu.intel.com/i915: "1"
|
||||
phi3-pull:
|
||||
<<: *job
|
||||
@@ -136,7 +138,7 @@ spec:
|
||||
<<: *mlc
|
||||
env:
|
||||
<<: *envMain
|
||||
MODEL: &qwq-model "mlc-ai/QwQ-32B-q4f16_1-MLC"
|
||||
MODEL: &qwq-model "QwQ-32B-q4f16_1-MLC"
|
||||
resources:
|
||||
requests:
|
||||
cpu: "10m"
|
||||
|
||||
Reference in New Issue
Block a user