diff --git a/kube/deploy/apps/mlc-llm/app/hr.yaml b/kube/deploy/apps/mlc-llm/app/hr.yaml
index 93dd8880..8d0b3416 100644
--- a/kube/deploy/apps/mlc-llm/app/hr.yaml
+++ b/kube/deploy/apps/mlc-llm/app/hr.yaml
@@ -45,7 +45,7 @@ spec:
                 cpu: "10m"
               limits:
                 cpu: "1000m"
-                memory: "12Gi"
+                memory: "6Gi"
                 gpu.intel.com/i915: "1"
             probes:
               liveness:
@@ -84,7 +84,7 @@ spec:
                 cpu: "10m"
               limits:
                 cpu: "1000m"
-                memory: "2Gi"
+                memory: "1Gi"
                 gpu.intel.com/i915: "1"
       codellama:
         <<: *deploy
@@ -94,6 +94,13 @@ spec:
             env:
               <<: *envMain
              MODEL: &codellama-model "CodeLlama-7b-hf-q4f32_1-MLC"
+            resources:
+              requests:
+                cpu: "10m"
+              limits:
+                cpu: "1000m"
+                memory: "12Gi"
+                gpu.intel.com/i915: "1"
       codellama-pull:
         <<: *job
         containers:
@@ -142,17 +149,10 @@ spec:
       misc:
         existingClaim: mlc-llm-misc
         globalMounts:
-          - subPath: cache
-            path: /app/.cache
-          - subPath: testdata
-            path: /app/.tvm_test_data
-          # - subPath: tmp
-          #   path: /tmp # used for downloading models, so why not download straight to disk
-      tmp:
-        type: emptyDir
-        globalMounts:
+          - subPath: data
+            path: /data
           - subPath: tmp
-            path: /tmp
+            path: /tmp # reduce time for moving downloaded model
    defaultPodOptions:
      automountServiceAccountToken: false
      enableServiceLinks: false
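
For reference, a minimal sketch of what the third hunk leaves the CodeLlama deployment with: it now carries its own resource stanza rather than relying only on whatever the *deploy anchor provides. Only keys visible in the hunk are reproduced; the surrounding controller/container nesting is assumed and is not part of this diff.

    env:
      <<: *envMain
      MODEL: &codellama-model "CodeLlama-7b-hf-q4f32_1-MLC"   # 4-bit CodeLlama-7b build for MLC
    resources:
      requests:
        cpu: "10m"                 # near-zero CPU reservation
      limits:
        cpu: "1000m"               # cap at one core
        memory: "12Gi"             # the same 12Gi budget the first hunk trims from the earlier deployment (12Gi -> 6Gi)
        gpu.intel.com/i915: "1"    # one Intel GPU via the i915 resource exposed by the Intel device plugin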