---
# ollama-mcp: MCP gateway fronting the in-cluster LiteLLM service.
# Scheduled away from k3s-control-2 / k3s-worker-3 / k3s-worker-4 via node
# anti-affinity; exposed externally through a MetalLB LoadBalancer below.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: ollama-mcp
  namespace: ai-inference
  labels:
    app: ollama-mcp
  annotations:
    # Deploy late in the ArgoCD sync order (after core services).
    argocd.argoproj.io/sync-wave: "10"
spec:
  replicas: 1
  selector:
    matchLabels:
      app: ollama-mcp
  template:
    metadata:
      labels:
        app: ollama-mcp
    spec:
      containers:
        - name: ollama-mcp
          # NOTE(review): ":latest" is a mutable tag — pin a version tag or
          # image digest for reproducible rollouts.
          image: registry.storedbox.net/ollama-mcp:latest
          ports:
            - containerPort: 8090
              name: http
              protocol: TCP
          env:
            # Upstream LiteLLM endpoint (in-cluster Service DNS).
            - name: LITELLM_BASE_URL
              value: "http://litellm.ai-inference.svc:4000"
            # API key is injected from a Secret; optional so the pod can start
            # even if the Secret has not been created yet (see footer comment).
            - name: LITELLM_API_KEY
              valueFrom:
                secretKeyRef:
                  name: ollama-mcp-secrets
                  key: LITELLM_API_KEY
                  optional: true
            - name: PORT
              value: "8090"
            # Upstream request timeout in seconds.
            - name: REQUEST_TIMEOUT
              value: "120"
          livenessProbe:
            httpGet:
              path: /health
              port: 8090
            initialDelaySeconds: 15
            periodSeconds: 30
            timeoutSeconds: 5
          readinessProbe:
            httpGet:
              path: /health
              port: 8090
            initialDelaySeconds: 10
            periodSeconds: 10
            timeoutSeconds: 5
          resources:
            requests:
              memory: "128Mi"
              cpu: "100m"
            limits:
              memory: "256Mi"
              cpu: "500m"
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
              - matchExpressions:
                  # Keep the pod off these nodes.
                  - key: kubernetes.io/hostname
                    operator: NotIn
                    values:
                      - k3s-control-2
                      - k3s-worker-3
                      - k3s-worker-4
---
# External entry point: MetalLB assigns the pinned LoadBalancer IP below.
apiVersion: v1
kind: Service
metadata:
  name: ollama-mcp
  namespace: ai-inference
  labels:
    app: ollama-mcp
  annotations:
    metallb.universe.tf/loadBalancerIPs: "192.168.87.29"
spec:
  type: LoadBalancer
  ports:
    - port: 8090
      targetPort: 8090
      protocol: TCP
      name: http
  selector:
    app: ollama-mcp

# Create the LiteLLM API key secret before deploying:
#   kubectl create secret generic ollama-mcp-secrets -n ai-inference \
#     --from-literal=LITELLM_API_KEY=<LITELLM_API_KEY>
#
# SECURITY(review): a previous revision of this file embedded a real API key
# in this comment. Never commit secrets (even in comments) to version control;
# rotate that key in LiteLLM immediately and treat it as compromised.