MCP server that exposes local Ollama models to Claude Code via a LiteLLM proxy. Tools: query_local_model, review_code, summarize, generate_boilerplate, list_models. Deployed to the k8s ai-inference namespace via ArgoCD. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: ollama-mcp
  namespace: ai-inference
  labels:
    app: ollama-mcp
  annotations:
    argocd.argoproj.io/sync-wave: "10"
spec:
  replicas: 1
  selector:
    matchLabels:
      app: ollama-mcp
  template:
    metadata:
      labels:
        app: ollama-mcp
    spec:
      containers:
        - name: ollama-mcp
          image: registry.storedbox.net/ollama-mcp:latest
          ports:
            - containerPort: 8090
              name: http
              protocol: TCP
          env:
            - name: LITELLM_BASE_URL
              value: "http://litellm.ai-inference.svc:4000"
            - name: LITELLM_API_KEY
              valueFrom:
                secretKeyRef:
                  name: ollama-mcp-secrets
                  key: LITELLM_API_KEY
                  optional: true
            - name: PORT
              value: "8090"
            - name: REQUEST_TIMEOUT
              value: "120"
          livenessProbe:
            httpGet:
              path: /health
              port: 8090
            initialDelaySeconds: 15
            periodSeconds: 30
            timeoutSeconds: 5
          readinessProbe:
            httpGet:
              path: /health
              port: 8090
            initialDelaySeconds: 10
            periodSeconds: 10
            timeoutSeconds: 5
          resources:
            requests:
              memory: "128Mi"
              cpu: "100m"
            limits:
              memory: "256Mi"
              cpu: "500m"
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
              - matchExpressions:
                  - key: kubernetes.io/hostname
                    operator: NotIn
                    values:
                      - k3s-control-2
                      - k3s-worker-3
                      - k3s-worker-4
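# The LiteLLM proxy behind LITELLM_BASE_URL is assumed to map the local Ollama
# models with a config along these lines (sketch only; the model name and the
# Ollama service address are placeholders, not taken from this manifest):
#
#   model_list:
#     - model_name: qwen2.5-coder
#       litellm_params:
#         model: ollama/qwen2.5-coder
#         api_base: http://ollama.ai-inference.svc:11434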
---
apiVersion: v1
kind: Service
metadata:
  name: ollama-mcp
  namespace: ai-inference
  labels:
    app: ollama-mcp
  annotations:
    metallb.universe.tf/loadBalancerIPs: "192.168.87.29"
spec:
  type: LoadBalancer
  ports:
    - port: 8090
      targetPort: 8090
      protocol: TCP
      name: http
  selector:
    app: ollama-mcp
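# With the MetalLB address above, the server can be registered with Claude Code,
# e.g. (sketch; assumes the MCP endpoint uses an HTTP-based transport on :8090,
# swap the transport flag for whatever the server actually implements):
#   claude mcp add --transport http ollama-local http://192.168.87.29:8090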
---
# Create the LiteLLM API key secret before deploying:
#   kubectl create secret generic ollama-mcp-secrets -n ai-inference \
#     --from-literal=LITELLM_API_KEY=<generated-key>
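#
# Once the secret exists and ArgoCD has synced, a quick smoke test (the /health
# path is the same one the probes use):
#   kubectl -n ai-inference rollout status deploy/ollama-mcp
#   curl http://192.168.87.29:8090/health
#
# Sketch of an ArgoCD Application that could sync this manifest (the sync-wave
# annotation above is honored during that sync); repoURL and path are
# placeholders, not taken from this file:
#
#   apiVersion: argoproj.io/v1alpha1
#   kind: Application
#   metadata:
#     name: ollama-mcp
#     namespace: argocd
#   spec:
#     project: default
#     source:
#       repoURL: https://git.example.com/homelab/manifests.git
#       targetRevision: main
#       path: ai-inference/ollama-mcp
#     destination:
#       server: https://kubernetes.default.svc
#       namespace: ai-inference
#     syncPolicy:
#       automated:
#         prune: true
#         selfHeal: true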