MCP server that exposes local Ollama models to Claude Code via a LiteLLM proxy. Tools: query_local_model, review_code, summarize, generate_boilerplate, list_models. Deployed to the k8s ai-inference namespace via ArgoCD. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: ollama-mcp
  namespace: ai-inference
  labels:
    app: ollama-mcp
  annotations:
    argocd.argoproj.io/sync-wave: "10"
spec:
  replicas: 1
  selector:
    matchLabels:
      app: ollama-mcp
  template:
    metadata:
      labels:
        app: ollama-mcp
    spec:
      containers:
        - name: ollama-mcp
          image: registry.storedbox.net/ollama-mcp:latest
          ports:
            - containerPort: 8090
              name: http
              protocol: TCP
          env:
            - name: LITELLM_BASE_URL
              value: "http://litellm.ai-inference.svc:4000"
            - name: LITELLM_API_KEY
              valueFrom:
                secretKeyRef:
                  name: ollama-mcp-secrets
                  key: LITELLM_API_KEY
                  optional: true
            - name: PORT
              value: "8090"
            - name: REQUEST_TIMEOUT
              value: "120"
          livenessProbe:
            httpGet:
              path: /health
              port: 8090
            initialDelaySeconds: 15
            periodSeconds: 30
            timeoutSeconds: 5
          readinessProbe:
            httpGet:
              path: /health
              port: 8090
            initialDelaySeconds: 10
            periodSeconds: 10
            timeoutSeconds: 5
          resources:
            requests:
              memory: "128Mi"
              cpu: "100m"
            limits:
              memory: "256Mi"
              cpu: "500m"
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
              - matchExpressions:
                  - key: kubernetes.io/hostname
                    operator: NotIn
                    values:
                      - k3s-control-2
                      - k3s-worker-3
                      - k3s-worker-4
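# The LiteLLM proxy behind LITELLM_BASE_URL is assumed to map the local Ollama
# models with a config along these lines (sketch only; the model name and the
# Ollama service address are placeholders, not taken from this manifest):
#
#   model_list:
#     - model_name: qwen2.5-coder
#       litellm_params:
#         model: ollama/qwen2.5-coder
#         api_base: http://ollama.ai-inference.svc:11434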
---
apiVersion: v1
kind: Service
metadata:
  name: ollama-mcp
  namespace: ai-inference
  labels:
    app: ollama-mcp
  annotations:
    metallb.universe.tf/loadBalancerIPs: "192.168.87.29"
spec:
  type: LoadBalancer
  ports:
    - port: 8090
      targetPort: 8090
      protocol: TCP
      name: http
  selector:
    app: ollama-mcp
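# With the MetalLB address above, the server can be registered with Claude Code,
# e.g. (sketch; assumes the MCP endpoint uses an HTTP-based transport on :8090,
# swap the transport flag for whatever the server actually implements):
#   claude mcp add --transport http ollama-local http://192.168.87.29:8090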
---
# Create the LiteLLM API key secret before deploying:
#   kubectl create secret generic ollama-mcp-secrets -n ai-inference \
#     --from-literal=LITELLM_API_KEY=<generated-key>
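#
# Once the secret exists and ArgoCD has synced, a quick smoke test (the /health
# path is the same one the probes use):
#   kubectl -n ai-inference rollout status deploy/ollama-mcp
#   curl http://192.168.87.29:8090/health
#
# Sketch of an ArgoCD Application that could sync this manifest (the sync-wave
# annotation above is honored during that sync); repoURL and path are
# placeholders, not taken from this file:
#
#   apiVersion: argoproj.io/v1alpha1
#   kind: Application
#   metadata:
#     name: ollama-mcp
#     namespace: argocd
#   spec:
#     project: default
#     source:
#       repoURL: https://git.example.com/homelab/manifests.git
#       targetRevision: main
#       path: ai-inference/ollama-mcp
#     destination:
#       server: https://kubernetes.default.svc
#       namespace: ai-inference
#     syncPolicy:
#       automated:
#         prune: true
#         selfHeal: true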