Initial commit: Ollama MCP server

MCP server that exposes local Ollama models to Claude Code through a LiteLLM proxy.
Tools: query_local_model, review_code, summarize, generate_boilerplate, list_models.
Deployed to the ai-inference namespace on k8s via ArgoCD.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-21 17:33:56 +00:00
commit 139a038505
6 changed files with 548 additions and 0 deletions
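
The server source itself is not among the files shown in this excerpt, so as orientation only, a tool such as query_local_model could look roughly like the sketch below. It assumes the `mcp` Python SDK (FastMCP), `httpx`, and LiteLLM's OpenAI-compatible /v1/chat/completions route; the default model name is a placeholder, not the repo's actual choice.

import os

import httpx
from mcp.server.fastmcp import FastMCP

# Mirrors the Deployment's env vars; defaults match k8s/deployment.yaml below.
LITELLM_BASE_URL = os.environ.get("LITELLM_BASE_URL", "http://litellm.ai-inference.svc:4000")
LITELLM_API_KEY = os.environ.get("LITELLM_API_KEY", "")
REQUEST_TIMEOUT = float(os.environ.get("REQUEST_TIMEOUT", "120"))

mcp = FastMCP("ollama-mcp")

@mcp.tool()
def query_local_model(prompt: str, model: str = "qwen2.5-coder") -> str:
    """Forward a prompt to a local Ollama model through the LiteLLM proxy."""
    resp = httpx.post(
        f"{LITELLM_BASE_URL}/v1/chat/completions",  # OpenAI-compatible LiteLLM route
        headers={"Authorization": f"Bearer {LITELLM_API_KEY}"},
        json={"model": model, "messages": [{"role": "user", "content": prompt}]},
        timeout=REQUEST_TIMEOUT,
    )
    resp.raise_for_status()
    return resp.json()["choices"][0]["message"]["content"]

if __name__ == "__main__":
    mcp.run()  # stdio transport for local testing; the deployed pod serves HTTP on $PORT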

k8s/argocd-app.yaml

@@ -0,0 +1,43 @@
---
# ArgoCD Application - deploy this once to bootstrap:
#   kubectl apply -f k8s/argocd-app.yaml
#
# Prerequisite: add the repo to ArgoCD first:
#   argocd repo add https://repo.adservio.us/ai_approver/ollama-mcp.git \
#     --username <gitea-user> --password <gitea-token>
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
  name: ollama-mcp
  namespace: argocd
  finalizers:
    - resources-finalizer.argocd.argoproj.io
spec:
  project: default
  source:
    repoURL: https://repo.adservio.us/ai_approver/ollama-mcp.git
    targetRevision: main
    path: k8s
    directory:
      # Don't let the Application manage its own bootstrap manifest.
      exclude: argocd-app.yaml
  destination:
    server: https://kubernetes.default.svc
    namespace: ai-inference
  syncPolicy:
    automated:
      prune: true
      selfHeal: true
      allowEmpty: false
    syncOptions:
      - CreateNamespace=true
    retry:
      limit: 5
      backoff:
        duration: 5s
        factor: 2
        maxDuration: 3m
  ignoreDifferences:
    # Tolerate manual replica scaling without triggering a sync.
    - group: apps
      kind: Deployment
      jsonPointers:
        - /spec/replicas

k8s/deployment.yaml

@@ -0,0 +1,95 @@
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: ollama-mcp
  namespace: ai-inference
  labels:
    app: ollama-mcp
  annotations:
    argocd.argoproj.io/sync-wave: "10"
spec:
  replicas: 1
  selector:
    matchLabels:
      app: ollama-mcp
  template:
    metadata:
      labels:
        app: ollama-mcp
    spec:
      containers:
        - name: ollama-mcp
          image: registry.storedbox.net/ollama-mcp:latest
          ports:
            - containerPort: 8090
              name: http
              protocol: TCP
          env:
            - name: LITELLM_BASE_URL
              value: "http://litellm.ai-inference.svc:4000"
            - name: LITELLM_API_KEY
              valueFrom:
                secretKeyRef:
                  name: ollama-mcp-secrets
                  key: LITELLM_API_KEY
                  optional: true
            - name: PORT
              value: "8090"
            - name: REQUEST_TIMEOUT
              value: "120"
          livenessProbe:
            httpGet:
              path: /health
              port: 8090
            initialDelaySeconds: 15
            periodSeconds: 30
            timeoutSeconds: 5
          readinessProbe:
            httpGet:
              path: /health
              port: 8090
            initialDelaySeconds: 10
            periodSeconds: 10
            timeoutSeconds: 5
          resources:
            requests:
              memory: "128Mi"
              cpu: "100m"
            limits:
              memory: "256Mi"
              cpu: "500m"
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
              - matchExpressions:
                  - key: kubernetes.io/hostname
                    operator: NotIn
                    values:
                      - k3s-control-2
                      - k3s-worker-3
                      - k3s-worker-4
---
apiVersion: v1
kind: Service
metadata:
  name: ollama-mcp
  namespace: ai-inference
  labels:
    app: ollama-mcp
  annotations:
    metallb.universe.tf/loadBalancerIPs: "192.168.87.29"
spec:
  type: LoadBalancer
  ports:
    - port: 8090
      targetPort: 8090
      protocol: TCP
      name: http
  selector:
    app: ollama-mcp
---
# Create the LiteLLM API key secret before deploying:
#   kubectl create secret generic ollama-mcp-secrets -n ai-inference \
#     --from-literal=LITELLM_API_KEY=<litellm-api-key>
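
The Deployment above probes GET /health on port 8090, but the handler is not among the files in this excerpt. One plausible shape, assuming the MCP SDK's SSE app mounted behind a Starlette router and served by uvicorn (all illustrative, not the repo's confirmed stack):

import os

import uvicorn
from starlette.applications import Starlette
from starlette.responses import JSONResponse
from starlette.routing import Mount, Route
from mcp.server.fastmcp import FastMCP

mcp = FastMCP("ollama-mcp")  # tools registered as in the earlier sketch

async def health(request):
    # Target of the liveness/readiness probes defined in deployment.yaml.
    return JSONResponse({"status": "ok"})

app = Starlette(routes=[
    Route("/health", health),
    Mount("/", app=mcp.sse_app()),  # MCP over SSE on the same port
])

if __name__ == "__main__":
    # PORT mirrors the Deployment's env var and containerPort (8090).
    uvicorn.run(app, host="0.0.0.0", port=int(os.environ.get("PORT", "8090")))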