Initial commit: Ollama MCP server

MCP server that exposes local Ollama models to Claude Code through a LiteLLM proxy.
Tools: query_local_model, review_code, summarize, generate_boilerplate, list_models.
Deployed to the ai-inference namespace on k8s via ArgoCD.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-21 17:33:56 +00:00
commit 139a038505
6 changed files with 548 additions and 0 deletions
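
The server source itself is not among the files shown in this excerpt, so as orientation only, a tool such as query_local_model could look roughly like the sketch below. It assumes the `mcp` Python SDK (FastMCP), `httpx`, and LiteLLM's OpenAI-compatible /v1/chat/completions route; the default model name is a placeholder, not the repo's actual choice.

import os

import httpx
from mcp.server.fastmcp import FastMCP

# Mirrors the Deployment's env vars; defaults match k8s/deployment.yaml below.
LITELLM_BASE_URL = os.environ.get("LITELLM_BASE_URL", "http://litellm.ai-inference.svc:4000")
LITELLM_API_KEY = os.environ.get("LITELLM_API_KEY", "")
REQUEST_TIMEOUT = float(os.environ.get("REQUEST_TIMEOUT", "120"))

mcp = FastMCP("ollama-mcp")

@mcp.tool()
def query_local_model(prompt: str, model: str = "qwen2.5-coder") -> str:
    """Forward a prompt to a local Ollama model through the LiteLLM proxy."""
    resp = httpx.post(
        f"{LITELLM_BASE_URL}/v1/chat/completions",  # OpenAI-compatible LiteLLM route
        headers={"Authorization": f"Bearer {LITELLM_API_KEY}"},
        json={"model": model, "messages": [{"role": "user", "content": prompt}]},
        timeout=REQUEST_TIMEOUT,
    )
    resp.raise_for_status()
    return resp.json()["choices"][0]["message"]["content"]

if __name__ == "__main__":
    mcp.run()  # stdio transport for local testing; the deployed pod serves HTTP on $PORT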

k8s/argocd-app.yaml

@@ -0,0 +1,43 @@
---
# ArgoCD Application - deploy this once to bootstrap:
#   kubectl apply -f k8s/argocd-app.yaml
#
# Prerequisite: add the repo to ArgoCD first:
#   argocd repo add https://repo.adservio.us/ai_approver/ollama-mcp.git \
#     --username <gitea-user> --password <gitea-token>
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
  name: ollama-mcp
  namespace: argocd
  finalizers:
    - resources-finalizer.argocd.argoproj.io
spec:
  project: default
  source:
    repoURL: https://repo.adservio.us/ai_approver/ollama-mcp.git
    targetRevision: main
    path: k8s
    directory:
      # Don't let the Application manage its own bootstrap manifest.
      exclude: argocd-app.yaml
  destination:
    server: https://kubernetes.default.svc
    namespace: ai-inference
  syncPolicy:
    automated:
      prune: true
      selfHeal: true
      allowEmpty: false
    syncOptions:
      - CreateNamespace=true
    retry:
      limit: 5
      backoff:
        duration: 5s
        factor: 2
        maxDuration: 3m
  ignoreDifferences:
    # Tolerate manual replica scaling without triggering a sync.
    - group: apps
      kind: Deployment
      jsonPointers:
        - /spec/replicas

k8s/deployment.yaml

@@ -0,0 +1,95 @@
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: ollama-mcp
  namespace: ai-inference
  labels:
    app: ollama-mcp
  annotations:
    argocd.argoproj.io/sync-wave: "10"
spec:
  replicas: 1
  selector:
    matchLabels:
      app: ollama-mcp
  template:
    metadata:
      labels:
        app: ollama-mcp
    spec:
      containers:
        - name: ollama-mcp
          image: registry.storedbox.net/ollama-mcp:latest
          ports:
            - containerPort: 8090
              name: http
              protocol: TCP
          env:
            - name: LITELLM_BASE_URL
              value: "http://litellm.ai-inference.svc:4000"
            - name: LITELLM_API_KEY
              valueFrom:
                secretKeyRef:
                  name: ollama-mcp-secrets
                  key: LITELLM_API_KEY
                  optional: true
            - name: PORT
              value: "8090"
            - name: REQUEST_TIMEOUT
              value: "120"
          livenessProbe:
            httpGet:
              path: /health
              port: 8090
            initialDelaySeconds: 15
            periodSeconds: 30
            timeoutSeconds: 5
          readinessProbe:
            httpGet:
              path: /health
              port: 8090
            initialDelaySeconds: 10
            periodSeconds: 10
            timeoutSeconds: 5
          resources:
            requests:
              memory: "128Mi"
              cpu: "100m"
            limits:
              memory: "256Mi"
              cpu: "500m"
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
              - matchExpressions:
                  - key: kubernetes.io/hostname
                    operator: NotIn
                    values:
                      - k3s-control-2
                      - k3s-worker-3
                      - k3s-worker-4
---
apiVersion: v1
kind: Service
metadata:
  name: ollama-mcp
  namespace: ai-inference
  labels:
    app: ollama-mcp
  annotations:
    metallb.universe.tf/loadBalancerIPs: "192.168.87.29"
spec:
  type: LoadBalancer
  ports:
    - port: 8090
      targetPort: 8090
      protocol: TCP
      name: http
  selector:
    app: ollama-mcp
---
# Create the LiteLLM API key secret before deploying:
#   kubectl create secret generic ollama-mcp-secrets -n ai-inference \
#     --from-literal=LITELLM_API_KEY=<litellm-api-key>
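
The Deployment above probes GET /health on port 8090, but the handler is not among the files in this excerpt. One plausible shape, assuming the MCP SDK's SSE app mounted behind a Starlette router and served by uvicorn (all illustrative, not the repo's confirmed stack):

import os

import uvicorn
from starlette.applications import Starlette
from starlette.responses import JSONResponse
from starlette.routing import Mount, Route
from mcp.server.fastmcp import FastMCP

mcp = FastMCP("ollama-mcp")  # tools registered as in the earlier sketch

async def health(request):
    # Target of the liveness/readiness probes defined in deployment.yaml.
    return JSONResponse({"status": "ok"})

app = Starlette(routes=[
    Route("/health", health),
    Mount("/", app=mcp.sse_app()),  # MCP over SSE on the same port
])

if __name__ == "__main__":
    # PORT mirrors the Deployment's env var and containerPort (8090).
    uvicorn.run(app, host="0.0.0.0", port=int(os.environ.get("PORT", "8090")))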