Initial commit: Ollama MCP server
MCP server exposing local Ollama models via LiteLLM proxy to Claude Code. Tools: query_local_model, review_code, summarize, generate_boilerplate, list_models. Deployed to k8s ai-inference namespace via ArgoCD. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
43
k8s/argocd-app.yaml
Normal file
43
k8s/argocd-app.yaml
Normal file
@@ -0,0 +1,43 @@
---
# ArgoCD Application — deploy this once to bootstrap:
#   kubectl apply -f k8s/argocd-app.yaml
#
# Pre-requisite: add the repo to ArgoCD first:
#   argocd repo add https://repo.adservio.us/ai_approver/ollama-mcp.git \
#     --username <gitea-user> --password <gitea-token>
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
  name: ollama-mcp
  namespace: argocd
  # Cascade-delete managed resources when this Application is deleted.
  finalizers:
    - resources-finalizer.argocd.argoproj.io
spec:
  project: default
  source:
    repoURL: https://repo.adservio.us/ai_approver/ollama-mcp.git
    targetRevision: main
    path: k8s
    directory:
      # Don't let the app manage its own bootstrap manifest.
      exclude: argocd-app.yaml
  destination:
    server: https://kubernetes.default.svc
    namespace: ai-inference
  syncPolicy:
    automated:
      prune: true
      selfHeal: true
      allowEmpty: false
    syncOptions:
      - CreateNamespace=true
    retry:
      limit: 5
      backoff:
        duration: 5s
        factor: 2
        maxDuration: 3m
  # Ignore replica-count drift (manual scaling or an HPA) so selfHeal
  # doesn't continually revert it.
  ignoreDifferences:
    - group: apps
      kind: Deployment
      jsonPointers:
        - /spec/replicas
95
k8s/deployment.yaml
Normal file
95
k8s/deployment.yaml
Normal file
@@ -0,0 +1,95 @@
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: ollama-mcp
  namespace: ai-inference
  labels:
    app: ollama-mcp
  annotations:
    # Sync after earlier waves (namespace/secret bootstrap).
    argocd.argoproj.io/sync-wave: "10"
spec:
  replicas: 1
  selector:
    matchLabels:
      app: ollama-mcp
  template:
    metadata:
      labels:
        app: ollama-mcp
    spec:
      containers:
        - name: ollama-mcp
          # NOTE(review): a mutable ":latest" tag defeats ArgoCD change
          # detection — prefer immutable tags (e.g. the git SHA) pushed by CI.
          image: registry.storedbox.net/ollama-mcp:latest
          ports:
            - containerPort: 8090
              name: http
              protocol: TCP
          env:
            - name: LITELLM_BASE_URL
              value: "http://litellm.ai-inference.svc:4000"
            - name: LITELLM_API_KEY
              valueFrom:
                secretKeyRef:
                  name: ollama-mcp-secrets
                  key: LITELLM_API_KEY
                  # Pod may start without the secret present.
                  optional: true
            - name: PORT
              value: "8090"
            - name: REQUEST_TIMEOUT
              value: "120"
          livenessProbe:
            httpGet:
              path: /health
              port: 8090
            initialDelaySeconds: 15
            periodSeconds: 30
            timeoutSeconds: 5
          readinessProbe:
            httpGet:
              path: /health
              port: 8090
            initialDelaySeconds: 10
            periodSeconds: 10
            timeoutSeconds: 5
          resources:
            requests:
              memory: "128Mi"
              cpu: "100m"
            limits:
              memory: "256Mi"
              cpu: "500m"
      # Keep the pod off nodes that shouldn't run inference-adjacent workloads.
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
              - matchExpressions:
                  - key: kubernetes.io/hostname
                    operator: NotIn
                    values:
                      - k3s-control-2
                      - k3s-worker-3
                      - k3s-worker-4
---
apiVersion: v1
kind: Service
metadata:
  name: ollama-mcp
  namespace: ai-inference
  labels:
    app: ollama-mcp
  annotations:
    # Pin the MetalLB-assigned LoadBalancer IP.
    metallb.universe.tf/loadBalancerIPs: "192.168.87.29"
spec:
  type: LoadBalancer
  ports:
    - port: 8090
      targetPort: 8090
      protocol: TCP
      name: http
  selector:
    app: ollama-mcp

# Create the LiteLLM API key secret before deploying:
#   kubectl create secret generic ollama-mcp-secrets -n ai-inference \
#     --from-literal=LITELLM_API_KEY=<litellm-api-key>
#
# SECURITY: never commit the real key value. A previous revision of this
# file leaked a literal API key in this comment — that key must be
# considered compromised and rotated.
Reference in New Issue
Block a user