Initial commit: Ollama MCP server
MCP server exposing local Ollama models via LiteLLM proxy to Claude Code. Tools: query_local_model, review_code, summarize, generate_boilerplate, list_models. Deployed to k8s ai-inference namespace via ArgoCD. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
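Once the Service is reachable, Claude Code can talk to the server over SSE. A minimal sketch, assuming Claude Code's MCP CLI with SSE transport and the MetalLB address from k8s/deployment.yaml (the server name and URL are placeholders to adjust for your cluster):

    # hypothetical registration; "ollama-local" is an arbitrary name
    claude mcp add --transport sse ollama-local http://192.168.87.29:8090/sse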
.gitea/workflows/build-deploy.yml (new file, 65 lines)
@@ -0,0 +1,65 @@
name: Build and Deploy

on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main

jobs:
  build-push:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v3

      - name: Build and push Docker image
        run: |
          docker login registry.storedbox.net \
            -u ${{ secrets.DOCKER_USER }} \
            -p ${{ secrets.DOCKER_PASSWORD }}

          docker build -t registry.storedbox.net/ollama-mcp:${{ github.sha }} .
          docker tag registry.storedbox.net/ollama-mcp:${{ github.sha }} \
            registry.storedbox.net/ollama-mcp:latest

          docker push registry.storedbox.net/ollama-mcp:${{ github.sha }}
          docker push registry.storedbox.net/ollama-mcp:latest

  deploy:
    runs-on: ubuntu-latest
    needs: build-push
    if: github.ref == 'refs/heads/main'
    steps:
      - name: Checkout code
        uses: actions/checkout@v3

      - name: Set up kubectl
        uses: azure/setup-kubectl@v3

      - name: Configure kubectl
        run: |
          echo "${{ secrets.KUBE_CONFIG }}" > kubeconfig
          export KUBECONFIG=kubeconfig

      - name: Apply k8s manifests
        run: |
          kubectl apply -f k8s/deployment.yaml --kubeconfig=kubeconfig

      - name: Rollout restart to pull latest image
        run: |
          kubectl rollout restart deployment/ollama-mcp \
            -n ai-inference --kubeconfig=kubeconfig
          kubectl rollout status deployment/ollama-mcp \
            -n ai-inference --kubeconfig=kubeconfig --timeout=120s

      - name: Health check
        run: |
          sleep 10
          MCP_IP=$(kubectl get svc ollama-mcp -n ai-inference \
            -o jsonpath='{.status.loadBalancer.ingress[0].ip}' \
            --kubeconfig=kubeconfig)
          curl -f http://${MCP_IP}:8090/health || exit 1
          echo "MCP server healthy at http://${MCP_IP}:8090"
Dockerfile (new file, 20 lines)
@@ -0,0 +1,20 @@
FROM python:3.12-slim

WORKDIR /app

COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

COPY src/ ./src/

ENV PYTHONUNBUFFERED=1 \
    PORT=8090 \
    LITELLM_BASE_URL=http://litellm.ai-inference.svc:4000 \
    REQUEST_TIMEOUT=120

EXPOSE 8090

HEALTHCHECK --interval=30s --timeout=10s --start-period=15s --retries=3 \
    CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:8090/health')"

CMD ["python", "src/server.py"]
k8s/argocd-app.yaml (new file, 43 lines)
@@ -0,0 +1,43 @@
---
# ArgoCD Application - deploy this once to bootstrap:
#   kubectl apply -f k8s/argocd-app.yaml
#
# Pre-requisite: Add the repo to ArgoCD first:
#   argocd repo add https://repo.adservio.us/ai_approver/ollama-mcp.git \
#     --username <gitea-user> --password <gitea-token>
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
  name: ollama-mcp
  namespace: argocd
  finalizers:
    - resources-finalizer.argocd.argoproj.io
spec:
  project: default
  source:
    repoURL: https://repo.adservio.us/ai_approver/ollama-mcp.git
    targetRevision: main
    path: k8s
    directory:
      exclude: argocd-app.yaml
  destination:
    server: https://kubernetes.default.svc
    namespace: ai-inference
  syncPolicy:
    automated:
      prune: true
      selfHeal: true
      allowEmpty: false
    syncOptions:
      - CreateNamespace=true
    retry:
      limit: 5
      backoff:
        duration: 5s
        factor: 2
        maxDuration: 3m
  ignoreDifferences:
    - group: apps
      kind: Deployment
      jsonPointers:
        - /spec/replicas
k8s/deployment.yaml (new file, 95 lines)
@@ -0,0 +1,95 @@
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: ollama-mcp
  namespace: ai-inference
  labels:
    app: ollama-mcp
  annotations:
    argocd.argoproj.io/sync-wave: "10"
spec:
  replicas: 1
  selector:
    matchLabels:
      app: ollama-mcp
  template:
    metadata:
      labels:
        app: ollama-mcp
    spec:
      containers:
        - name: ollama-mcp
          image: registry.storedbox.net/ollama-mcp:latest
          ports:
            - containerPort: 8090
              name: http
              protocol: TCP
          env:
            - name: LITELLM_BASE_URL
              value: "http://litellm.ai-inference.svc:4000"
            - name: LITELLM_API_KEY
              valueFrom:
                secretKeyRef:
                  name: ollama-mcp-secrets
                  key: LITELLM_API_KEY
                  optional: true
            - name: PORT
              value: "8090"
            - name: REQUEST_TIMEOUT
              value: "120"
          livenessProbe:
            httpGet:
              path: /health
              port: 8090
            initialDelaySeconds: 15
            periodSeconds: 30
            timeoutSeconds: 5
          readinessProbe:
            httpGet:
              path: /health
              port: 8090
            initialDelaySeconds: 10
            periodSeconds: 10
            timeoutSeconds: 5
          resources:
            requests:
              memory: "128Mi"
              cpu: "100m"
            limits:
              memory: "256Mi"
              cpu: "500m"
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
              - matchExpressions:
                  - key: kubernetes.io/hostname
                    operator: NotIn
                    values:
                      - k3s-control-2
                      - k3s-worker-3
                      - k3s-worker-4
---
apiVersion: v1
kind: Service
metadata:
  name: ollama-mcp
  namespace: ai-inference
  labels:
    app: ollama-mcp
  annotations:
    metallb.universe.tf/loadBalancerIPs: "192.168.87.29"
spec:
  type: LoadBalancer
  ports:
    - port: 8090
      targetPort: 8090
      protocol: TCP
      name: http
  selector:
    app: ollama-mcp
---
# Create the LiteLLM API key secret before deploying:
#   kubectl create secret generic ollama-mcp-secrets -n ai-inference \
#     --from-literal=LITELLM_API_KEY=a699d6c80639dcf56d5fb8f2a99e50d220b5189dcc2fa1fdc8ccee4dab4df77e
requirements.txt (new file, 4 lines)
@@ -0,0 +1,4 @@
mcp>=1.0.0
httpx>=0.27.0
starlette>=0.41.0
uvicorn[standard]>=0.32.0
src/server.py (new file, 321 lines)
@@ -0,0 +1,321 @@
"""
Ollama MCP Server
Exposes local LLM models (via LiteLLM proxy) as MCP tools for Claude Code.
Runs as an HTTP/SSE MCP server in Kubernetes.
"""

import os
import httpx
import logging
from mcp.server import Server
from mcp.server.sse import SseServerTransport
from mcp.types import Tool, TextContent
from starlette.applications import Starlette
from starlette.routing import Route, Mount
from starlette.requests import Request

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

LITELLM_BASE_URL = os.environ.get("LITELLM_BASE_URL", "http://litellm.ai-inference.svc:4000")
LITELLM_API_KEY = os.environ.get("LITELLM_API_KEY", "")
REQUEST_TIMEOUT = int(os.environ.get("REQUEST_TIMEOUT", "120"))

# Model aliases exposed to Claude Code
MODELS = {
    "deepseek-coder": "local/deepseek-coder",  # deepseek-coder-v2:16b - code tasks
    "llama3": "local/llama3",  # llama3.1:8b - general tasks
    "codellama": "local/codellama",  # codellama:13b - code generation
    "qwen-coder": "local/qwen-coder",  # qwen2.5-coder:14b - code tasks
}

DEFAULT_CODE_MODEL = "local/qwen-coder"
DEFAULT_GENERAL_MODEL = "local/llama3"

app = Server("ollama-mcp")


async def call_litellm(model: str, messages: list, max_tokens: int = 2048) -> str:
    """Call LiteLLM proxy with the given model and messages."""
    headers = {"Content-Type": "application/json"}
    if LITELLM_API_KEY:
        headers["Authorization"] = f"Bearer {LITELLM_API_KEY}"

    payload = {
        "model": model,
        "messages": messages,
        "max_tokens": max_tokens,
        "stream": False,
    }

    async with httpx.AsyncClient(timeout=REQUEST_TIMEOUT) as client:
        response = await client.post(
            f"{LITELLM_BASE_URL}/v1/chat/completions",
            headers=headers,
            json=payload,
        )
        response.raise_for_status()
        data = response.json()
        return data["choices"][0]["message"]["content"]


@app.list_tools()
async def list_tools() -> list[Tool]:
    return [
        Tool(
            name="query_local_model",
            description=(
                "Send a prompt to a local Ollama model via LiteLLM. "
                "Use for tasks that don't require Claude's reasoning: summarization, "
                "formatting, simple code generation, boilerplate, regex, quick lookups. "
                f"Available models: {', '.join(MODELS.keys())}. "
                "Defaults to qwen-coder for code tasks, llama3 for general."
            ),
            inputSchema={
                "type": "object",
                "properties": {
                    "prompt": {
                        "type": "string",
                        "description": "The prompt to send to the model.",
                    },
                    "model": {
                        "type": "string",
                        "enum": list(MODELS.keys()),
                        "description": "Model to use. Omit to auto-select based on task type.",
                    },
                    "task_type": {
                        "type": "string",
                        "enum": ["code", "general"],
                        "description": "Task type for auto model selection when model is not specified.",
                        "default": "general",
                    },
                    "system_prompt": {
                        "type": "string",
                        "description": "Optional system prompt to guide model behavior.",
                    },
                    "max_tokens": {
                        "type": "integer",
                        "description": "Maximum tokens in response (default: 2048).",
                        "default": 2048,
                    },
                },
                "required": ["prompt"],
            },
        ),
        Tool(
            name="review_code",
            description=(
                "Review code for bugs, style issues, and improvements using a local code model. "
                "Uses deepseek-coder-v2:16b. Good for quick reviews without sending code to Anthropic."
            ),
            inputSchema={
                "type": "object",
                "properties": {
                    "code": {
                        "type": "string",
                        "description": "The code to review.",
                    },
                    "language": {
                        "type": "string",
                        "description": "Programming language (e.g. python, go, typescript).",
                    },
                    "focus": {
                        "type": "string",
                        "description": "Optional focus area: bugs, style, security, performance, tests.",
                    },
                },
                "required": ["code"],
            },
        ),
        Tool(
            name="summarize",
            description=(
                "Summarize text, logs, or documentation using a local model. "
                "Use for large log files, changelogs, or documentation that doesn't need Claude's analysis."
            ),
            inputSchema={
                "type": "object",
                "properties": {
                    "text": {
                        "type": "string",
                        "description": "Text to summarize.",
                    },
                    "format": {
                        "type": "string",
                        "enum": ["bullet_points", "paragraph", "key_facts"],
                        "description": "Output format for the summary.",
                        "default": "bullet_points",
                    },
                    "max_length": {
                        "type": "string",
                        "enum": ["short", "medium", "detailed"],
                        "description": "Desired summary length.",
                        "default": "medium",
                    },
                },
                "required": ["text"],
            },
        ),
        Tool(
            name="generate_boilerplate",
            description=(
                "Generate boilerplate code, configs, or scaffolding using a local code model. "
                "Use for Dockerfiles, k8s manifests, CI configs, test stubs, etc."
            ),
            inputSchema={
                "type": "object",
                "properties": {
                    "description": {
                        "type": "string",
                        "description": "What to generate (e.g. 'Dockerfile for a Python FastAPI app').",
                    },
                    "context": {
                        "type": "string",
                        "description": "Optional additional context or constraints.",
                    },
                },
                "required": ["description"],
            },
        ),
        Tool(
            name="list_models",
            description="List available local Ollama models and their use cases.",
            inputSchema={
                "type": "object",
                "properties": {},
            },
        ),
    ]


@app.call_tool()
async def call_tool(name: str, arguments: dict) -> list[TextContent]:
    try:
        if name == "list_models":
            model_info = "\n".join([
                "Available local models (routed via LiteLLM):",
                "- deepseek-coder: deepseek-coder-v2:16b — code review, debugging, analysis",
                "- llama3: llama3.1:8b — general tasks, summarization, Q&A",
                "- codellama: codellama:13b — code generation, completion",
                "- qwen-coder: qwen2.5-coder:14b — code tasks, default for code",
                "",
                f"LiteLLM endpoint: {LITELLM_BASE_URL}",
            ])
            return [TextContent(type="text", text=model_info)]

        elif name == "query_local_model":
            prompt = arguments["prompt"]
            task_type = arguments.get("task_type", "general")
            system_prompt = arguments.get("system_prompt")
            max_tokens = arguments.get("max_tokens", 2048)

            if "model" in arguments:
                model = MODELS[arguments["model"]]
            elif task_type == "code":
                model = DEFAULT_CODE_MODEL
            else:
                model = DEFAULT_GENERAL_MODEL

            messages = []
            if system_prompt:
                messages.append({"role": "system", "content": system_prompt})
            messages.append({"role": "user", "content": prompt})

            result = await call_litellm(model, messages, max_tokens)
            return [TextContent(type="text", text=result)]

        elif name == "review_code":
            code = arguments["code"]
            language = arguments.get("language", "")
            focus = arguments.get("focus", "general code quality")

            lang_str = f" {language}" if language else ""
            system = (
                f"You are an expert{lang_str} code reviewer. "
                "Be concise and actionable. Flag actual issues, not style preferences unless asked. "
                "Format: list issues with severity (critical/warning/info), then suggestions."
            )
            prompt = f"Review this{lang_str} code focusing on {focus}:\n\n```{language}\n{code}\n```"

            result = await call_litellm("local/deepseek-coder", [
                {"role": "system", "content": system},
                {"role": "user", "content": prompt},
            ])
            return [TextContent(type="text", text=result)]

        elif name == "summarize":
            text = arguments["text"]
            fmt = arguments.get("format", "bullet_points")
            length = arguments.get("max_length", "medium")

            length_guide = {"short": "2-3 sentences", "medium": "5-8 points", "detailed": "comprehensive"}
            fmt_guide = {
                "bullet_points": "Use bullet points.",
                "paragraph": "Write in paragraph form.",
                "key_facts": "Extract only key facts and numbers.",
            }

            system = "You are a concise technical summarizer. Extract the most important information."
            prompt = (
                f"Summarize the following text. {fmt_guide[fmt]} "
                f"Length: {length_guide[length]}.\n\n{text}"
            )

            result = await call_litellm(DEFAULT_GENERAL_MODEL, [
                {"role": "system", "content": system},
                {"role": "user", "content": prompt},
            ])
            return [TextContent(type="text", text=result)]

        elif name == "generate_boilerplate":
            description = arguments["description"]
            context = arguments.get("context", "")

            system = (
                "You are an expert DevOps and software engineer. "
                "Generate clean, production-ready boilerplate. "
                "Output only the file content with no explanation unless asked."
            )
            prompt = f"Generate: {description}"
            if context:
                prompt += f"\n\nContext/constraints:\n{context}"

            result = await call_litellm(DEFAULT_CODE_MODEL, [
                {"role": "system", "content": system},
                {"role": "user", "content": prompt},
            ])
            return [TextContent(type="text", text=result)]

        else:
            return [TextContent(type="text", text=f"Unknown tool: {name}")]

    except httpx.HTTPStatusError as e:
        logger.error("LiteLLM HTTP error: %s", e)
        return [TextContent(type="text", text=f"LiteLLM error {e.response.status_code}: {e.response.text}")]
    except Exception as e:
        logger.error("Tool error: %s", e)
        return [TextContent(type="text", text=f"Error: {str(e)}")]


def create_starlette_app() -> Starlette:
    sse = SseServerTransport("/messages/")

    async def handle_sse(request: Request):
        async with sse.connect_sse(request.scope, request.receive, request._send) as streams:
            await app.run(streams[0], streams[1], app.create_initialization_options())

    return Starlette(
        routes=[
            Route("/sse", endpoint=handle_sse),
            Mount("/messages/", app=sse.handle_post_message),
            Route("/health", endpoint=lambda r: __import__("starlette.responses", fromlist=["JSONResponse"]).JSONResponse({"status": "ok"})),
        ]
    )


if __name__ == "__main__":
    import uvicorn
    port = int(os.environ.get("PORT", "8090"))
    logger.info("Starting Ollama MCP server on port %d", port)
    logger.info("LiteLLM base URL: %s", LITELLM_BASE_URL)
    uvicorn.run(create_starlette_app(), host="0.0.0.0", port=port)