新增完整的 K3Cloud MCP 服务器实现,包含配置管理、API 客户端、工具定义和签名验证 更新文件工具 MCP 服务器的 MinIO 配置和上传路径前缀
527 lines
20 KiB
Python
527 lines
20 KiB
Python
import asyncio
|
|
import base64
|
|
import hashlib
|
|
import json
|
|
import mimetypes
|
|
import re
|
|
import tempfile
|
|
import zipfile
|
|
from datetime import datetime
|
|
from pathlib import Path
|
|
from urllib.parse import urlparse
|
|
from typing import Any, Dict, List, Optional, Tuple
|
|
from xml.etree import ElementTree as ET
|
|
|
|
import httpx
|
|
from minio import Minio
|
|
from mcp.types import TextContent, Tool
|
|
|
|
from .config import Config
|
|
# MCP tool declarations served by this module. Descriptions are user-facing
# (Chinese) runtime strings; each tool name must match a branch in
# handle_call_tool below.
tools: List[Tool] = [
    # Dump a file object (inline payload and/or local path) to a JSON string.
    Tool(
        name="file_to_json",
        description="将文件对象转储为 JSON 字符串",
        inputSchema={
            "type": "object",
            "properties": {
                "file": {
                    "type": "object",
                    "properties": {
                        "name": {"type": "string"},
                        "type": {"type": "string"},
                        "size": {"type": "integer"},
                        "last_modified": {"type": "integer"},
                        "content_base64": {"type": "string"}
                    },
                    "required": ["name"]
                },
                "file_path": {"type": "string"}
            },
            # Neither property is required by the schema: the handler accepts
            # either "file" or "file_path" and validates at call time.
            "required": []
        }
    ),
    # Convert an inline file payload (base64 content) to a data: URI.
    Tool(
        name="file_to_data_uri",
        description="将文件转换为 data URI(Base64 编码)",
        inputSchema={
            "type": "object",
            "properties": {
                "file": {
                    "type": "object",
                    "properties": {
                        "name": {"type": "string"},
                        "type": {"type": "string"},
                        "size": {"type": "integer"},
                        "last_modified": {"type": "integer"},
                        "content_base64": {"type": "string"}
                    },
                    "required": ["name", "content_base64"]
                },
                # Optional explicit MIME type overriding the payload's "type".
                "type": {"type": "string"},
                "file_path": {"type": "string"}
            },
            "required": []
        }
    ),
    # Convert a local file path (or URL, see handler) to a data: URI.
    Tool(
        name="file_path_to_data_uri",
        description="将文件路径转换为 data URI(Base64 编码)",
        inputSchema={
            "type": "object",
            "properties": {
                "file_path": {"type": "string"},
                "type": {"type": "string"}
            },
            "required": ["file_path"]
        }
    ),
    # Download a URL and return its body as a data: URI.
    Tool(
        name="url_to_data_uri",
        description="将文件 URL 转换为 data URI(Base64 编码)",
        inputSchema={
            "type": "object",
            "properties": {
                "url": {"type": "string"}
            },
            "required": ["url"]
        }
    ),
    # Download a URL to a local (temp or caller-chosen) file and return the path.
    Tool(
        name="url_to_temp_file",
        description="将文件 URL 下载为临时文件并返回路径",
        inputSchema={
            "type": "object",
            "properties": {
                "url": {"type": "string"},
                "file_path": {"type": "string"}
            },
            "required": ["url"]
        }
    ),
    # Resolve a WPS DISPIMG image key inside an xlsx to a temp image file.
    Tool(
        name="excel_image_key_to_temp_file",
        description="根据Excel内image_key定位图片并转为临时文件路径",
        inputSchema={
            "type": "object",
            "properties": {
                "excel_path": {"type": "string"},
                "image_key": {"type": "string"}
            },
            "required": ["excel_path", "image_key"]
        }
    ),
    # Upload a local file to MinIO and return its public URL.
    Tool(
        name="upload_file_to_minio",
        description="上传本地文件到MinIO并返回URL",
        inputSchema={
            "type": "object",
            "properties": {
                "file_path": {"type": "string"}
            },
            "required": ["file_path"]
        }
    )
]
|
|
|
|
|
|
def _guess_mime_type(name: Optional[str]) -> str:
|
|
if not name:
|
|
return "application/octet-stream"
|
|
mime_type, _ = mimetypes.guess_type(name)
|
|
return mime_type or "application/octet-stream"
|
|
|
|
|
|
def _is_url(value: Optional[str]) -> bool:
|
|
if not value:
|
|
return False
|
|
parsed = urlparse(value)
|
|
return parsed.scheme in {"http", "https"} and bool(parsed.netloc)
|
|
|
|
|
|
def _decode_base64(content_base64: str) -> bytes:
|
|
try:
|
|
return base64.b64decode(content_base64, validate=True)
|
|
except Exception:
|
|
return base64.b64decode(content_base64)
|
|
|
|
|
|
def _build_data_uri(data: bytes, mime_type: str) -> str:
|
|
encoded = base64.b64encode(data).decode("ascii")
|
|
return f"data:{mime_type};base64,{encoded}"
|
|
|
|
|
|
def _extract_file_payload(arguments: Dict[str, Any]) -> Tuple[Dict[str, Any], Optional[bytes], Optional[str]]:
|
|
file_payload = arguments.get("file")
|
|
file_path = arguments.get("file_path")
|
|
if file_payload is None and file_path is None:
|
|
raise ValueError("missing file or file_path")
|
|
|
|
name = None
|
|
mime_type = None
|
|
size = None
|
|
last_modified = None
|
|
content_base64 = None
|
|
data = None
|
|
|
|
if file_payload is not None:
|
|
name = file_payload.get("name")
|
|
mime_type = file_payload.get("type")
|
|
size = file_payload.get("size")
|
|
last_modified = file_payload.get("last_modified")
|
|
content_base64 = file_payload.get("content_base64")
|
|
|
|
if file_path:
|
|
path = Path(file_path)
|
|
if name is None:
|
|
name = path.name
|
|
if content_base64 is None:
|
|
data = path.read_bytes()
|
|
|
|
if data is None and content_base64 is not None:
|
|
data = _decode_base64(content_base64)
|
|
|
|
return (
|
|
{
|
|
"name": name,
|
|
"type": mime_type,
|
|
"size": size,
|
|
"last_modified": last_modified,
|
|
"content_base64": content_base64
|
|
},
|
|
data,
|
|
name
|
|
)
|
|
|
|
|
|
def _normalize_mime_type(mime_type: Optional[str], name: Optional[str]) -> str:
|
|
if mime_type:
|
|
return mime_type
|
|
return _guess_mime_type(name)
|
|
|
|
|
|
def _normalize_header(value: Any) -> str:
|
|
if value is None:
|
|
return ""
|
|
return str(value).strip().lower()
|
|
|
|
|
|
def _extract_dispimg_id(value: Any) -> str:
|
|
if value is None:
|
|
return ""
|
|
text = str(value).strip()
|
|
if not text:
|
|
return ""
|
|
match = re.search(r"dispimg\(\s*\"([^\"]+)\"", text, re.IGNORECASE)
|
|
if match:
|
|
return match.group(1).strip()
|
|
match = re.search(r"dispimg\(\s*'([^']+)'", text, re.IGNORECASE)
|
|
if match:
|
|
return match.group(1).strip()
|
|
return text
|
|
|
|
|
|
def _resolve_dispimg_temp_file(excel_path: str, image_key: str) -> str:
    """Extract the WPS DISPIMG image behind *image_key* to a temp file.

    Opens the xlsx as a zip, maps DISPIMG ids to embedded media via
    xl/cellimages.xml and its relationships file (a WPS-specific extension),
    writes the matching image to a NamedTemporaryFile and returns its path.
    Raises ValueError when the id or any required part is missing.
    """
    image_id = _extract_dispimg_id(image_key)
    if not image_id:
        raise ValueError("missing dispimg id")
    with zipfile.ZipFile(excel_path) as zip_ref:
        names = set(zip_ref.namelist())
        # Both parts are WPS-only; plain Excel files will fail here.
        if "xl/cellimages.xml" not in names:
            raise ValueError("cellimages.xml not found in excel")
        if "xl/_rels/cellimages.xml.rels" not in names:
            raise ValueError("cellimages.xml.rels not found in excel")
        # Relationship id -> media target (e.g. "media/image1.png").
        rels_root = ET.fromstring(zip_ref.read("xl/_rels/cellimages.xml.rels"))
        rel_ns = "http://schemas.openxmlformats.org/package/2006/relationships"
        rels_map: Dict[str, str] = {}
        for rel in rels_root.findall(f"{{{rel_ns}}}Relationship"):
            rel_id = rel.attrib.get("Id")
            target = rel.attrib.get("Target")
            if rel_id and target:
                rels_map[rel_id] = target
        cell_root = ET.fromstring(zip_ref.read("xl/cellimages.xml"))
        namespaces = {
            "etc": "http://www.wps.cn/officeDocument/2017/etCustomData",
            "xdr": "http://schemas.openxmlformats.org/drawingml/2006/spreadsheetDrawing",
            "a": "http://schemas.openxmlformats.org/drawingml/2006/main",
            "r": "http://schemas.openxmlformats.org/officeDocument/2006/relationships"
        }
        # DISPIMG display name (cNvPr@name) -> relationship id (blip@r:embed).
        name_to_embed: Dict[str, str] = {}
        for cell_image in cell_root.findall("etc:cellImage", namespaces):
            c_nv_pr = cell_image.find(".//xdr:cNvPr", namespaces)
            blip = cell_image.find(".//a:blip", namespaces)
            name = c_nv_pr.attrib.get("name") if c_nv_pr is not None else ""
            embed_id = blip.attrib.get(f"{{{namespaces['r']}}}embed") if blip is not None else ""
            if name and embed_id:
                name_to_embed[name] = embed_id
        if not name_to_embed:
            raise ValueError("no cell images found in excel")
        # Try the id as given plus its "ID_"-prefixed/stripped variant.
        candidates = [image_id]
        if image_id.startswith("ID_"):
            candidates.append(image_id[3:])
        else:
            candidates.append(f"ID_{image_id}")
        name_to_embed_lower = {key.lower(): value for key, value in name_to_embed.items()}
        embed_id = ""
        for candidate in candidates:
            # Exact match first, then case-insensitive.
            if candidate in name_to_embed:
                embed_id = name_to_embed[candidate]
                break
            lower_candidate = candidate.lower()
            if lower_candidate in name_to_embed_lower:
                embed_id = name_to_embed_lower[lower_candidate]
                break
        if not embed_id:
            raise ValueError("dispimg id not found in excel")
        target = rels_map.get(embed_id)
        if not target:
            raise ValueError("dispimg image target not found in excel")
        # Relationship targets are relative to xl/.
        target_path = f"xl/{target.lstrip('/')}"
        if target_path not in names:
            raise ValueError("dispimg image file missing in excel")
        suffix = Path(target).suffix
        # Create the temp file name, then write after the handle is closed.
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_file:
            temp_path = Path(temp_file.name)
        temp_path.write_bytes(zip_ref.read(target_path))
        return str(temp_path)
|
|
|
|
|
|
def _resolve_image_source_from_excel(excel_path: str, image_key: str) -> str:
    """Find the image path/URL for *image_key* from the first worksheet's table.

    Fallback for workbooks without embedded DISPIMG data: locates the
    image_key column in the header row, finds the matching data row, and
    returns the first non-empty value from a known image/path column (or
    the key cell itself). Relative paths are resolved against the
    workbook's directory. Raises ValueError when nothing matches.
    """
    # Imported lazily so the module loads even without openpyxl installed.
    from openpyxl import load_workbook

    workbook = load_workbook(excel_path, read_only=True, data_only=True)
    try:
        # Only the first sheet is consulted.
        worksheet = workbook.worksheets[0]
        header_row = next(worksheet.iter_rows(min_row=1, max_row=1, values_only=True), None)
        if not header_row:
            raise ValueError("excel header row is empty")
        # Normalized header name -> zero-based column index.
        header_map: Dict[str, int] = {}
        for index, header in enumerate(header_row):
            name = _normalize_header(header)
            if name:
                header_map[name] = index
        key_col_index = None
        for key_name in ("image_key", "imagekey", "key"):
            if key_name in header_map:
                key_col_index = header_map[key_name]
                break
        if key_col_index is None:
            raise ValueError("missing image_key column in excel")
        # Columns that may carry the actual image location, in priority order.
        candidate_columns = ["image_path", "image_url", "image", "url", "file_path", "path", "image_file"]
        candidate_indices = [header_map[name] for name in candidate_columns if name in header_map]
        matched_source = ""
        for row in worksheet.iter_rows(min_row=2, values_only=True):
            if key_col_index >= len(row):
                continue
            cell_value = row[key_col_index]
            if cell_value is None:
                continue
            if str(cell_value).strip() != image_key:
                continue
            for index in candidate_indices:
                if index < len(row):
                    candidate_value = row[index]
                    if candidate_value is not None and str(candidate_value).strip():
                        matched_source = str(candidate_value).strip()
                        break
            # No candidate column matched: treat the key cell itself as the source.
            if not matched_source:
                matched_source = str(cell_value).strip()
            break
        if not matched_source:
            raise ValueError(f"missing image source for image_key: {image_key}")
        if not _is_url(matched_source):
            if not Path(matched_source).is_absolute():
                matched_source = str(Path(excel_path).parent / matched_source)
        return matched_source
    finally:
        # read_only workbooks hold the file handle until closed.
        workbook.close()
|
|
|
|
|
|
def _build_local_temp_file(file_path: str) -> str:
|
|
source_path = Path(file_path)
|
|
if not source_path.is_file():
|
|
raise FileNotFoundError(f"image file not found: {file_path}")
|
|
suffix = source_path.suffix
|
|
with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_file:
|
|
temp_path = Path(temp_file.name)
|
|
temp_path.write_bytes(source_path.read_bytes())
|
|
return str(temp_path)
|
|
|
|
|
|
def _normalize_minio_endpoint(endpoint: str) -> Tuple[str, Optional[bool], str]:
|
|
raw = str(endpoint).strip()
|
|
if not raw:
|
|
return "", None, ""
|
|
if raw.startswith("http://") or raw.startswith("https://"):
|
|
parsed = urlparse(raw)
|
|
secure = parsed.scheme == "https"
|
|
return parsed.netloc, secure, f"{parsed.scheme}://{parsed.netloc}"
|
|
return raw, None, f"http://{raw}"
|
|
|
|
|
|
def _hash_file_md5(file_path: str) -> str:
|
|
hasher = hashlib.md5()
|
|
with open(file_path, "rb") as handle:
|
|
for chunk in iter(lambda: handle.read(1024 * 1024), b""):
|
|
hasher.update(chunk)
|
|
return hasher.hexdigest()
|
|
|
|
|
|
def _upload_file_to_minio_sync(file_path: str) -> str:
    """Upload a local file to MinIO and return its public URL (blocking).

    Endpoint and credentials come from Config.MINIO_*; the bucket is created
    on first use. The object key is upload/<date>/<md5>/<md5><suffix>, so
    identical content uploaded on the same day maps to the same object.
    Raises ValueError for missing configuration and FileNotFoundError for a
    bad path.
    """
    if not file_path:
        raise ValueError("missing file_path")
    source_path = Path(file_path)
    if not source_path.is_file():
        raise FileNotFoundError(f"file_path not found: {file_path}")
    endpoint_raw = Config.MINIO_ENDPOINT
    access_key = Config.MINIO_ACCESS_KEY
    secret_key = Config.MINIO_SECRET_KEY
    if not endpoint_raw:
        raise ValueError("missing minio_endpoint")
    if not access_key:
        raise ValueError("missing minio_access_key")
    if not secret_key:
        raise ValueError("missing minio_secret_key")
    endpoint, endpoint_secure, endpoint_base = _normalize_minio_endpoint(endpoint_raw)
    if not endpoint:
        raise ValueError("invalid minio_endpoint")
    # Scheme-less endpoints default to plain HTTP.
    secure = endpoint_secure or False
    client = Minio(endpoint, access_key=access_key, secret_key=secret_key, secure=secure)
    # Hard-coded bucket/prefix for this deployment.
    bucket = "lzwcai"
    prefix = "upload"
    if not client.bucket_exists(bucket):
        client.make_bucket(bucket)
    # NOTE(review): datetime.now() is local time — confirm the intended zone.
    date_str = datetime.now().strftime("%Y-%m-%d")
    file_hash = _hash_file_md5(file_path)
    suffix = source_path.suffix
    # Content-addressed key: same bytes on the same day collide on purpose.
    object_name = f"{prefix}/{date_str}/{file_hash}/{file_hash}{suffix}"
    content_type = _guess_mime_type(source_path.name)
    client.fput_object(bucket, object_name, file_path, content_type=content_type)
    scheme = "https" if secure else "http"
    public_base = endpoint_base if endpoint_base else f"{scheme}://{endpoint}"
    public_base = f"{public_base.rstrip('/')}/{bucket}"
    return f"{public_base}/{object_name}"
|
|
|
|
|
|
def _build_file_json(arguments: Dict[str, Any]) -> str:
    """Serialize the normalized file info to JSON, filling in missing type/size."""
    info, data, name = _extract_file_payload(arguments)
    if info["type"] is None:
        info["type"] = _guess_mime_type(name)
    if data is not None and info["size"] is None:
        info["size"] = len(data)
    return json.dumps(info, ensure_ascii=False)
|
|
|
|
|
|
def _build_file_data_uri(arguments: Dict[str, Any]) -> str:
    """Build a data: URI from the file payload; requires decodable content."""
    info, data, name = _extract_file_payload(arguments)
    if data is None:
        raise ValueError("missing file content for data uri")
    # An explicit "type" argument beats the payload's own type.
    explicit = arguments.get("type") or info.get("type")
    return _build_data_uri(data, _normalize_mime_type(explicit, name))
|
|
|
|
|
|
def _build_path_data_uri(arguments: Dict[str, Any]) -> str:
    """Read a local file and return its contents as a data: URI."""
    file_path = arguments.get("file_path")
    if not file_path:
        raise ValueError("missing file_path")
    source = Path(file_path)
    content = source.read_bytes()
    mime = arguments.get("type") or _guess_mime_type(source.name)
    return _build_data_uri(content, mime)
|
|
|
|
|
|
async def _build_url_data_uri(arguments: Dict[str, Any]) -> str:
    """Download *url* and return the response body as a data: URI.

    The MIME type comes from the Content-Type header when present, otherwise
    it is guessed from the URL. Raises ValueError for a missing url and
    httpx.HTTPStatusError for non-2xx responses.
    """
    url = arguments.get("url")
    if not url:
        raise ValueError("missing url")
    async with httpx.AsyncClient(follow_redirects=True, timeout=20) as client:
        response = await client.get(url)
        response.raise_for_status()
    content_type = response.headers.get("content-type", "")
    # Drop parameters such as "; charset=utf-8".
    mime_type = content_type.split(";")[0].strip() if content_type else ""
    if not mime_type:
        mime_type = _guess_mime_type(url)
    return _build_data_uri(response.content, mime_type)
|
|
|
|
|
|
async def _build_url_file_path(arguments: Dict[str, Any]) -> str:
    """Download *url* to file_path (or a fresh temp file) and return the path.

    When file_path is given, its parent directories are created; otherwise a
    NamedTemporaryFile carrying the URL's extension is allocated. Raises
    ValueError for a missing url and httpx.HTTPStatusError for HTTP errors.
    """
    url = arguments.get("url")
    if not url:
        raise ValueError("missing url")
    file_path = arguments.get("file_path")
    if file_path:
        path = Path(file_path)
        path.parent.mkdir(parents=True, exist_ok=True)
    else:
        # Keep the URL's extension so downstream consumers can sniff the type.
        suffix = Path(urlparse(url).path).suffix
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_file:
            path = Path(temp_file.name)
    async with httpx.AsyncClient(follow_redirects=True, timeout=20) as client:
        response = await client.get(url)
        response.raise_for_status()
    path.write_bytes(response.content)
    return str(path)
|
|
|
|
|
|
async def handle_call_tool(name: str, arguments: Dict[str, Any]) -> List[TextContent]:
    """Dispatch an MCP tool call by *name*.

    Always returns a single-item TextContent list; any exception is caught
    and rendered as a "Failed to call tool ..." message instead of being
    raised to the MCP transport.
    """
    try:
        if name == "file_to_json":
            result = _build_file_json(arguments)
        elif name == "file_to_data_uri":
            file_path = arguments.get("file_path")
            # Convenience: a URL passed as file_path is fetched remotely.
            if _is_url(file_path):
                result = await _build_url_data_uri({"url": file_path})
            else:
                result = _build_file_data_uri(arguments)
        elif name == "file_path_to_data_uri":
            file_path = arguments.get("file_path")
            if _is_url(file_path):
                result = await _build_url_data_uri({"url": file_path})
            else:
                result = _build_path_data_uri(arguments)
        elif name == "url_to_data_uri":
            result = await _build_url_data_uri(arguments)
        elif name == "url_to_temp_file":
            result = await _build_url_file_path(arguments)
        elif name == "excel_image_key_to_temp_file":
            excel_path = arguments.get("excel_path")
            image_key = arguments.get("image_key")
            if not excel_path:
                raise ValueError("missing excel_path")
            if not image_key:
                raise ValueError("missing image_key")
            excel_path = str(excel_path).strip()
            image_key = str(image_key).strip()
            path = Path(excel_path)
            if not path.exists():
                raise FileNotFoundError(f"excel_path not found: {excel_path}")
            # Legacy .xls is a different binary format and cannot be unzipped.
            if path.suffix.lower() in (".xls",):
                raise ValueError("xls is not supported, please convert to xlsx")
            if path.suffix.lower() not in (".xlsx", ".xlsm", ".xltx", ".xltm"):
                raise ValueError("excel_path must be xlsx format")
            dispimg_error: Optional[Exception] = None
            table_error: Optional[Exception] = None
            # First try the WPS DISPIMG embedded-image lookup; on failure fall
            # back to resolving a path/URL column from the sheet itself. Both
            # failures are combined into one error message.
            try:
                result = _resolve_dispimg_temp_file(excel_path, image_key)
            except Exception as exc:
                dispimg_error = exc
                try:
                    image_source = _resolve_image_source_from_excel(excel_path, image_key)
                    if _is_url(image_source):
                        result = await _build_url_file_path({"url": image_source})
                    else:
                        result = _build_local_temp_file(image_source)
                except Exception as exc2:
                    table_error = exc2
                    raise ValueError(f"excel image not found: dispimg={dispimg_error}; table={table_error}")
        elif name == "upload_file_to_minio":
            file_path = str(arguments.get("file_path", "")).strip()
            if not file_path:
                raise ValueError("missing file_path")
            # The MinIO SDK is blocking; run it off the event loop.
            result = await asyncio.to_thread(_upload_file_to_minio_sync, file_path)
        else:
            raise ValueError(f"unknown tool name: {name}")
        return [TextContent(type="text", text=result)]
    except Exception as exc:
        return [TextContent(type="text", text=f"Failed to call tool {name}: {exc}")]
|