# File-viewer metadata (extraction residue): 466 lines, 16 KiB, Python.
import asyncio
import hmac
import json
import logging
import os
import re
import subprocess
from pathlib import Path
from typing import Any, Dict, Optional, Tuple

from fastapi import APIRouter, HTTPException, Header

logger = logging.getLogger(__name__)

# All routes are mounted under /api/system; the frontend calls /api/system/... directly.
router = APIRouter(prefix="/api/system")

def _get_redis_client_for_logs():
    """Return a live Redis client for reading logs, or ``None`` if unavailable.

    Two strategies are tried in order:

    1. Reuse the connection held by ``config_manager.config_manager`` (its
       ``_redis_client`` attribute), provided ``_redis_connected`` is truthy
       and a ``ping()`` succeeds. This avoids opening a second connection.
    2. Build a dedicated client from the ``REDIS_URL``, ``REDIS_USE_TLS``,
       ``REDIS_USERNAME``, ``REDIS_PASSWORD``, ``REDIS_SSL_CERT_REQS`` and
       ``REDIS_SSL_CA_CERTS`` environment variables and verify it with
       ``ping()``.

    Every failure (missing module, connection error, bad settings) is
    swallowed on purpose: callers only need to know whether a usable client
    exists.

    Returns:
        A ``redis.Redis`` client, or ``None`` when neither strategy works.
    """
    # 1) Reuse the config_manager connection.
    try:
        # backend/config_manager.py; per the original author's note it is also
        # the module that loads .env.
        import config_manager

        cm = getattr(config_manager, "config_manager", None)
        if cm is not None:
            shared_client = getattr(cm, "_redis_client", None)
            shared_connected = getattr(cm, "_redis_connected", False)
            if shared_client is not None and shared_connected:
                try:
                    shared_client.ping()
                    return shared_client
                except Exception:
                    # Stale shared connection: fall through to strategy 2.
                    pass
    except Exception:
        pass

    # 2) Create a dedicated client.
    try:
        import redis  # type: ignore

        redis_url = os.getenv("REDIS_URL", "redis://localhost:6379")
        redis_use_tls = os.getenv("REDIS_USE_TLS", "False").lower() == "true"
        redis_username = os.getenv("REDIS_USERNAME", None)
        redis_password = os.getenv("REDIS_PASSWORD", None)
        ssl_cert_reqs = os.getenv("REDIS_SSL_CERT_REQS", "required")
        ssl_ca_certs = os.getenv("REDIS_SSL_CA_CERTS", None)

        kwargs: Dict[str, Any] = {
            "decode_responses": True,
            "username": redis_username,
            "password": redis_password,
            # Short timeouts so an unreachable Redis does not stall the request.
            "socket_connect_timeout": 1,
            "socket_timeout": 1,
        }
        if redis_url.startswith("rediss://") or redis_use_tls:
            kwargs["ssl_cert_reqs"] = ssl_cert_reqs
            if ssl_ca_certs:
                kwargs["ssl_ca_certs"] = ssl_ca_certs
            # Hostname verification is enabled only for the strict "required"
            # mode; every other value ("none", "optional", ...) disables it.
            kwargs["ssl_check_hostname"] = ssl_cert_reqs == "required"

        client = redis.from_url(redis_url, **kwargs)
        client.ping()
        return client
    except Exception:
        return None
@router.get("/logs")
async def get_logs(
    limit: int = 200,
    service: Optional[str] = None,
    level: Optional[str] = None,
    x_admin_token: Optional[str] = Header(default=None, alias="X-Admin-Token"),
) -> Dict[str, Any]:
    """Return log entries read from a Redis list.

    The list key comes from ``REDIS_LOG_LIST_KEY`` (default
    ``ats:logs:error``). The first ``limit`` elements of the list are read
    and each one is parsed as JSON; elements that are not JSON objects are
    skipped. The ``service`` / ``level`` filters are applied *after* that
    read, so fewer than ``limit`` items may come back.

    NOTE(review): the original description says "latest logs"; that holds
    only if the producer pushes new entries at the head of the list - confirm
    against the log writer.

    Args:
        limit: Number of list elements to read. Non-positive values fall back
            to 200; values above 2000 are capped at 2000.
        service: If given, keep only entries whose ``service`` field equals
            this string exactly (the original docs mention ``backend`` and
            ``trading_system``).
        level: If given, keep only entries whose ``level`` field equals this
            string exactly (e.g. ``ERROR``, ``CRITICAL``).
        x_admin_token: Value of the ``X-Admin-Token`` header, checked against
            ``SYSTEM_CONTROL_TOKEN`` when that variable is set.

    Returns:
        ``{"key": list key, "count": number of items, "items": [dict, ...]}``.

    Raises:
        HTTPException: 401 on a bad token, 503 when Redis is unavailable,
            500 when the list read fails.
    """
    _require_admin(os.getenv("SYSTEM_CONTROL_TOKEN", "").strip(), x_admin_token)

    if limit <= 0:
        limit = 200
    limit = min(limit, 2000)

    list_key = os.getenv("REDIS_LOG_LIST_KEY", "ats:logs:error").strip() or "ats:logs:error"

    client = _get_redis_client_for_logs()
    if client is None:
        raise HTTPException(status_code=503, detail="Redis 不可用,无法读取日志")

    try:
        raw_items = client.lrange(list_key, 0, limit - 1)
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"读取 Redis 日志失败: {e}") from e

    items: list[Dict[str, Any]] = []
    for raw in raw_items or []:
        # Clients created without decode_responses hand back bytes.
        text = raw.decode("utf-8", errors="ignore") if isinstance(raw, bytes) else raw
        if not isinstance(text, str):
            continue
        try:
            parsed = json.loads(text)
        except (ValueError, RecursionError):
            # Best effort: a malformed entry must not break the whole listing.
            continue
        if not isinstance(parsed, dict):
            continue
        if service and str(parsed.get("service")) != service:
            continue
        if level and str(parsed.get("level")) != level:
            continue
        items.append(parsed)

    return {
        "key": list_key,
        "count": len(items),
        "items": items,
    }
def _require_admin(token: Optional[str], provided: Optional[str]) -> None:
    """Enforce the optional shared-secret check used by every endpoint here.

    Protection is opt-in: when ``token`` is empty (``SYSTEM_CONTROL_TOKEN``
    not configured) every request is allowed. For production the original
    author strongly recommends additional authentication or an IP allow-list
    at the Nginx layer.

    Args:
        token: The expected secret; falsy means the check is disabled.
        provided: The value the client sent in ``X-Admin-Token``.

    Raises:
        HTTPException: 401 when a token is configured and ``provided`` is
            missing or different.
    """
    if not token:
        return
    # Constant-time comparison so response timing does not leak how much of
    # the token matched. Comparing bytes keeps non-ASCII tokens working
    # (compare_digest rejects non-ASCII str arguments).
    if not provided or not hmac.compare_digest(
        provided.encode("utf-8"), token.encode("utf-8")
    ):
        raise HTTPException(status_code=401, detail="Unauthorized")
def _build_supervisorctl_cmd(args: list[str]) -> list[str]:
    """Build the argv list for a ``supervisorctl`` invocation.

    Environment variables read:

    * ``SUPERVISORCTL_PATH`` - executable to run (default ``supervisorctl``).
    * ``SUPERVISOR_CONF`` - config file passed via ``-c``. When empty, a few
      common locations (BaoTa panel plugin, system-wide) are probed and the
      first existing one is used; if none exists ``-c`` is omitted.
    * ``SUPERVISOR_USE_SUDO`` - ``"true"`` (case-insensitive) prefixes the
      command with ``sudo -n``.

    Args:
        args: Sub-command and its arguments, e.g. ``["status", "auto_sys"]``.

    Returns:
        The full argv list, suitable for ``subprocess.run`` without a shell.
    """
    supervisorctl_path = os.getenv("SUPERVISORCTL_PATH", "supervisorctl")
    supervisor_conf = os.getenv("SUPERVISOR_CONF", "").strip()
    use_sudo = os.getenv("SUPERVISOR_USE_SUDO", "false").lower() == "true"

    # No explicit SUPERVISOR_CONF: auto-detect common paths (BaoTa / system).
    if not supervisor_conf:
        candidates = (
            "/www/server/panel/plugin/supervisor/supervisord.conf",
            "/www/server/panel/plugin/supervisor/supervisor.conf",
            "/etc/supervisor/supervisord.conf",
            "/etc/supervisord.conf",
        )
        for candidate in candidates:
            try:
                if Path(candidate).exists():
                    supervisor_conf = candidate
                    break
            except OSError:
                # An unreadable location is treated as "not there".
                continue

    cmd: list[str] = []
    if use_sudo:
        # Requires a NOPASSWD sudoers entry; -n makes sudo fail instead of
        # hanging on a password prompt.
        cmd += ["sudo", "-n"]
    cmd.append(supervisorctl_path)
    if supervisor_conf:
        cmd += ["-c", supervisor_conf]
    cmd += args
    return cmd
def _run_supervisorctl(args: list[str]) -> str:
    """Run ``supervisorctl`` with ``args`` and return its combined output.

    The command is built by ``_build_supervisorctl_cmd`` and executed without
    a shell, with a 10 second timeout.

    Args:
        args: Sub-command and arguments, e.g. ``["restart", "auto_sys"]``.

    Returns:
        Stripped stdout followed by stripped stderr (newline-separated, empty
        parts omitted).

    Raises:
        RuntimeError: On timeout, or on a non-zero exit status other than the
            tolerated "status: not running" case described below. The message
            is the combined output when there is any.
    """
    cmd = _build_supervisorctl_cmd(args)
    try:
        res = subprocess.run(cmd, capture_output=True, text=True, timeout=10)
    except subprocess.TimeoutExpired as exc:
        raise RuntimeError("supervisorctl 超时(10s)") from exc

    out = (res.stdout or "").strip()
    err = (res.stderr or "").strip()
    combined = "\n".join(part for part in (out, err) if part)

    if res.returncode != 0:
        # Supervisor 4.x follows the LSB convention for `status`: exit code 3
        # means "listed, but at least one process is not RUNNING". That is a
        # valid answer (e.g. "auto_sys STOPPED Not started"), not a failure,
        # so hand the output back for parsing instead of raising.
        # An unknown process name must still raise so callers can fall back.
        # NOTE(review): confirm the exit codes against the deployed
        # supervisor version.
        status_not_running = (
            res.returncode == 3
            and bool(args)
            and args[0] == "status"
            and bool(out)
            and "no such process" not in combined.lower()
        )
        if not status_not_running:
            raise RuntimeError(combined or f"supervisorctl failed (exit={res.returncode})")
    return combined
def _parse_supervisor_status(raw: str) -> Tuple[bool, Optional[int], str]:
    """Extract run state and pid from ``supervisorctl status`` output.

    Typical input lines::

        auto_sys    RUNNING   pid 1234, uptime 0:10:00
        auto_sys    STOPPED   Not started

    Matching is by substring over the whole text, so if several lines are
    passed in, a ``RUNNING`` on any of them wins.

    Args:
        raw: Text printed by ``supervisorctl status``.

    Returns:
        ``(running, pid, state)``. ``pid`` is only set when the state is
        ``RUNNING`` and a ``pid N`` token is present. ``state`` is
        ``"UNKNOWN"`` when no known state name is found.
    """
    if "RUNNING" in raw:
        pid_match = re.search(r"\bpid\s+(\d+)\b", raw)
        return True, int(pid_match.group(1)) if pid_match else None, "RUNNING"

    # Non-running states; STOPPING is included so a process that is shutting
    # down is not misreported as UNKNOWN.
    for state in ("STOPPED", "STOPPING", "FATAL", "EXITED", "BACKOFF", "STARTING", "UNKNOWN"):
        if state in raw:
            return False, None, state
    return False, None, "UNKNOWN"
def _get_program_name() -> str:
    """Return the supervisor program name of the trading system.

    Read from ``SUPERVISOR_TRADING_PROGRAM``; an unset or blank value falls
    back to ``auto_sys`` (the ``[program:auto_sys]`` entry of the BaoTa
    supervisor config the original author refers to).
    """
    return os.getenv("SUPERVISOR_TRADING_PROGRAM", "auto_sys").strip() or "auto_sys"
def _select_best_process_name(program: str, status_all_raw: str) -> Optional[str]:
    """Pick the real supervisor process name that best matches ``program``.

    Works on the full output of ``supervisorctl status`` and copes with the
    ``group:process`` naming scheme (e.g. ``auto_sys:auto_sys_00``).

    Preference order:

    1. Exact ``program``, then ``program_00``, then ``program:program_00``.
    2. The first listed name starting with ``program_``.
    3. The first listed name starting with ``program:``.

    Args:
        program: Configured program name.
        status_all_raw: Output of ``supervisorctl status`` (all processes).

    Returns:
        The chosen name, or ``None`` when the output is empty or nothing
        matches.
    """
    if not status_all_raw:
        return None

    # The process name is the first whitespace-separated token of each line.
    names = [
        line.split(None, 1)[0]
        for line in status_all_raw.splitlines()
        if line.strip()
    ]

    for candidate in (program, f"{program}_00", f"{program}:{program}_00"):
        if candidate in names:
            return candidate

    # All "program_" matches are considered before any "program:" match.
    for prefix in (program + "_", program + ":"):
        for name in names:
            if name.startswith(prefix):
                return name

    return None
def _status_with_fallback(program: str) -> Tuple[str, Optional[str], Optional[str]]:
    """Query the status of ``program``, resolving its real name if needed.

    ``supervisorctl status PROGRAM`` is tried first. If that fails with a
    "no such process" error, the full ``supervisorctl status`` listing is
    fetched and ``_select_best_process_name`` is used to find the actual
    name (e.g. ``auto_sys:auto_sys_00``), which is then queried.

    Args:
        program: Configured program name.

    Returns:
        ``(raw, resolved_name, status_all)``:

        * direct hit - ``(status output, program, None)``;
        * resolved via the listing - ``(status output, resolved, listing)``;
        * nothing resolved, or the second query failed -
          ``(listing, None, listing)`` so the caller can still show which
          processes exist.

    Raises:
        Exception: Any error from the first query that is not a
            "no such process" error, and any error from fetching the full
            listing.
    """
    try:
        raw = _run_supervisorctl(["status", program])
        return raw, program, None
    except Exception as e:
        if "no such process" not in str(e).lower():
            raise

    status_all = _run_supervisorctl(["status"])
    resolved = _select_best_process_name(program, status_all)
    if resolved:
        try:
            raw = _run_supervisorctl(["status", resolved])
            return raw, resolved, status_all
        except Exception:
            # Last resort: return the full listing so the real process name
            # can be identified by a human.
            return status_all, None, status_all
    return status_all, None, status_all
def _action_with_fallback(action: str, program: str) -> Tuple[str, Optional[str], Optional[str]]:
    """Run start/stop/restart on ``program``, resolving its real name if needed.

    ``supervisorctl ACTION PROGRAM`` is tried first. If that fails with a
    "no such process" error, the full status listing is fetched, the real
    process name is resolved with ``_select_best_process_name`` and the
    action is retried on it.

    Args:
        action: Supervisor sub-command (the callers pass ``start``, ``stop``
            or ``restart``).
        program: Configured program name.

    Returns:
        ``(output, resolved_name, status_all)``; ``status_all`` is ``None``
        when the first attempt succeeded.

    Raises:
        RuntimeError: When no matching process can be found; the message
            carries the full process listing to help locate the right name.
        Exception: Any other ``supervisorctl`` failure.
    """
    try:
        out = _run_supervisorctl([action, program])
        return out, program, None
    except Exception as e:
        if "no such process" not in str(e).lower():
            raise

    status_all = _run_supervisorctl(["status"])
    resolved = _select_best_process_name(program, status_all)
    if not resolved:
        # Nothing matched: attach the full listing to make debugging easy.
        raise RuntimeError(f"no such process: {program}. 当前 supervisor 进程列表:\n{status_all}")

    out = _run_supervisorctl([action, resolved])
    return out, resolved, status_all
@router.post("/clear-cache")
async def clear_cache(x_admin_token: Optional[str] = Header(default=None, alias="X-Admin-Token")) -> Dict[str, Any]:
    """Clear the configuration cache and repopulate it via ``cm.reload()``.

    Steps, each one best-effort (a failure is logged or ignored and the next
    step still runs):

    1. Reset the in-process cache of ``config_manager.config_manager``
       (``_cache``).
    2. Delete the Redis keys ``trading_config`` and
       ``recommendations:realtime`` through the manager's Redis client, if it
       is connected and answers ``ping()``.
    3. Call ``cm.reload()``; per the original author's notes this refills
       Redis from the database so the trading system never reads an empty
       config.

    Args:
        x_admin_token: Value of the ``X-Admin-Token`` header, checked against
            ``SYSTEM_CONTROL_TOKEN`` when that variable is set.

    Returns:
        A dict with ``message``, ``deleted_keys`` (keys for which the delete
        call succeeded, whether or not the key existed) and ``note``.

    Raises:
        HTTPException: 401 on a bad token; 500 when ``config_manager`` is not
            initialised or an unexpected error occurs.
    """
    _require_admin(os.getenv("SYSTEM_CONTROL_TOKEN", "").strip(), x_admin_token)

    try:
        import config_manager

        cm = getattr(config_manager, "config_manager", None)
        if cm is None:
            raise HTTPException(status_code=500, detail="config_manager 未初始化")

        deleted_keys: list[str] = []

        # 1) Drop the backend's local cache.
        try:
            cm._cache = {}
        except Exception:
            pass

        # 2) Drop the cached keys in Redis (hash: trading_config).
        try:
            redis_client = getattr(cm, "_redis_client", None)
            redis_connected = getattr(cm, "_redis_connected", False)
            if redis_client is not None and redis_connected:
                try:
                    redis_client.ping()
                except Exception:
                    # The manager believes it is connected but Redis is gone.
                    redis_connected = False

            if redis_client is not None and redis_connected:
                try:
                    redis_client.delete("trading_config")
                    deleted_keys.append("trading_config")
                except Exception as e:
                    logger.warning(f"删除 Redis key trading_config 失败: {e}")

                # Optional: real-time recommendation cache, if present.
                try:
                    redis_client.delete("recommendations:realtime")
                    deleted_keys.append("recommendations:realtime")
                except Exception:
                    pass
        except Exception as e:
            logger.warning(f"清 Redis 缓存失败: {e}")

        # 3) Reload right away so the trading system does not read an empty
        #    config.
        try:
            cm.reload()
        except Exception as e:
            logger.warning(f"回灌配置到 Redis 失败(仍可能使用DB/本地cache): {e}")

        return {
            "message": "缓存已清理并回灌",
            "deleted_keys": deleted_keys,
            "note": "如果你使用 supervisor 管理交易系统,请点击“重启交易系统”让新 Key 立即生效。",
        }
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"清理缓存失败: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e)) from e
@router.get("/trading/status")
async def trading_status(x_admin_token: Optional[str] = Header(default=None, alias="X-Admin-Token")) -> Dict[str, Any]:
    """Report the supervisor status of the trading system.

    Args:
        x_admin_token: Value of the ``X-Admin-Token`` header, checked against
            ``SYSTEM_CONTROL_TOKEN`` when that variable is set.

    Returns:
        A dict with ``mode`` (always ``"supervisor"``), ``program``,
        ``resolved_name``, ``running``, ``pid``, ``state``, ``raw`` (the text
        that was parsed) and ``status_all`` (full listing, only set when the
        configured name had to be resolved).

    Raises:
        HTTPException: 401 on a bad token; 500 when ``supervisorctl`` fails.
    """
    _require_admin(os.getenv("SYSTEM_CONTROL_TOKEN", "").strip(), x_admin_token)

    program = _get_program_name()
    try:
        # supervisorctl is a blocking subprocess (up to 10 s per call); run
        # it in a worker thread so the event loop keeps serving requests.
        raw, resolved_name, status_all = await asyncio.to_thread(_status_with_fallback, program)
        running, pid, state = _parse_supervisor_status(raw)
        return {
            "mode": "supervisor",
            "program": program,
            "resolved_name": resolved_name,
            "running": running,
            "pid": pid,
            "state": state,
            "raw": raw,
            "status_all": status_all,
        }
    except Exception as e:
        raise HTTPException(
            status_code=500,
            detail=f"supervisorctl status 失败: {e}. 你可能需要配置 SUPERVISOR_CONF / SUPERVISOR_TRADING_PROGRAM / SUPERVISOR_USE_SUDO",
        ) from e
@router.post("/trading/start")
async def trading_start(x_admin_token: Optional[str] = Header(default=None, alias="X-Admin-Token")) -> Dict[str, Any]:
    """Start the trading system through supervisor and report its status.

    The ``message`` in the response is fixed; check ``status.running`` /
    ``status.state`` for the actual outcome.

    Args:
        x_admin_token: Value of the ``X-Admin-Token`` header, checked against
            ``SYSTEM_CONTROL_TOKEN`` when that variable is set.

    Returns:
        ``{"message", "output", "status"}`` where ``output`` is the
        ``supervisorctl start`` output and ``status`` has the same fields as
        the ``/trading/status`` response.

    Raises:
        HTTPException: 401 on a bad token; 500 when ``supervisorctl`` fails.
    """
    _require_admin(os.getenv("SYSTEM_CONTROL_TOKEN", "").strip(), x_admin_token)

    program = _get_program_name()
    try:
        # supervisorctl calls block (up to 10 s each); keep them off the
        # event loop.
        out, resolved_name, status_all = await asyncio.to_thread(
            _action_with_fallback, "start", program
        )
        raw, resolved_name2, status_all2 = await asyncio.to_thread(
            _status_with_fallback, resolved_name or program
        )
        running, pid, state = _parse_supervisor_status(raw)
        return {
            "message": "交易系统已启动(supervisor)",
            "output": out,
            "status": {
                "mode": "supervisor",
                "program": program,
                "resolved_name": resolved_name2 or resolved_name,
                "running": running,
                "pid": pid,
                "state": state,
                "raw": raw,
                "status_all": status_all2 or status_all,
            },
        }
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"supervisorctl start 失败: {e}") from e
@router.post("/trading/stop")
async def trading_stop(x_admin_token: Optional[str] = Header(default=None, alias="X-Admin-Token")) -> Dict[str, Any]:
    """Stop the trading system through supervisor and report its status.

    The ``message`` in the response is fixed; check ``status.running`` /
    ``status.state`` for the actual outcome.

    Args:
        x_admin_token: Value of the ``X-Admin-Token`` header, checked against
            ``SYSTEM_CONTROL_TOKEN`` when that variable is set.

    Returns:
        ``{"message", "output", "status"}`` where ``output`` is the
        ``supervisorctl stop`` output and ``status`` has the same fields as
        the ``/trading/status`` response.

    Raises:
        HTTPException: 401 on a bad token; 500 when ``supervisorctl`` fails.
    """
    _require_admin(os.getenv("SYSTEM_CONTROL_TOKEN", "").strip(), x_admin_token)

    program = _get_program_name()
    try:
        # supervisorctl calls block (up to 10 s each); keep them off the
        # event loop.
        out, resolved_name, status_all = await asyncio.to_thread(
            _action_with_fallback, "stop", program
        )
        raw, resolved_name2, status_all2 = await asyncio.to_thread(
            _status_with_fallback, resolved_name or program
        )
        running, pid, state = _parse_supervisor_status(raw)
        return {
            "message": "交易系统已停止(supervisor)",
            "output": out,
            "status": {
                "mode": "supervisor",
                "program": program,
                "resolved_name": resolved_name2 or resolved_name,
                "running": running,
                "pid": pid,
                "state": state,
                "raw": raw,
                "status_all": status_all2 or status_all,
            },
        }
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"supervisorctl stop 失败: {e}") from e
@router.post("/trading/restart")
async def trading_restart(x_admin_token: Optional[str] = Header(default=None, alias="X-Admin-Token")) -> Dict[str, Any]:
    """Restart the trading system through supervisor and report its status.

    The ``message`` in the response is fixed; check ``status.running`` /
    ``status.state`` for the actual outcome.

    Args:
        x_admin_token: Value of the ``X-Admin-Token`` header, checked against
            ``SYSTEM_CONTROL_TOKEN`` when that variable is set.

    Returns:
        ``{"message", "output", "status"}`` where ``output`` is the
        ``supervisorctl restart`` output and ``status`` has the same fields
        as the ``/trading/status`` response.

    Raises:
        HTTPException: 401 on a bad token; 500 when ``supervisorctl`` fails.
    """
    _require_admin(os.getenv("SYSTEM_CONTROL_TOKEN", "").strip(), x_admin_token)

    program = _get_program_name()
    try:
        # supervisorctl calls block (up to 10 s each); keep them off the
        # event loop.
        out, resolved_name, status_all = await asyncio.to_thread(
            _action_with_fallback, "restart", program
        )
        raw, resolved_name2, status_all2 = await asyncio.to_thread(
            _status_with_fallback, resolved_name or program
        )
        running, pid, state = _parse_supervisor_status(raw)
        return {
            "message": "交易系统已重启(supervisor)",
            "output": out,
            "status": {
                "mode": "supervisor",
                "program": program,
                "resolved_name": resolved_name2 or resolved_name,
                "running": running,
                "pid": pid,
                "state": state,
                "raw": raw,
                "status_all": status_all2 or status_all,
            },
        }
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"supervisorctl restart 失败: {e}") from e