"""System-control API routes.

Exposes, under ``/api/system``:

- ``GET  /logs``             read recent log entries from a Redis list
- ``POST /clear-cache``      clear the config cache and reload it
- ``GET  /trading/status``   supervisor status of the trading program
- ``POST /trading/start|stop|restart``  control the trading program

All endpoints are optionally protected by the ``SYSTEM_CONTROL_TOKEN``
environment variable (see ``_require_admin``).
"""

import asyncio
import hmac
import json
import logging
import os
import re
import subprocess
from pathlib import Path
from typing import Any, Dict, Optional, Tuple

from fastapi import APIRouter, HTTPException, Header

logger = logging.getLogger(__name__)

# All routes are mounted under /api/system; the frontend calls /api/system/... directly.
router = APIRouter(prefix="/api/system")

# Marker that supervisorctl prints when the requested process name is unknown.
_NO_SUCH_PROCESS = "no such process"

# supervisorctl >= 4.0 exits with 3 from `status` when a listed process is not
# RUNNING. That is a valid answer for a status query, not a failure.
_STATUS_OK_RETURNCODES: Tuple[int, ...] = (0, 3)

# Config locations probed when SUPERVISOR_CONF is not set (BaoTa panel / system).
_SUPERVISOR_CONF_CANDIDATES: Tuple[str, ...] = (
    "/www/server/panel/plugin/supervisor/supervisord.conf",
    "/www/server/panel/plugin/supervisor/supervisor.conf",
    "/etc/supervisor/supervisord.conf",
    "/etc/supervisord.conf",
)

# Non-running states recognised in supervisorctl output, in match order.
_NON_RUNNING_STATES: Tuple[str, ...] = (
    "STOPPED",
    "FATAL",
    "EXITED",
    "BACKOFF",
    "STARTING",
    "UNKNOWN",
)


def _get_redis_client_for_logs():
    """Return a usable Redis client, or ``None`` if Redis cannot be reached.

    The connection held by ``config_manager.config_manager`` is reused when it
    exists and answers ``PING``; otherwise a new client is built from the
    ``REDIS_*`` environment variables. Every failure is treated as
    "Redis unavailable" (best effort) and only logged at debug level.
    """
    # 1) Reuse config_manager's connection (avoids opening a second one).
    try:
        import config_manager  # backend/config_manager.py (already loads .env)

        cm = getattr(config_manager, "config_manager", None)
        if cm is not None:
            redis_client = getattr(cm, "_redis_client", None)
            redis_connected = getattr(cm, "_redis_connected", False)
            if redis_client is not None and redis_connected:
                try:
                    redis_client.ping()
                    return redis_client
                except Exception as e:
                    logger.debug("config_manager Redis client did not answer ping: %s", e)
    except Exception as e:
        logger.debug("config_manager Redis client unavailable: %s", e)

    # 2) Build our own client.
    try:
        import redis  # type: ignore

        redis_url = os.getenv("REDIS_URL", "redis://localhost:6379")
        redis_use_tls = os.getenv("REDIS_USE_TLS", "False").lower() == "true"
        ssl_cert_reqs = os.getenv("REDIS_SSL_CERT_REQS", "required")
        ssl_ca_certs = os.getenv("REDIS_SSL_CA_CERTS", None)

        kwargs: Dict[str, Any] = {
            "decode_responses": True,
            "username": os.getenv("REDIS_USERNAME", None),
            "password": os.getenv("REDIS_PASSWORD", None),
            # Short timeouts so an unreachable Redis fails the request quickly.
            "socket_connect_timeout": 1,
            "socket_timeout": 1,
        }
        if redis_url.startswith("rediss://") or redis_use_tls:
            kwargs["ssl_cert_reqs"] = ssl_cert_reqs
            if ssl_ca_certs:
                kwargs["ssl_ca_certs"] = ssl_ca_certs
            # Hostname verification is only enabled for "required".
            kwargs["ssl_check_hostname"] = ssl_cert_reqs == "required"

        client = redis.from_url(redis_url, **kwargs)
        client.ping()
        return client
    except Exception as e:
        logger.debug("could not create a Redis client for logs: %s", e)
        return None


def _decode_log_item(raw: Any) -> Optional[Dict[str, Any]]:
    """Decode one Redis list item into a dict; return ``None`` if it is not a JSON object."""
    if isinstance(raw, bytes):
        raw = raw.decode("utf-8", errors="ignore")
    if not isinstance(raw, str):
        return None
    try:
        parsed = json.loads(raw)
    except (ValueError, RecursionError):
        # Malformed entries are skipped rather than failing the whole request.
        return None
    return parsed if isinstance(parsed, dict) else None


@router.get("/logs")
async def get_logs(
    limit: int = 200,
    service: Optional[str] = None,
    level: Optional[str] = None,
    x_admin_token: Optional[str] = Header(default=None, alias="X-Admin-Token"),
) -> Dict[str, Any]:
    """Read the newest log entries from a Redis list (default key ``ats:logs:error``).

    Parameters:
    - limit: number of list items to read; non-positive values fall back to
      200 and values above 2000 are capped at 2000. Filtering happens after
      reading, so fewer items may be returned.
    - service: optional exact-match filter on the entry's ``service`` field
      (backend / trading_system).
    - level: optional exact-match filter on the entry's ``level`` field
      (ERROR / CRITICAL ...).

    Returns a dict with ``key`` (the Redis list key), ``count`` and ``items``.

    Raises:
    - HTTPException 401 if the admin token is required and missing/wrong.
    - HTTPException 503 if Redis is unavailable.
    - HTTPException 500 if reading the list fails.
    """
    _require_admin(os.getenv("SYSTEM_CONTROL_TOKEN", "").strip(), x_admin_token)

    if limit <= 0:
        limit = 200
    if limit > 2000:
        limit = 2000

    list_key = os.getenv("REDIS_LOG_LIST_KEY", "ats:logs:error").strip() or "ats:logs:error"

    client = _get_redis_client_for_logs()
    if client is None:
        raise HTTPException(status_code=503, detail="Redis 不可用,无法读取日志")

    try:
        raw_items = client.lrange(list_key, 0, limit - 1)
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"读取 Redis 日志失败: {e}")

    items: list[Dict[str, Any]] = []
    for raw in raw_items or []:
        entry = _decode_log_item(raw)
        if entry is None:
            continue
        if service and str(entry.get("service")) != service:
            continue
        if level and str(entry.get("level")) != level:
            continue
        items.append(entry)

    return {
        "key": list_key,
        "count": len(items),
        "items": items,
    }


def _require_admin(token: Optional[str], provided: Optional[str]) -> None:
    """Optional simple guard for the system-control endpoints.

    If ``token`` (the configured SYSTEM_CONTROL_TOKEN) is empty, no check is
    performed. Otherwise the request must carry the same value in
    ``X-Admin-Token``. In production, additional auth / IP allow-listing at the
    reverse proxy (e.g. Nginx) is strongly recommended.

    Raises:
        HTTPException: 401 when a token is configured and ``provided`` is
        missing or does not match.
    """
    if not token:
        return
    # Compare as bytes: compare_digest rejects non-ASCII str, and a
    # constant-time comparison avoids leaking the token through timing.
    if not provided or not hmac.compare_digest(provided.encode("utf-8"), token.encode("utf-8")):
        raise HTTPException(status_code=401, detail="Unauthorized")


def _build_supervisorctl_cmd(args: list[str]) -> list[str]:
    """Build the argv for a supervisorctl invocation.

    Reads SUPERVISORCTL_PATH (default ``supervisorctl``), SUPERVISOR_CONF and
    SUPERVISOR_USE_SUDO. When SUPERVISOR_CONF is unset, the first existing path
    among the common BaoTa/system locations is used; if none exists, no ``-c``
    option is passed.
    """
    supervisorctl_path = os.getenv("SUPERVISORCTL_PATH", "supervisorctl")
    supervisor_conf = os.getenv("SUPERVISOR_CONF", "").strip()
    use_sudo = os.getenv("SUPERVISOR_USE_SUDO", "false").lower() == "true"

    if not supervisor_conf:
        for candidate in _SUPERVISOR_CONF_CANDIDATES:
            try:
                if Path(candidate).exists():
                    supervisor_conf = candidate
                    break
            except OSError:
                # e.g. permission problems while probing; try the next path.
                continue

    cmd: list[str] = []
    if use_sudo:
        # Requires a NOPASSWD sudoers entry; `sudo -n` fails instead of
        # hanging on a password prompt.
        cmd += ["sudo", "-n"]
    cmd.append(supervisorctl_path)
    if supervisor_conf:
        cmd += ["-c", supervisor_conf]
    cmd += args
    return cmd


def _run_supervisorctl(args: list[str], ok_returncodes: Tuple[int, ...] = (0,)) -> str:
    """Run supervisorctl and return its combined stdout/stderr, stripped.

    Args:
        args: supervisorctl arguments, e.g. ``["status", "auto_sys"]``.
        ok_returncodes: exit codes treated as success. Defaults to ``(0,)``;
            status queries pass ``(0, 3)`` because supervisorctl >= 4.0 exits
            with 3 when a queried process is not RUNNING.

    Raises:
        RuntimeError: on timeout (10s) or an exit code outside
            ``ok_returncodes``; the message carries the command output.
    """
    cmd = _build_supervisorctl_cmd(args)
    try:
        res = subprocess.run(cmd, capture_output=True, text=True, timeout=10)
    except subprocess.TimeoutExpired:
        raise RuntimeError("supervisorctl 超时(10s)")

    out = (res.stdout or "").strip()
    err = (res.stderr or "").strip()
    combined = "\n".join(part for part in (out, err) if part).strip()
    if res.returncode not in ok_returncodes:
        raise RuntimeError(combined or f"supervisorctl failed (exit={res.returncode})")
    return combined


def _parse_supervisor_status(raw: str) -> Tuple[bool, Optional[int], str]:
    """Parse supervisorctl status output into ``(running, pid, state)``.

    Typical output:
    - auto_sys RUNNING pid 1234, uptime 0:10:00
    - auto_sys STOPPED Not started

    Matching is by substring, so ``raw`` should be the status of a single
    process. Returns ``(False, None, "UNKNOWN")`` when no known state is found.
    """
    if "RUNNING" in raw:
        m = re.search(r"\bpid\s+(\d+)\b", raw)
        pid = int(m.group(1)) if m else None
        return True, pid, "RUNNING"
    for state in _NON_RUNNING_STATES:
        if state in raw:
            return False, None, state
    return False, None, "UNKNOWN"


def _get_program_name() -> str:
    """Return the supervisor program name (SUPERVISOR_TRADING_PROGRAM, default ``auto_sys``)."""
    # The BaoTa configuration this was written for uses [program:auto_sys].
    return os.getenv("SUPERVISOR_TRADING_PROGRAM", "auto_sys").strip() or "auto_sys"


def _select_best_process_name(program: str, status_all_raw: str) -> Optional[str]:
    """Pick the real process name for ``program`` from full ``supervisorctl status`` output.

    Supports supervisor's ``group:process`` naming, e.g. ``auto_sys:auto_sys_00``.
    Returns ``None`` when nothing matches.
    """
    if not status_all_raw:
        return None

    # The process name is the first whitespace-separated field of each line.
    names = [ln.split(None, 1)[0] for ln in status_all_raw.splitlines() if ln.strip()]

    # Exact matches first: program / program_00 / program:program_00
    for cand in (program, f"{program}_00", f"{program}:{program}_00"):
        if cand in names:
            return cand

    # Next best: any name starting with "program_", then any starting with "program:".
    for prefix in (program + "_", program + ":"):
        for name in names:
            if name.startswith(prefix):
                return name
    return None


def _status_with_fallback(program: str) -> Tuple[str, Optional[str], Optional[str]]:
    """Query the status of ``program``, resolving its real name if needed.

    - First tries ``status <program>``.
    - If supervisor reports "no such process" (via a failing exit code or in
      its output), fetches the full status and tries to resolve the real
      process name (e.g. ``auto_sys:auto_sys_00``).

    Returns ``(raw, resolved_name, status_all)``. ``resolved_name`` is ``None``
    when no name could be resolved; in that case ``raw`` is the full listing
    and must not be parsed as a single-process status.

    Raises:
        RuntimeError: for supervisorctl failures other than "no such process".
    """
    try:
        raw = _run_supervisorctl(["status", program], ok_returncodes=_STATUS_OK_RETURNCODES)
    except Exception as e:
        if _NO_SUCH_PROCESS not in str(e).lower():
            raise
    else:
        # Some supervisorctl versions print the error but still exit 0.
        if _NO_SUCH_PROCESS not in raw.lower():
            return raw, program, None

    status_all = _run_supervisorctl(["status"], ok_returncodes=_STATUS_OK_RETURNCODES)
    resolved = _select_best_process_name(program, status_all)
    if resolved:
        try:
            raw = _run_supervisorctl(["status", resolved], ok_returncodes=_STATUS_OK_RETURNCODES)
            return raw, resolved, status_all
        except Exception:
            # Last resort: at least return the full listing so the real
            # process name can be identified by the caller.
            return status_all, None, status_all
    return status_all, None, status_all


def _action_with_fallback(action: str, program: str) -> Tuple[str, Optional[str], Optional[str]]:
    """Run start/stop/restart, resolving the real process name if needed.

    If ``program`` does not exist, the real name is looked up in the full
    status listing and the action is retried with it.

    Returns ``(output, resolved_name, status_all)``.

    Raises:
        RuntimeError: if the action fails, or if no matching process exists
            (the message then includes the full process listing).
    """
    try:
        out = _run_supervisorctl([action, program])
    except Exception as e:
        if _NO_SUCH_PROCESS not in str(e).lower():
            raise
    else:
        # Some supervisorctl versions print the error but still exit 0.
        if _NO_SUCH_PROCESS not in out.lower():
            return out, program, None

    status_all = _run_supervisorctl(["status"], ok_returncodes=_STATUS_OK_RETURNCODES)
    resolved = _select_best_process_name(program, status_all)
    if not resolved:
        # Include the full listing to make the real name easy to find.
        raise RuntimeError(f"no such process: {program}. 当前 supervisor 进程列表:\n{status_all}")
    out = _run_supervisorctl([action, resolved])
    return out, resolved, status_all


def _status_payload(
    program: str,
    status_result: Tuple[str, Optional[str], Optional[str]],
) -> Dict[str, Any]:
    """Build the status dict returned by the trading endpoints."""
    raw, resolved_name, status_all = status_result
    if resolved_name is None:
        # `raw` is the full process listing here; parsing it could report
        # another program's RUNNING state and pid as ours.
        running, pid, state = False, None, "UNKNOWN"
    else:
        running, pid, state = _parse_supervisor_status(raw)
    return {
        "mode": "supervisor",
        "program": program,
        "resolved_name": resolved_name,
        "running": running,
        "pid": pid,
        "state": state,
        "raw": raw,
        "status_all": status_all,
    }


async def _run_trading_action(action: str, message: str, x_admin_token: Optional[str]) -> Dict[str, Any]:
    """Shared implementation of the start/stop/restart endpoints.

    Runs ``action`` through supervisorctl, then queries the resulting status.
    Blocking subprocess calls run in a worker thread so the event loop stays
    responsive.

    Raises:
        HTTPException: 401 on failed admin check, 500 on any supervisorctl failure.
    """
    _require_admin(os.getenv("SYSTEM_CONTROL_TOKEN", "").strip(), x_admin_token)
    program = _get_program_name()
    try:
        out, resolved_name, status_all = await asyncio.to_thread(_action_with_fallback, action, program)
        status_result = await asyncio.to_thread(_status_with_fallback, resolved_name or program)
        status = _status_payload(program, status_result)
        # Fall back to what the action step learned if the status step has less.
        status["resolved_name"] = status["resolved_name"] or resolved_name
        status["status_all"] = status["status_all"] or status_all
        return {
            "message": message,
            "output": out,
            "status": status,
        }
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"supervisorctl {action} 失败: {e}")


@router.post("/clear-cache")
async def clear_cache(x_admin_token: Optional[str] = Header(default=None, alias="X-Admin-Token")) -> Dict[str, Any]:
    """Clear the config cache (Redis hash ``trading_config``) and reload it via ``cm.reload()``.

    Each step is best effort: failures are logged as warnings and the
    remaining steps still run. Returns a dict with ``message``,
    ``deleted_keys`` and ``note``.

    Raises:
        HTTPException: 401 on failed admin check; 500 if config_manager is not
        initialised or an unexpected error occurs.
    """
    _require_admin(os.getenv("SYSTEM_CONTROL_TOKEN", "").strip(), x_admin_token)

    try:
        import config_manager

        cm = getattr(config_manager, "config_manager", None)
        if cm is None:
            raise HTTPException(status_code=500, detail="config_manager 未初始化")

        deleted_keys: list[str] = []

        # 1) Clear the backend's in-process cache.
        try:
            cm._cache = {}
        except Exception as e:
            logger.debug("could not reset config_manager._cache: %s", e)

        # 2) Clear the Redis cache key (hash: trading_config).
        try:
            redis_client = getattr(cm, "_redis_client", None)
            redis_connected = getattr(cm, "_redis_connected", False)
            if redis_client is not None and redis_connected:
                try:
                    redis_client.ping()
                except Exception:
                    redis_connected = False

            if redis_client is not None and redis_connected:
                try:
                    redis_client.delete("trading_config")
                    deleted_keys.append("trading_config")
                except Exception as e:
                    logger.warning(f"删除 Redis key trading_config 失败: {e}")

                # Optional: realtime recommendations cache (if present).
                try:
                    redis_client.delete("recommendations:realtime")
                    deleted_keys.append("recommendations:realtime")
                except Exception as e:
                    logger.debug("could not delete recommendations:realtime: %s", e)
        except Exception as e:
            logger.warning(f"清 Redis 缓存失败: {e}")

        # 3) Reload immediately so trading_system does not read an empty cache.
        try:
            cm.reload()
        except Exception as e:
            logger.warning(f"回灌配置到 Redis 失败(仍可能使用DB/本地cache): {e}")

        return {
            "message": "缓存已清理并回灌",
            "deleted_keys": deleted_keys,
            "note": "如果你使用 supervisor 管理交易系统,请点击“重启交易系统”让新 Key 立即生效。",
        }
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"清理缓存失败: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e))


@router.get("/trading/status")
async def trading_status(x_admin_token: Optional[str] = Header(default=None, alias="X-Admin-Token")) -> Dict[str, Any]:
    """Return the supervisor status of the trading program.

    A stopped program is reported with ``running: false`` rather than as an
    error. Raises HTTPException 401 on failed admin check and 500 when
    supervisorctl itself fails.
    """
    _require_admin(os.getenv("SYSTEM_CONTROL_TOKEN", "").strip(), x_admin_token)

    program = _get_program_name()
    try:
        # Blocking subprocess work runs in a worker thread.
        status_result = await asyncio.to_thread(_status_with_fallback, program)
        return _status_payload(program, status_result)
    except Exception as e:
        raise HTTPException(
            status_code=500,
            detail=f"supervisorctl status 失败: {e}. 你可能需要配置 SUPERVISOR_CONF / SUPERVISOR_TRADING_PROGRAM / SUPERVISOR_USE_SUDO",
        )


@router.post("/trading/start")
async def trading_start(x_admin_token: Optional[str] = Header(default=None, alias="X-Admin-Token")) -> Dict[str, Any]:
    """Start the trading program via supervisor and return the resulting status."""
    return await _run_trading_action("start", "交易系统已启动(supervisor)", x_admin_token)


@router.post("/trading/stop")
async def trading_stop(x_admin_token: Optional[str] = Header(default=None, alias="X-Admin-Token")) -> Dict[str, Any]:
    """Stop the trading program via supervisor and return the resulting status."""
    return await _run_trading_action("stop", "交易系统已停止(supervisor)", x_admin_token)


@router.post("/trading/restart")
async def trading_restart(x_admin_token: Optional[str] = Header(default=None, alias="X-Admin-Token")) -> Dict[str, Any]:
    """Restart the trading program via supervisor and return the resulting status."""
    return await _run_trading_action("restart", "交易系统已重启(supervisor)", x_admin_token)