薇薇安 2026-01-18 20:49:47 +08:00
parent 46062e442b
commit 50026fb048
8 changed files with 712 additions and 31 deletions

View File

@@ -111,7 +111,8 @@ def setup_logging():
             service="backend",
         )
         redis_handler = RedisErrorLogHandler(redis_cfg)
-        redis_handler.setLevel(logging.ERROR)
+        # Let the handler do per-group filtering (error/warning/info) itself; here we only need to relax the level to INFO
+        redis_handler.setLevel(logging.INFO)
         root_logger.addHandler(redis_handler)
     except Exception:
         pass
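For context, a minimal sketch of what this change means for handler wiring: the level is relaxed to INFO so WARNING/INFO records reach `emit()`, and routing into per-group lists happens inside the handler. The import path and Redis URL are assumptions; the class, config, and method names come from this commit.

```python
import logging

# Assumed import path -- the commit shows the classes but not where they live.
from redis_log_handler import RedisErrorLogHandler, RedisLogConfig

cfg = RedisLogConfig(redis_url="redis://localhost:6379/0", service="backend")
handler = RedisErrorLogHandler(cfg)
handler.setLevel(logging.INFO)  # INFO and above reach emit(); the handler then routes by group

root_logger = logging.getLogger()
root_logger.setLevel(logging.INFO)
root_logger.addHandler(handler)

root_logger.info("lands in ats:logs:info")
root_logger.warning("lands in ats:logs:warning")
root_logger.error("lands in ats:logs:error")
```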

View File

@@ -1,5 +1,5 @@
 """
-Write FastAPI backend ERROR logs into a Redis List (keep only the most recent N entries).
+Write FastAPI backend logs into Redis Lists, grouped into error / warning / info (keep only the most recent N entries per group).
 Implementation kept consistent with trading_system/redis_log_handler.py to avoid PYTHONPATH issues from cross-directory imports.
 """
@@ -14,13 +14,18 @@ import time
 import traceback
 from dataclasses import dataclass
 from datetime import datetime, timezone, timedelta
-from typing import Any, Dict, Optional
+from typing import Any, Dict, Optional, Literal


 def _beijing_time_str(ts: float) -> str:
     beijing_tz = timezone(timedelta(hours=8))
     return datetime.fromtimestamp(ts, tz=beijing_tz).strftime("%Y-%m-%d %H:%M:%S")


+def _beijing_yyyymmdd(ts: Optional[float] = None) -> str:
+    beijing_tz = timezone(timedelta(hours=8))
+    dt = datetime.fromtimestamp(ts or time.time(), tz=beijing_tz)
+    return dt.strftime("%Y%m%d")
+
+
 def _safe_json_loads(s: str) -> Optional[Dict[str, Any]]:
     try:
@@ -32,12 +37,45 @@ def _safe_json_loads(s: str) -> Optional[Dict[str, Any]]:
     return None


+LogGroup = Literal["error", "warning", "info"]
+
+
+def _parse_bool(v: Any, default: bool) -> bool:
+    if v is None:
+        return default
+    if isinstance(v, bool):
+        return v
+    s = str(v).strip().lower()
+    if s in ("1", "true", "yes", "y", "on"):
+        return True
+    if s in ("0", "false", "no", "n", "off"):
+        return False
+    return default
+
+
+def _parse_int(v: Any, default: int) -> int:
+    try:
+        n = int(str(v).strip())
+        return n
+    except Exception:
+        return default
+
+
 @dataclass(frozen=True)
 class RedisLogConfig:
     redis_url: str
-    list_key: str = "ats:logs:error"
-    max_len: int = 2000
+    list_key_prefix: str = "ats:logs"
+    config_key: str = "ats:logs:config"
+    stats_key_prefix: str = "ats:logs:stats:added"
+    max_len_error: int = 2000
+    max_len_warning: int = 2000
+    max_len_info: int = 2000
     dedupe_consecutive: bool = True
+    enable_error: bool = True
+    enable_warning: bool = True
+    enable_info: bool = True
+    include_debug_in_info: bool = False
+    config_refresh_sec: float = 5.0
     service: str = "backend"
     hostname: str = socket.gethostname()
     connect_timeout_sec: float = 1.0
@@ -56,6 +94,8 @@ class RedisErrorLogHandler(logging.Handler):
         self._redis = None
         self._redis_ok = False
         self._last_connect_attempt_ts = 0.0
+        self._last_cfg_refresh_ts = 0.0
+        self._remote_cfg: Dict[str, Any] = {}

     def _connection_kwargs(self) -> Dict[str, Any]:
         kwargs: Dict[str, Any] = {
@@ -135,19 +175,106 @@ class RedisErrorLogHandler(logging.Handler):
             "count": 1,
         }

+    def _effective_cfg_bool(self, key: str, default: bool) -> bool:
+        if key in self._remote_cfg:
+            return _parse_bool(self._remote_cfg.get(key), default)
+        return default
+
+    def _refresh_remote_config_if_needed(self, client) -> None:
+        now = time.time()
+        if now - self._last_cfg_refresh_ts < self.cfg.config_refresh_sec:
+            return
+        self._last_cfg_refresh_ts = now
+        try:
+            cfg_key = os.getenv("REDIS_LOG_CONFIG_KEY", self.cfg.config_key).strip() or self.cfg.config_key
+            data = client.hgetall(cfg_key) or {}
+            normalized: Dict[str, Any] = {}
+            for k, v in data.items():
+                if not k:
+                    continue
+                normalized[str(k).strip()] = v
+            self._remote_cfg = normalized
+        except Exception:
+            return
+
+    def _group_for_record(self, record: logging.LogRecord) -> Optional[LogGroup]:
+        if record.levelno >= logging.ERROR:
+            return "error"
+        if record.levelno >= logging.WARNING:
+            return "warning"
+        if record.levelno == logging.INFO:
+            return "info"
+        if record.levelno == logging.DEBUG and self._effective_cfg_bool("include_debug_in_info", self.cfg.include_debug_in_info):
+            return "info"
+        return None
+
+    def _list_key_for_group(self, group: LogGroup) -> str:
+        if group == "error":
+            legacy = os.getenv("REDIS_LOG_LIST_KEY", "").strip()
+            if legacy:
+                return legacy
+        env_key = os.getenv(f"REDIS_LOG_LIST_KEY_{group.upper()}", "").strip()
+        if env_key:
+            return env_key
+        prefix = os.getenv("REDIS_LOG_LIST_PREFIX", self.cfg.list_key_prefix).strip() or self.cfg.list_key_prefix
+        return f"{prefix}:{group}"
+
+    def _max_len_for_group(self, group: LogGroup) -> int:
+        env_specific = os.getenv(f"REDIS_LOG_LIST_MAX_LEN_{group.upper()}", "").strip()
+        if env_specific:
+            n = _parse_int(env_specific, 0)
+            return n if n > 0 else (self.cfg.max_len_error if group == "error" else self.cfg.max_len_warning if group == "warning" else self.cfg.max_len_info)
+        env_global = os.getenv("REDIS_LOG_LIST_MAX_LEN", "").strip()
+        if env_global:
+            n = _parse_int(env_global, 0)
+            if n > 0:
+                return n
+        field = f"max_len:{group}"
+        if field in self._remote_cfg:
+            n = _parse_int(self._remote_cfg.get(field), 0)
+            if n > 0:
+                return n
+        return self.cfg.max_len_error if group == "error" else self.cfg.max_len_warning if group == "warning" else self.cfg.max_len_info
+
+    def _enabled_for_group(self, group: LogGroup) -> bool:
+        field = f"enabled:{group}"
+        if field in self._remote_cfg:
+            return _parse_bool(self._remote_cfg.get(field), True)
+        return self.cfg.enable_error if group == "error" else self.cfg.enable_warning if group == "warning" else self.cfg.enable_info
+
+    def _dedupe_consecutive_enabled(self) -> bool:
+        if "dedupe_consecutive" in self._remote_cfg:
+            return _parse_bool(self._remote_cfg.get("dedupe_consecutive"), self.cfg.dedupe_consecutive)
+        return self.cfg.dedupe_consecutive
+
+    def _stats_key(self, group: LogGroup) -> str:
+        prefix = os.getenv("REDIS_LOG_STATS_PREFIX", self.cfg.stats_key_prefix).strip() or self.cfg.stats_key_prefix
+        day = _beijing_yyyymmdd()
+        return f"{prefix}:{day}:{group}"
+
     def emit(self, record: logging.LogRecord) -> None:
         try:
             client = self._get_redis()
             if client is None:
                 return
-            entry = self._build_entry(record)
-            list_key = os.getenv("REDIS_LOG_LIST_KEY", self.cfg.list_key).strip() or self.cfg.list_key
-            max_len = int(os.getenv("REDIS_LOG_LIST_MAX_LEN", str(self.cfg.max_len)) or self.cfg.max_len)
-            if max_len <= 0:
-                max_len = self.cfg.max_len
-            if self.cfg.dedupe_consecutive:
+            self._refresh_remote_config_if_needed(client)
+
+            group = self._group_for_record(record)
+            if group is None:
+                return
+            if not self._enabled_for_group(group):
+                return
+
+            entry = self._build_entry(record)
+            list_key = self._list_key_for_group(group)
+            max_len = self._max_len_for_group(group)
+            stats_key = self._stats_key(group)
+
+            if self._dedupe_consecutive_enabled():
                 try:
                     head_raw = client.lindex(list_key, 0)
                 except Exception:
@@ -169,6 +296,8 @@ class RedisErrorLogHandler(logging.Handler):
                     pipe = client.pipeline()
                     pipe.lset(list_key, 0, json.dumps(head, ensure_ascii=False))
                     pipe.ltrim(list_key, 0, max_len - 1)
+                    pipe.incr(stats_key, 1)
+                    pipe.expire(stats_key, 14 * 24 * 3600)
                     pipe.execute()
                     return
                 except Exception:
@@ -178,6 +307,8 @@ class RedisErrorLogHandler(logging.Handler):
             pipe = client.pipeline()
             pipe.lpush(list_key, json.dumps(entry, ensure_ascii=False))
             pipe.ltrim(list_key, 0, max_len - 1)
+            pipe.incr(stats_key, 1)
+            pipe.expire(stats_key, 14 * 24 * 3600)
             pipe.execute()
         except Exception:
             return
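To make the storage layout concrete, here is a hedged redis-py sketch that inspects what this handler writes. Key names are the defaults from this commit; the URL and the date in the stats key are placeholders, and entry fields other than `count` are not shown in this hunk.

```python
import json

import redis  # redis-py

r = redis.Redis.from_url("redis://localhost:6379/0", decode_responses=True)

# LPUSH + LTRIM keep the newest entry at index 0 of each group's list.
for group in ("error", "warning", "info"):
    for line in r.lrange(f"ats:logs:{group}", 0, 4):  # five most recent entries
        entry = json.loads(line)
        print(group, entry.get("count"), entry)

# The dynamic config hash and one day's added-counter (Beijing-time YYYYMMDD).
print(r.hgetall("ats:logs:config"))
print(r.get("ats:logs:stats:added:20260118:error"))  # placeholder date
```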

View File

@@ -1,10 +1,12 @@
 import os
 import re
 import subprocess
+import json
 from pathlib import Path
 from typing import Any, Dict, Optional, Tuple
 from fastapi import APIRouter, HTTPException, Header
+from pydantic import BaseModel
 import logging

 logger = logging.getLogger(__name__)
@@ -12,6 +14,117 @@ logger = logging.getLogger(__name__)
 # All routes are mounted under /api/system; the frontend calls /api/system/... directly
 router = APIRouter(prefix="/api/system")

+LOG_GROUPS = ("error", "warning", "info")
+
+
+def _logs_prefix() -> str:
+    return (os.getenv("REDIS_LOG_LIST_PREFIX", "ats:logs").strip() or "ats:logs")
+
+
+def _logs_key_for_group(group: str) -> str:
+    group = (group or "error").strip().lower()
+    # Backward compatible with the old config: REDIS_LOG_LIST_KEY applies to error only
+    if group == "error":
+        legacy = os.getenv("REDIS_LOG_LIST_KEY", "").strip()
+        if legacy:
+            return legacy
+    env_key = os.getenv(f"REDIS_LOG_LIST_KEY_{group.upper()}", "").strip()
+    if env_key:
+        return env_key
+    return f"{_logs_prefix()}:{group}"
+
+
+def _logs_config_key() -> str:
+    return (os.getenv("REDIS_LOG_CONFIG_KEY", "ats:logs:config").strip() or "ats:logs:config")
+
+
+def _logs_stats_prefix() -> str:
+    return (os.getenv("REDIS_LOG_STATS_PREFIX", "ats:logs:stats:added").strip() or "ats:logs:stats:added")
+
+
+def _beijing_yyyymmdd() -> str:
+    from datetime import datetime, timezone, timedelta
+    beijing_tz = timezone(timedelta(hours=8))
+    return datetime.now(tz=beijing_tz).strftime("%Y%m%d")
+
+
+def _default_logs_config() -> Dict[str, Any]:
+    return {
+        "max_len": {"error": 2000, "warning": 2000, "info": 2000},
+        "enabled": {"error": True, "warning": True, "info": True},
+        "dedupe_consecutive": True,
+        "include_debug_in_info": False,
+        "keys": {g: _logs_key_for_group(g) for g in LOG_GROUPS},
+        "config_key": _logs_config_key(),
+        "stats_prefix": _logs_stats_prefix(),
+    }
+
+
+def _merge_logs_config(defaults: Dict[str, Any], redis_hash: Dict[str, str]) -> Dict[str, Any]:
+    cfg = defaults
+    for g in LOG_GROUPS:
+        v = redis_hash.get(f"max_len:{g}")
+        if v is not None:
+            try:
+                n = int(str(v).strip())
+                if n > 0:
+                    cfg["max_len"][g] = n
+            except Exception:
+                pass
+        ev = redis_hash.get(f"enabled:{g}")
+        if ev is not None:
+            s = str(ev).strip().lower()
+            cfg["enabled"][g] = s in ("1", "true", "yes", "y", "on")
+    for k in ("dedupe_consecutive", "include_debug_in_info"):
+        vv = redis_hash.get(k)
+        if vv is not None:
+            s = str(vv).strip().lower()
+            cfg[k] = s in ("1", "true", "yes", "y", "on")
+    return cfg
+
+
+def _read_logs_config(client) -> Dict[str, Any]:
+    defaults = _default_logs_config()
+    try:
+        raw = client.hgetall(_logs_config_key()) or {}
+        return _merge_logs_config(defaults, raw)
+    except Exception:
+        return defaults
+
+
+def _write_logs_config_and_trim(client, cfg: Dict[str, Any]) -> Dict[str, Any]:
+    mapping: Dict[str, str] = {}
+    for g in LOG_GROUPS:
+        mapping[f"max_len:{g}"] = str(int(cfg["max_len"][g]))
+        mapping[f"enabled:{g}"] = "1" if cfg["enabled"][g] else "0"
+    mapping["dedupe_consecutive"] = "1" if cfg.get("dedupe_consecutive") else "0"
+    mapping["include_debug_in_info"] = "1" if cfg.get("include_debug_in_info") else "0"
+    pipe = client.pipeline()
+    pipe.hset(_logs_config_key(), mapping=mapping)
+    for g in LOG_GROUPS:
+        key = _logs_key_for_group(g)
+        max_len = int(cfg["max_len"][g])
+        if max_len > 0:
+            pipe.ltrim(key, 0, max_len - 1)
+    pipe.execute()
+    return cfg
+
+
+class LogsConfigUpdate(BaseModel):
+    max_len: Optional[Dict[str, int]] = None
+    enabled: Optional[Dict[str, bool]] = None
+    dedupe_consecutive: Optional[bool] = None
+    include_debug_in_info: Optional[bool] = None
+
+
 def _get_redis_client_for_logs():
     """
     Get a Redis client; prefer reusing config_manager's connection, and fall back to creating one directly.
@@ -73,15 +186,17 @@ def _get_redis_client_for_logs():

 @router.get("/logs")
 async def get_logs(
     limit: int = 200,
+    group: str = "error",
     service: Optional[str] = None,
     level: Optional[str] = None,
     x_admin_token: Optional[str] = Header(default=None, alias="X-Admin-Token"),
 ) -> Dict[str, Any]:
     """
-    Read the latest logs from the Redis List (default ats:logs:error).
+    Read the latest logs from the Redis List (default: group=error -> ats:logs:error).
     Parameters:
     - limit: number of entries to return (max 2000)
+    - group: log group (error / warning / info)
     - service: filter (backend / trading_system)
     - level: filter (ERROR / CRITICAL ...)
     """
@@ -92,7 +207,11 @@ async def get_logs(
     if limit > 2000:
         limit = 2000

-    list_key = os.getenv("REDIS_LOG_LIST_KEY", "ats:logs:error").strip() or "ats:logs:error"
+    group = (group or "error").strip().lower()
+    if group not in LOG_GROUPS:
+        raise HTTPException(status_code=400, detail=f"非法 group:{group}(可选:{', '.join(LOG_GROUPS)})")
+
+    list_key = _logs_key_for_group(group)

     client = _get_redis_client_for_logs()
     if client is None:
@@ -110,7 +229,7 @@ async def get_logs(
             if isinstance(raw, bytes):
                 obj = raw.decode("utf-8", errors="ignore")
             if isinstance(obj, str):
-                parsed = __import__("json").loads(obj)
+                parsed = json.loads(obj)
             else:
                 continue
             if not isinstance(parsed, dict):
@@ -124,12 +243,103 @@ async def get_logs(
             continue

     return {
+        "group": group,
         "key": list_key,
         "count": len(items),
         "items": items,
     }
+@router.get("/logs/overview")
+async def logs_overview(x_admin_token: Optional[str] = Header(default=None, alias="X-Admin-Token")) -> Dict[str, Any]:
+    _require_admin(os.getenv("SYSTEM_CONTROL_TOKEN", "").strip(), x_admin_token)
+
+    client = _get_redis_client_for_logs()
+    if client is None:
+        raise HTTPException(status_code=503, detail="Redis 不可用,无法读取日志概览")
+
+    cfg = _read_logs_config(client)
+    day = _beijing_yyyymmdd()
+    stats_prefix = _logs_stats_prefix()
+
+    pipe = client.pipeline()
+    for g in LOG_GROUPS:
+        pipe.llen(_logs_key_for_group(g))
+    for g in LOG_GROUPS:
+        pipe.get(f"{stats_prefix}:{day}:{g}")
+    res = pipe.execute()
+
+    llen_vals = res[: len(LOG_GROUPS)]
+    added_vals = res[len(LOG_GROUPS) :]
+    llen: Dict[str, int] = {}
+    added_today: Dict[str, int] = {}
+    for i, g in enumerate(LOG_GROUPS):
+        try:
+            llen[g] = int(llen_vals[i] or 0)
+        except Exception:
+            llen[g] = 0
+        try:
+            added_today[g] = int(added_vals[i] or 0)
+        except Exception:
+            added_today[g] = 0
+
+    return {
+        "config": cfg,
+        "stats": {
+            "day": day,
+            "llen": llen,
+            "added_today": added_today,
+        },
+    }
+
+
+@router.put("/logs/config")
+async def update_logs_config(
+    payload: LogsConfigUpdate,
+    x_admin_token: Optional[str] = Header(default=None, alias="X-Admin-Token"),
+) -> Dict[str, Any]:
+    _require_admin(os.getenv("SYSTEM_CONTROL_TOKEN", "").strip(), x_admin_token)
+
+    client = _get_redis_client_for_logs()
+    if client is None:
+        raise HTTPException(status_code=503, detail="Redis 不可用,无法更新日志配置")
+
+    cfg = _read_logs_config(client)
+
+    if payload.max_len:
+        for g, v in payload.max_len.items():
+            gg = (g or "").strip().lower()
+            if gg not in LOG_GROUPS:
+                continue
+            try:
+                n = int(v)
+                if n < 100:
+                    n = 100
+                if n > 20000:
+                    n = 20000
+                cfg["max_len"][gg] = n
+            except Exception:
+                continue
+
+    if payload.enabled:
+        for g, v in payload.enabled.items():
+            gg = (g or "").strip().lower()
+            if gg not in LOG_GROUPS:
+                continue
+            cfg["enabled"][gg] = bool(v)
+
+    if payload.dedupe_consecutive is not None:
+        cfg["dedupe_consecutive"] = bool(payload.dedupe_consecutive)
+    if payload.include_debug_in_info is not None:
+        cfg["include_debug_in_info"] = bool(payload.include_debug_in_info)
+
+    cfg = _write_logs_config_and_trim(client, cfg)
+    return {"message": "ok", "config": cfg}
+
+
 def _require_admin(token: Optional[str], provided: Optional[str]) -> None:
     """
     Optional lightweight protection: if the SYSTEM_CONTROL_TOKEN environment variable is set, requests must carry X-Admin-Token
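
A hedged sketch of exercising the new endpoints with `requests`; the base URL and token value are assumptions, while the paths, query parameters, and payload shape come from this commit:

```python
import requests

BASE = "http://localhost:8000"  # assumed backend address
HEADERS = {"X-Admin-Token": "change-me"}  # only required if SYSTEM_CONTROL_TOKEN is set

# Latest 50 warnings from the backend service
logs = requests.get(
    f"{BASE}/api/system/logs",
    params={"limit": 50, "group": "warning", "service": "backend"},
    headers=HEADERS,
    timeout=5,
).json()
print(logs["key"], logs["count"])

# Per-group list lengths, today's added-counters, and the effective config
overview = requests.get(f"{BASE}/api/system/logs/overview", headers=HEADERS, timeout=5).json()
print(overview["stats"]["llen"], overview["stats"]["added_today"])

# Shrink the info list and keep DEBUG records excluded
requests.put(
    f"{BASE}/api/system/logs/config",
    json={"max_len": {"info": 500}, "include_debug_in_info": False},
    headers=HEADERS,
    timeout=5,
)
```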

View File

@@ -45,6 +45,63 @@
   background: #fff;
 }

+.log-overview {
+  padding: 12px;
+  border: 1px solid #eee;
+  border-radius: 10px;
+  background: #fff;
+  display: flex;
+  flex-direction: column;
+  gap: 10px;
+}
+
+.overview-row {
+  display: flex;
+  align-items: center;
+  justify-content: space-between;
+  gap: 12px;
+  flex-wrap: wrap;
+}
+
+.overview-title {
+  font-weight: 600;
+  color: #333;
+}
+
+.overview-items {
+  display: flex;
+  gap: 12px;
+  flex-wrap: wrap;
+  color: #444;
+  font-size: 12px;
+}
+
+.overview-config {
+  display: flex;
+  gap: 10px;
+  flex-wrap: wrap;
+  align-items: flex-end;
+}
+
+.overview-config .mini {
+  display: flex;
+  flex-direction: column;
+  gap: 6px;
+  min-width: 120px;
+}
+
+.overview-config .mini label {
+  font-size: 12px;
+  color: #666;
+}
+
+.overview-config .mini input {
+  border: 1px solid #ddd;
+  border-radius: 8px;
+  padding: 8px 10px;
+  outline: none;
+}
+
 .control {
   display: flex;
   flex-direction: column;

View File

@@ -2,7 +2,13 @@ import React, { useEffect, useMemo, useState } from 'react'
 import { api } from '../services/api'
 import './LogMonitor.css'

-const LEVELS = ['', 'ERROR', 'CRITICAL', 'WARNING', 'INFO', 'DEBUG']
+const GROUPS = [
+  { key: 'error', label: '错误' },
+  { key: 'warning', label: '警告' },
+  { key: 'info', label: '信息' },
+]
+const LEVELS = ['', 'ERROR', 'CRITICAL', 'WARNING', 'INFO']
 const SERVICES = ['', 'backend', 'trading_system']

 function formatCount(item) {
@@ -15,6 +21,10 @@ export default function LogMonitor() {
   const [loading, setLoading] = useState(false)
   const [error, setError] = useState('')

+  const [group, setGroup] = useState('error')
+  const [overview, setOverview] = useState(null)
+  const [saving, setSaving] = useState(false)
+
   const [limit, setLimit] = useState(200)
   const [service, setService] = useState('')
   const [level, setLevel] = useState('')
@@ -22,11 +32,20 @@ export default function LogMonitor() {
   const [refreshSec, setRefreshSec] = useState(5)

   const params = useMemo(() => {
-    const p = { limit: String(limit) }
+    const p = { limit: String(limit), group }
     if (service) p.service = service
     if (level) p.level = level
     return p
-  }, [limit, service, level])
+  }, [limit, service, level, group])
+
+  const loadOverview = async () => {
+    try {
+      const res = await api.getLogsOverview()
+      setOverview(res)
+    } catch (e) {
+      // overview refresh failures are non-fatal; ignore them
+    }
+  }

   const load = async () => {
     setLoading(true)
@@ -34,6 +53,7 @@ export default function LogMonitor() {
     try {
       const res = await api.getSystemLogs(params)
       setItems(res?.items || [])
+      await loadOverview()
     } catch (e) {
       setError(e?.message || '获取日志失败')
     } finally {
@@ -55,12 +75,44 @@ export default function LogMonitor() {
     // eslint-disable-next-line react-hooks/exhaustive-deps
   }, [autoRefresh, refreshSec, params])

+  const maxLen = overview?.config?.max_len || {}
+  const enabled = overview?.config?.enabled || {}
+  const llen = overview?.stats?.llen || {}
+  const addedToday = overview?.stats?.added_today || {}
+  const day = overview?.stats?.day || ''
+
+  const [maxLenDraft, setMaxLenDraft] = useState({ error: 2000, warning: 2000, info: 2000 })
+  useEffect(() => {
+    if (maxLen?.error || maxLen?.warning || maxLen?.info) {
+      setMaxLenDraft({
+        error: Number(maxLen.error || 2000),
+        warning: Number(maxLen.warning || 2000),
+        info: Number(maxLen.info || 2000),
+      })
+    }
+  }, [maxLen?.error, maxLen?.warning, maxLen?.info])
+
+  const saveConfig = async () => {
+    setSaving(true)
+    setError('')
+    try {
+      await api.updateLogsConfig({ max_len: maxLenDraft })
+      await loadOverview()
+    } catch (e) {
+      setError(e?.message || '更新日志配置失败')
+    } finally {
+      setSaving(false)
+    }
+  }
+
   return (
     <div className="log-monitor">
       <div className="log-header">
         <div>
           <h2>日志监控</h2>
-          <div className="log-subtitle">来源:Redis List(只保留最近 N 条,连续同类会合并计数)</div>
+          <div className="log-subtitle">
+            来源:Redis List(分组存储 + 只保留最近 N 条 + 连续同类合并计数)
+          </div>
         </div>
         <div className="log-actions">
           <button className="btn" onClick={load} disabled={loading}>
@@ -69,7 +121,68 @@ export default function LogMonitor() {
         </div>
       </div>

+      <div className="log-overview">
+        <div className="overview-row">
+          <div className="overview-title">今日统计 {day ? `(${day})` : ''}</div>
+          <div className="overview-items">
+            <span>error: {addedToday.error || 0} / {llen.error || 0}{enabled.error === false ? '(已停用)' : ''}</span>
+            <span>warning: {addedToday.warning || 0} / {llen.warning || 0}{enabled.warning === false ? '(已停用)' : ''}</span>
+            <span>info: {addedToday.info || 0} / {llen.info || 0}{enabled.info === false ? '(已停用)' : ''}</span>
+          </div>
+        </div>
+        <div className="overview-row">
+          <div className="overview-title">最大条数(每类)</div>
+          <div className="overview-config">
+            <div className="mini">
+              <label>error</label>
+              <input
+                type="number"
+                min="100"
+                max="20000"
+                value={maxLenDraft.error}
+                onChange={(e) => setMaxLenDraft((s) => ({ ...s, error: Number(e.target.value || 2000) }))}
+              />
+            </div>
+            <div className="mini">
+              <label>warning</label>
+              <input
+                type="number"
+                min="100"
+                max="20000"
+                value={maxLenDraft.warning}
+                onChange={(e) => setMaxLenDraft((s) => ({ ...s, warning: Number(e.target.value || 2000) }))}
+              />
+            </div>
+            <div className="mini">
+              <label>info</label>
+              <input
+                type="number"
+                min="100"
+                max="20000"
+                value={maxLenDraft.info}
+                onChange={(e) => setMaxLenDraft((s) => ({ ...s, info: Number(e.target.value || 2000) }))}
+              />
+            </div>
+            <button className="btn" onClick={saveConfig} disabled={saving}>
+              {saving ? '保存中...' : '保存配置'}
+            </button>
+          </div>
+        </div>
+      </div>
+
       <div className="log-controls">
+        <div className="control">
+          <label>分组</label>
+          <select value={group} onChange={(e) => setGroup(e.target.value)}>
+            {GROUPS.map((g) => (
+              <option key={g.key} value={g.key}>
+                {g.label}
+              </option>
+            ))}
+          </select>
+        </div>
+
         <div className="control">
           <label>条数</label>
           <input

View File

@@ -309,4 +309,26 @@ export const api = {
     }
     return response.json();
   },
+
+  getLogsOverview: async () => {
+    const response = await fetch(buildUrl('/api/system/logs/overview'));
+    if (!response.ok) {
+      const error = await response.json().catch(() => ({ detail: '获取日志概览失败' }));
+      throw new Error(error.detail || '获取日志概览失败');
+    }
+    return response.json();
+  },
+
+  updateLogsConfig: async (data) => {
+    const response = await fetch(buildUrl('/api/system/logs/config'), {
+      method: 'PUT',
+      headers: { 'Content-Type': 'application/json' },
+      body: JSON.stringify(data || {}),
+    });
+    if (!response.ok) {
+      const error = await response.json().catch(() => ({ detail: '更新日志配置失败' }));
+      throw new Error(error.detail || '更新日志配置失败');
+    }
+    return response.json();
+  },
 };

View File

@@ -80,7 +80,8 @@ try:
         service="trading_system",
     )
     redis_handler = RedisErrorLogHandler(redis_cfg)
-    redis_handler.setLevel(logging.ERROR)
+    # Let the handler do per-group filtering (error/warning/info) itself; here we only need to relax the level to INFO
+    redis_handler.setLevel(logging.INFO)
     logging.getLogger().addHandler(redis_handler)
 except Exception:
     # The Redis handler is only for extra monitoring; a failure must not block trading system startup

View File

@@ -1,14 +1,18 @@
 """
-Write Python logging ERROR output into a Redis List (keep only the most recent N entries)
+Write Python logging output into Redis Lists, grouped into error / warning / info (keep only the most recent N entries per group)

 Design goals:
 - Do not affect existing file/console logging; degrade silently when Redis is unavailable
-- Only write ERROR/CRITICAL (controlled by the handler level)
+- Written by default: ERROR/CRITICAL, WARNING, INFO; DEBUG is not written by default
 - Support deduplication of similar content (only consecutive logs with the same signature) by merging counts (count++ plus a timestamp update)
+- Support dynamic tuning from the frontend (config lives in a Redis Hash; no process restart, the handler refreshes a lightweight internal cache)
+- Support write-volume statistics (counters accumulated per Beijing-time date and per group)

 Redis data structures:
-- List key (default ats:logs:error); the head of the list is the newest log
+- List key (default ats:logs:{group}); the head of the list is the newest log
 - Each log entry is a JSON string
+- Config Hash (default ats:logs:config)
+- Stats keys (default ats:logs:stats:added:{YYYYMMDD}:{group})
 """

 from __future__ import annotations
@@ -21,13 +25,18 @@ import time
 import traceback
 from dataclasses import dataclass
 from datetime import datetime, timezone, timedelta
-from typing import Any, Dict, Optional
+from typing import Any, Dict, Optional, Literal


 def _beijing_time_str(ts: float) -> str:
     beijing_tz = timezone(timedelta(hours=8))
     return datetime.fromtimestamp(ts, tz=beijing_tz).strftime("%Y-%m-%d %H:%M:%S")


+def _beijing_yyyymmdd(ts: Optional[float] = None) -> str:
+    beijing_tz = timezone(timedelta(hours=8))
+    dt = datetime.fromtimestamp(ts or time.time(), tz=beijing_tz)
+    return dt.strftime("%Y%m%d")
+
+
 def _safe_json_loads(s: str) -> Optional[Dict[str, Any]]:
     try:
@@ -39,12 +48,45 @@ def _safe_json_loads(s: str) -> Optional[Dict[str, Any]]:
     return None
+LogGroup = Literal["error", "warning", "info"]
+
+
+def _parse_bool(v: Any, default: bool) -> bool:
+    if v is None:
+        return default
+    if isinstance(v, bool):
+        return v
+    s = str(v).strip().lower()
+    if s in ("1", "true", "yes", "y", "on"):
+        return True
+    if s in ("0", "false", "no", "n", "off"):
+        return False
+    return default
+
+
+def _parse_int(v: Any, default: int) -> int:
+    try:
+        n = int(str(v).strip())
+        return n
+    except Exception:
+        return default
+
+
 @dataclass(frozen=True)
 class RedisLogConfig:
     redis_url: str
-    list_key: str = "ats:logs:error"
-    max_len: int = 2000
+    list_key_prefix: str = "ats:logs"
+    config_key: str = "ats:logs:config"
+    stats_key_prefix: str = "ats:logs:stats:added"
+    max_len_error: int = 2000
+    max_len_warning: int = 2000
+    max_len_info: int = 2000
     dedupe_consecutive: bool = True
+    enable_error: bool = True
+    enable_warning: bool = True
+    enable_info: bool = True
+    include_debug_in_info: bool = False
+    config_refresh_sec: float = 5.0
     service: str = "trading_system"
     hostname: str = socket.gethostname()
     connect_timeout_sec: float = 1.0
@@ -69,6 +111,8 @@ class RedisErrorLogHandler(logging.Handler):
         self._redis = None
         self._redis_ok = False
         self._last_connect_attempt_ts = 0.0
+        self._last_cfg_refresh_ts = 0.0
+        self._remote_cfg: Dict[str, Any] = {}

     def _connection_kwargs(self) -> Dict[str, Any]:
         kwargs: Dict[str, Any] = {
@@ -150,19 +194,117 @@ class RedisErrorLogHandler(logging.Handler):
             "count": 1,
         }
+    def _group_for_record(self, record: logging.LogRecord) -> Optional[LogGroup]:
+        # ERROR/CRITICAL -> error
+        if record.levelno >= logging.ERROR:
+            return "error"
+        # WARNING -> warning
+        if record.levelno >= logging.WARNING:
+            return "warning"
+        # INFO -> info
+        if record.levelno == logging.INFO:
+            return "info"
+        # DEBUG: not written by default; can be enabled via config, in which case it goes into info
+        if record.levelno == logging.DEBUG and self._effective_cfg_bool("include_debug_in_info", self.cfg.include_debug_in_info):
+            return "info"
+        return None
+
+    def _effective_cfg_bool(self, key: str, default: bool) -> bool:
+        if key in self._remote_cfg:
+            return _parse_bool(self._remote_cfg.get(key), default)
+        return default
+
+    def _refresh_remote_config_if_needed(self, client) -> None:
+        now = time.time()
+        if now - self._last_cfg_refresh_ts < self.cfg.config_refresh_sec:
+            return
+        self._last_cfg_refresh_ts = now
+        try:
+            cfg_key = os.getenv("REDIS_LOG_CONFIG_KEY", self.cfg.config_key).strip() or self.cfg.config_key
+            data = client.hgetall(cfg_key) or {}
+            # Convention: hash fields use max_len:error / enabled:info, etc.
+            normalized: Dict[str, Any] = {}
+            for k, v in data.items():
+                if not k:
+                    continue
+                normalized[str(k).strip()] = v
+            self._remote_cfg = normalized
+        except Exception:
+            # On read failure, keep using the previously cached config
+            return
+
+    def _list_key_for_group(self, group: LogGroup) -> str:
+        # Backward compatible with the old env var (REDIS_LOG_LIST_KEY), which applies to error only
+        if group == "error":
+            legacy = os.getenv("REDIS_LOG_LIST_KEY", "").strip()
+            if legacy:
+                return legacy
+        env_key = os.getenv(f"REDIS_LOG_LIST_KEY_{group.upper()}", "").strip()
+        if env_key:
+            return env_key
+        prefix = os.getenv("REDIS_LOG_LIST_PREFIX", self.cfg.list_key_prefix).strip() or self.cfg.list_key_prefix
+        return f"{prefix}:{group}"
+
+    def _max_len_for_group(self, group: LogGroup) -> int:
+        # Group-specific env var has the highest priority (handy for emergencies)
+        env_specific = os.getenv(f"REDIS_LOG_LIST_MAX_LEN_{group.upper()}", "").strip()
+        if env_specific:
+            n = _parse_int(env_specific, 0)
+            return n if n > 0 else (self.cfg.max_len_error if group == "error" else self.cfg.max_len_warning if group == "warning" else self.cfg.max_len_info)
+        # Next: the global env var
+        env_global = os.getenv("REDIS_LOG_LIST_MAX_LEN", "").strip()
+        if env_global:
+            n = _parse_int(env_global, 0)
+            if n > 0:
+                return n
+        # Then: the Redis config
+        field = f"max_len:{group}"
+        if field in self._remote_cfg:
+            n = _parse_int(self._remote_cfg.get(field), 0)
+            if n > 0:
+                return n
+        # Finally: the local default
+        return self.cfg.max_len_error if group == "error" else self.cfg.max_len_warning if group == "warning" else self.cfg.max_len_info
+
+    def _enabled_for_group(self, group: LogGroup) -> bool:
+        field = f"enabled:{group}"
+        if field in self._remote_cfg:
+            return _parse_bool(self._remote_cfg.get(field), True)
+        return self.cfg.enable_error if group == "error" else self.cfg.enable_warning if group == "warning" else self.cfg.enable_info
+
+    def _dedupe_consecutive_enabled(self) -> bool:
+        if "dedupe_consecutive" in self._remote_cfg:
+            return _parse_bool(self._remote_cfg.get("dedupe_consecutive"), self.cfg.dedupe_consecutive)
+        return self.cfg.dedupe_consecutive
+
+    def _stats_key(self, group: LogGroup) -> str:
+        prefix = os.getenv("REDIS_LOG_STATS_PREFIX", self.cfg.stats_key_prefix).strip() or self.cfg.stats_key_prefix
+        day = _beijing_yyyymmdd()
+        return f"{prefix}:{day}:{group}"
+
     def emit(self, record: logging.LogRecord) -> None:
         try:
             client = self._get_redis()
             if client is None:
                 return
-            entry = self._build_entry(record)
-            list_key = os.getenv("REDIS_LOG_LIST_KEY", self.cfg.list_key).strip() or self.cfg.list_key
-            max_len = int(os.getenv("REDIS_LOG_LIST_MAX_LEN", str(self.cfg.max_len)) or self.cfg.max_len)
-            if max_len <= 0:
-                max_len = self.cfg.max_len
-            if self.cfg.dedupe_consecutive:
+            self._refresh_remote_config_if_needed(client)
+
+            group = self._group_for_record(record)
+            if group is None:
+                return
+            if not self._enabled_for_group(group):
+                return
+
+            entry = self._build_entry(record)
+            list_key = self._list_key_for_group(group)
+            max_len = self._max_len_for_group(group)
+            stats_key = self._stats_key(group)
+
+            if self._dedupe_consecutive_enabled():
                 try:
                     head_raw = client.lindex(list_key, 0)
                 except Exception:
@@ -185,6 +327,8 @@ class RedisErrorLogHandler(logging.Handler):
                     pipe = client.pipeline()
                     pipe.lset(list_key, 0, json.dumps(head, ensure_ascii=False))
                     pipe.ltrim(list_key, 0, max_len - 1)
+                    pipe.incr(stats_key, 1)
+                    pipe.expire(stats_key, 14 * 24 * 3600)
                     pipe.execute()
                     return
                 except Exception:
@@ -195,6 +339,8 @@ class RedisErrorLogHandler(logging.Handler):
             pipe = client.pipeline()
             pipe.lpush(list_key, json.dumps(entry, ensure_ascii=False))
             pipe.ltrim(list_key, 0, max_len - 1)
+            pipe.incr(stats_key, 1)
+            pipe.expire(stats_key, 14 * 24 * 3600)
             pipe.execute()
         except Exception:
             # A Redis write failure must not affect the business logic
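
Since the handler re-reads the config Hash at most every `config_refresh_sec` seconds, operators can retune logging live. A hedged redis-py sketch (the hash field names follow the convention in this commit; the URL is an assumption):

```python
import redis  # redis-py

r = redis.Redis.from_url("redis://localhost:6379/0", decode_responses=True)

# Cap the info list at 500 entries, mute the info group entirely,
# and turn off consecutive-duplicate merging.
r.hset(
    "ats:logs:config",
    mapping={
        "max_len:info": "500",
        "enabled:info": "0",
        "dedupe_consecutive": "0",
    },
)
# Running handlers pick this up within config_refresh_sec (default 5s);
# no process restart is required.
```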