#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Export incremental-training JSONL from MySQL snapshot feedback.

Reads the non-deleted rows of ``delivery_media_snapshot_feedback`` joined with
``collection_media``, merges them into one record per ``media_id`` and writes
every record that has both a storefront and a handover time as one JSON object
per line.
"""

import argparse
import json
import os
import sys

try:
    import pymysql
except ImportError:
    pymysql = None

# snapshot_type values and the record field each one fills in.
SNAPSHOT_TYPE_STOREFRONT = 1
SNAPSHOT_TYPE_HANDOVER = 2
_TIME_FIELD_BY_SNAPSHOT_TYPE = {
    SNAPSHOT_TYPE_STOREFRONT: "storefront_time_sec",
    SNAPSHOT_TYPE_HANDOVER: "handover_time_sec",
}

FEEDBACK_SQL = """
    SELECT f.media_id, f.snapshot_type, f.ai_time_sec, f.manual_time_sec,
           m.file_path_local, m.start_time, m.recorder_sn
    FROM delivery_media_snapshot_feedback f
    JOIN collection_media m ON m.id = f.media_id AND m.isdeleted = 0
    WHERE f.isdeleted = 0
    ORDER BY f.id
"""


def build_video_url(ftp_prefix, media_folder, path_local):
    """Join the resource prefix, media folder and file path into one URL.

    Slashes at the joints are normalised so exactly one ``/`` separates the
    three parts. A missing ``path_local`` (``None`` or empty) yields a URL
    that ends with the media folder and a trailing slash.
    """
    return (
        ftp_prefix.rstrip("/")
        + "/"
        + media_folder.strip("/")
        + "/"
        + (path_local or "").lstrip("/")
    )


def group_feedback_rows(rows, ftp_prefix, media_folder):
    """Merge feedback rows into one training record per media id.

    Args:
        rows: Iterable of 7-tuples in the column order of ``FEEDBACK_SQL``:
            ``(media_id, snapshot_type, ai_time_sec, manual_time_sec,
            file_path_local, start_time, recorder_sn)``.
        ftp_prefix: Base URL prepended to every video path.
        media_folder: Folder segment placed between prefix and file path.

    Returns:
        Dict mapping ``media_id`` to its record, in first-seen order. A
        record's ``storefront_time_sec`` / ``handover_time_sec`` stay ``None``
        until a row of the matching snapshot type supplies a manual time;
        when several rows match, the last one wins.
    """
    groups = {}
    for media_id, snap_type, _ai_t, manual_t, path_local, start_time, recorder_sn in rows:
        record = groups.get(media_id)
        if record is None:
            driver_date = ""
            if start_time:
                driver_date = f"{recorder_sn or 'unknown'}_{start_time.strftime('%Y%m%d')}"
            record = groups[media_id] = {
                "media_id": media_id,
                "video_path": build_video_url(ftp_prefix, media_folder, path_local),
                "storefront_time_sec": None,
                "handover_time_sec": None,
                "driver_date": driver_date,
                "split": "train",
                "notes": "from_feedback",
            }

        field = _TIME_FIELD_BY_SNAPSHOT_TYPE.get(snap_type)
        if field is None:
            # Other snapshot types carry no label used by the export.
            continue
        if manual_t is None:
            # A NULL manual time would make float() raise and abort the whole
            # export; skip the row so the record is simply left incomplete.
            print(
                f"警告: media_id={media_id} snapshot_type={snap_type} "
                "的 manual_time_sec 为空，已跳过",
                file=sys.stderr,
            )
            continue
        record[field] = float(manual_t)
    return groups


def write_jsonl(records, output_path):
    """Write the complete records to ``output_path`` as JSON Lines.

    Records whose ``storefront_time_sec`` or ``handover_time_sec`` is ``None``
    are left out. The parent directory is created when it does not exist and
    an existing file is overwritten.

    Returns:
        The number of records written.
    """
    os.makedirs(os.path.dirname(output_path) or ".", exist_ok=True)
    count = 0
    with open(output_path, "w", encoding="utf-8") as out:
        for item in records:
            if item["storefront_time_sec"] is None or item["handover_time_sec"] is None:
                continue
            out.write(json.dumps(item, ensure_ascii=False) + "\n")
            count += 1
    return count


def main(argv=None):
    """Parse options, query MySQL and write the feedback export.

    Connection settings default to the ``MYSQL_*`` environment variables, the
    URL prefix to ``FTP_RESOURCE_PREFIX`` and the media folder to
    ``COLLECTION_MEDIA_FOLDER``.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.

    Exits with status 1 when ``pymysql`` is not installed.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--host", default=os.environ.get("MYSQL_HOST", "127.0.0.1"))
    # The default stays a string so argparse converts it with type=int and
    # reports a malformed MYSQL_PORT as a usage error instead of a traceback.
    parser.add_argument("--port", type=int, default=os.environ.get("MYSQL_PORT", "3306"))
    parser.add_argument("--user", default=os.environ.get("MYSQL_USER", "root"))
    parser.add_argument("--password", default=os.environ.get("MYSQL_PASSWORD", ""))
    parser.add_argument("--database", default=os.environ.get("MYSQL_DATABASE", "wuhuyancao"))
    parser.add_argument(
        "--ftp-prefix",
        default=os.environ.get("FTP_RESOURCE_PREFIX", "http://127.0.0.1/files"),
    )
    parser.add_argument("-o", "--output", default="data/feedback_export.jsonl")
    args = parser.parse_args(argv)

    if pymysql is None:
        print("请安装 pymysql", file=sys.stderr)
        sys.exit(1)

    media_folder = os.environ.get("COLLECTION_MEDIA_FOLDER", "/collection_media/")
    conn = pymysql.connect(
        host=args.host,
        port=args.port,
        user=args.user,
        password=args.password,
        database=args.database,
        charset="utf8mb4",
    )
    # Close the connection even when the query or row processing fails.
    try:
        with conn.cursor() as cur:
            cur.execute(FEEDBACK_SQL)
            groups = group_feedback_rows(cur.fetchall(), args.ftp_prefix, media_folder)
    finally:
        conn.close()

    count = write_jsonl(groups.values(), args.output)
    print(f"导出 {count} 条 -> {args.output}")


if __name__ == "__main__":
    main()