#!/usr/bin/env python3
|
# -*- coding: utf-8 -*-
|
"""从 MySQL delivery_media_snapshot_feedback 导出增量训练 JSONL。"""
|
import argparse
|
import json
|
import os
|
import sys
|
|
try:
|
import pymysql
|
except ImportError:
|
pymysql = None
|
|
|
def _group_feedback_rows(rows, url_base):
    """Fold feedback rows into one training sample per media id.

    Args:
        rows: Iterable of 7-tuples in the column order selected by ``main``:
            ``(media_id, snapshot_type, ai_time_sec, manual_time_sec,
            file_path_local, start_time, recorder_sn)``.
        url_base: URL prefix, ending in ``/``, that is prepended to each
            media file path to form ``video_path``.

    Returns:
        A dict mapping media id to its sample dict, in first-seen order.
        ``storefront_time_sec`` / ``handover_time_sec`` stay ``None`` until a
        row with snapshot type 1 / 2 supplies a manual time; when several
        rows target the same field, the last one wins.
    """
    groups = {}
    for media_id, snap_type, _ai_t, manual_t, path_local, start_time, recorder_sn in rows:
        sample = groups.get(media_id)
        if sample is None:
            driver_date = ""
            if start_time:
                driver_date = f"{recorder_sn or 'unknown'}_{start_time.strftime('%Y%m%d')}"
            sample = {
                "media_id": media_id,
                "video_path": url_base + (path_local or "").lstrip("/"),
                "storefront_time_sec": None,
                "handover_time_sec": None,
                "driver_date": driver_date,
                "split": "train",
                "notes": "from_feedback",
            }
            groups[media_id] = sample

        # A NULL manual time carries no label, and float(None) would raise
        # TypeError and abort the whole export, so skip such rows.
        if manual_t is None:
            continue

        if snap_type == 1:
            sample["storefront_time_sec"] = float(manual_t)
        elif snap_type == 2:
            sample["handover_time_sec"] = float(manual_t)
    return groups


def main():
    """Export snapshot feedback from MySQL as a JSONL training file.

    Connection settings come from command-line options, which default to the
    ``MYSQL_HOST`` / ``MYSQL_PORT`` / ``MYSQL_USER`` / ``MYSQL_PASSWORD`` /
    ``MYSQL_DATABASE`` environment variables. The video URL is built from
    ``--ftp-prefix`` (default ``FTP_RESOURCE_PREFIX``), the
    ``COLLECTION_MEDIA_FOLDER`` environment variable and each media row's
    ``file_path_local``.

    Non-deleted feedback rows joined to non-deleted media are grouped per
    media id. Only media that have both a storefront time (snapshot type 1)
    and a handover time (snapshot type 2) are written, one JSON object per
    line, to ``--output``.

    Exits with status 1 if ``pymysql`` is not installed.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--host", default=os.environ.get("MYSQL_HOST", "127.0.0.1"))
    parser.add_argument("--port", type=int, default=int(os.environ.get("MYSQL_PORT", "3306")))
    parser.add_argument("--user", default=os.environ.get("MYSQL_USER", "root"))
    parser.add_argument("--password", default=os.environ.get("MYSQL_PASSWORD", ""))
    parser.add_argument("--database", default=os.environ.get("MYSQL_DATABASE", "wuhuyancao"))
    parser.add_argument("--ftp-prefix", default=os.environ.get("FTP_RESOURCE_PREFIX", "http://127.0.0.1/files"))
    parser.add_argument("-o", "--output", default="data/feedback_export.jsonl")
    args = parser.parse_args()

    if pymysql is None:
        print("请安装 pymysql", file=sys.stderr)
        sys.exit(1)

    media_folder = os.environ.get("COLLECTION_MEDIA_FOLDER", "/collection_media/")
    # The prefix is the same for every row, so build it once up front.
    url_base = args.ftp_prefix.rstrip("/") + "/" + media_folder.strip("/") + "/"

    # ORDER BY f.id makes "last row wins" deterministic when a media id has
    # several feedback rows of the same snapshot type.
    sql = """
        SELECT f.media_id, f.snapshot_type, f.ai_time_sec, f.manual_time_sec,
               m.file_path_local, m.start_time, m.recorder_sn
        FROM delivery_media_snapshot_feedback f
        JOIN collection_media m ON m.id = f.media_id AND m.isdeleted = 0
        WHERE f.isdeleted = 0
        ORDER BY f.id
    """

    conn = pymysql.connect(
        host=args.host, port=args.port, user=args.user,
        password=args.password, database=args.database, charset="utf8mb4",
    )
    # try/finally guarantees the connection is released even when the query
    # or row processing fails.
    try:
        with conn.cursor() as cur:
            cur.execute(sql)
            groups = _group_feedback_rows(cur.fetchall(), url_base)
    finally:
        conn.close()

    os.makedirs(os.path.dirname(args.output) or ".", exist_ok=True)
    count = 0
    with open(args.output, "w", encoding="utf-8") as out:
        for item in groups.values():
            # A sample is only usable for training when both labels exist.
            if item["storefront_time_sec"] is None or item["handover_time_sec"] is None:
                continue
            out.write(json.dumps(item, ensure_ascii=False) + "\n")
            count += 1
    print(f"导出 {count} 条 -> {args.output}")
|
|
|
# Run the export only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()
|