更新
This commit is contained in:
@@ -369,7 +369,7 @@ class LipSyncService:
|
||||
}
|
||||
|
||||
try:
|
||||
async with httpx.AsyncClient(timeout=1200.0) as client:
|
||||
async with httpx.AsyncClient(timeout=3600.0) as client:
|
||||
# 先检查健康状态
|
||||
try:
|
||||
resp = await client.get(f"{server_url}/health", timeout=5.0)
|
||||
|
||||
@@ -247,19 +247,67 @@ class WhisperService:
|
||||
line_segments = split_segment_to_lines(all_words, max_chars)
|
||||
all_segments.extend(line_segments)
|
||||
|
||||
# 如果提供了 original_text,用原文替换 Whisper 转录文字
|
||||
# 如果提供了 original_text,用原文替换 Whisper 转录文字,保留语音节奏
|
||||
if original_text and original_text.strip() and whisper_first_start is not None:
|
||||
logger.info(f"Using original_text for subtitles (len={len(original_text)}), "
|
||||
f"Whisper time range: {whisper_first_start:.2f}-{whisper_last_end:.2f}s")
|
||||
# 用 split_word_to_chars 拆分原文
|
||||
# 收集 Whisper 逐字时间戳(保留真实语音节奏)
|
||||
whisper_chars = []
|
||||
for seg in all_segments:
|
||||
whisper_chars.extend(seg.get("words", []))
|
||||
|
||||
# 用原文字符 + Whisper 节奏生成新的时间戳
|
||||
orig_chars = split_word_to_chars(
|
||||
original_text.strip(),
|
||||
whisper_first_start,
|
||||
whisper_last_end
|
||||
)
|
||||
if orig_chars:
|
||||
|
||||
if orig_chars and len(whisper_chars) >= 2:
|
||||
# 将原文字符按比例映射到 Whisper 的时间节奏上
|
||||
n_w = len(whisper_chars)
|
||||
n_o = len(orig_chars)
|
||||
w_starts = [c["start"] for c in whisper_chars]
|
||||
w_final_end = whisper_chars[-1]["end"]
|
||||
|
||||
logger.info(
|
||||
f"Using original_text for subtitles (len={len(original_text)}), "
|
||||
f"rhythm-mapping {n_o} orig chars onto {n_w} Whisper chars, "
|
||||
f"time range: {whisper_first_start:.2f}-{whisper_last_end:.2f}s"
|
||||
)
|
||||
|
||||
remapped = []
|
||||
for i, oc in enumerate(orig_chars):
|
||||
# 原文第 i 个字符对应 Whisper 时间线的位置
|
||||
pos = (i / n_o) * n_w
|
||||
idx = min(int(pos), n_w - 1)
|
||||
frac = pos - idx
|
||||
t_start = (
|
||||
w_starts[idx] + frac * (w_starts[idx + 1] - w_starts[idx])
|
||||
if idx < n_w - 1
|
||||
else w_starts[idx] + frac * (w_final_end - w_starts[idx])
|
||||
)
|
||||
|
||||
# 结束时间 = 下一个字符的开始时间
|
||||
pos_next = ((i + 1) / n_o) * n_w
|
||||
idx_n = min(int(pos_next), n_w - 1)
|
||||
frac_n = pos_next - idx_n
|
||||
t_end = (
|
||||
w_starts[idx_n] + frac_n * (w_starts[idx_n + 1] - w_starts[idx_n])
|
||||
if idx_n < n_w - 1
|
||||
else w_starts[idx_n] + frac_n * (w_final_end - w_starts[idx_n])
|
||||
)
|
||||
|
||||
remapped.append({
|
||||
"word": oc["word"],
|
||||
"start": round(t_start, 3),
|
||||
"end": round(t_end, 3),
|
||||
})
|
||||
|
||||
all_segments = split_segment_to_lines(remapped, max_chars)
|
||||
logger.info(f"Rebuilt {len(all_segments)} subtitle segments (rhythm-mapped)")
|
||||
elif orig_chars:
|
||||
# Whisper 字符不足,退回线性插值
|
||||
all_segments = split_segment_to_lines(orig_chars, max_chars)
|
||||
logger.info(f"Rebuilt {len(all_segments)} subtitle segments from original text")
|
||||
logger.info(f"Rebuilt {len(all_segments)} subtitle segments (linear fallback)")
|
||||
|
||||
logger.info(f"Generated {len(all_segments)} subtitle segments")
|
||||
return {"segments": all_segments}
|
||||
|
||||
Reference in New Issue
Block a user