Files
test-repo/docs/.vitepress/theme/components/appendix/audio-intro/TTSPipelineDemo.vue
T
sanbuphy 0eba9e87e9 fix(eslint): reduce warnings in GitHub Actions deployment
- Disable formatting rules (handled by Prettier)
- Relaxed strict Vue/JS rules for demo code compatibility
- Fix syntax errors in ApiPlayground and VoiceCloningDemo
- Fix duplicate else-if condition in ApiPlayground
- Fix Promise executor async pattern in AutoregressiveAudioDemo
- Add TypeScript file support to ESLint config

Warnings reduced from 295 to 251 problems.

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2026-02-18 17:38:10 +08:00

654 lines
16 KiB
Vue
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
<!--
TTSPipelineDemo.vue
TTS 流程演示组件
用途
展示文本转语音的完整流程对比不同架构自回归/非自回归/流匹配
-->
<template>
<div class="tts-pipeline-demo">
<div class="header">
<div class="title">
🔄 TTS 架构演进从慢到快
</div>
<div class="subtitle">
探索文本如何变成语音以及不同架构的优劣对比
</div>
</div>
<div class="arch-selector">
<button
v-for="arch in architectures"
:key="arch.id"
class="arch-btn"
:class="{ active: selectedArch === arch.id }"
@click="selectArch(arch.id)"
>
<span class="arch-icon">{{ arch.icon }}</span>
<span class="arch-name">{{ arch.name }}</span>
<span
class="arch-tag"
:class="arch.tagClass"
>{{ arch.tag }}</span>
</button>
</div>
<div class="pipeline-flow">
<div
v-for="(stage, index) in currentStages"
:key="stage.id"
class="stage"
:class="{ active: activeStage === index }"
@click="activeStage = index"
>
<div class="stage-num">
{{ index + 1 }}
</div>
<div class="stage-content">
<div class="stage-icon">
{{ stage.icon }}
</div>
<div class="stage-name">
{{ stage.name }}
</div>
<div class="stage-desc">
{{ stage.shortDesc }}
</div>
</div>
<div
v-if="index < currentStages.length - 1"
class="stage-arrow"
>
</div>
</div>
</div>
<div
v-if="currentStage"
class="stage-detail"
>
<div class="detail-header">
<span class="detail-icon">{{ currentStage.icon }}</span>
<div>
<div class="detail-name">
{{ currentStage.name }}
</div>
<div class="detail-desc">
{{ currentStage.description }}
</div>
</div>
</div>
<div class="detail-canvas">
<canvas
ref="detailCanvas"
width="500"
height="150"
/>
</div>
<div class="detail-meta">
<div class="meta-item">
<span class="label">输入:</span>
<span>{{ currentStage.input }}</span>
</div>
<div class="meta-item">
<span class="label">输出:</span>
<span>{{ currentStage.output }}</span>
</div>
<div class="meta-item">
<span class="label">技术:</span>
<span>{{ currentStage.tech }}</span>
</div>
</div>
</div>
<div class="comparison-table">
<div class="table-title">
📊 架构对比
</div>
<div class="table">
<div class="table-header">
<div class="cell">
特性
</div>
<div class="cell">
自回归
</div>
<div class="cell">
非自回归
</div>
<div class="cell">
流匹配
</div>
</div>
<div
v-for="row in comparisonRows"
:key="row.feature"
class="table-row"
>
<div class="cell feature">
{{ row.feature }}
</div>
<div
class="cell"
:class="{ highlight: selectedArch === 'ar' }"
>
{{ row.ar }}
</div>
<div
class="cell"
:class="{ highlight: selectedArch === 'nar' }"
>
{{ row.nar }}
</div>
<div
class="cell"
:class="{ highlight: selectedArch === 'flow' }"
>
{{ row.flow }}
</div>
</div>
</div>
</div>
<div class="models-section">
<div class="models-title">
🏆 代表模型
</div>
<div class="models-grid">
<div
v-for="model in models"
:key="model.name"
class="model-card"
:class="{ active: model.arch === selectedArch }"
>
<div class="model-name">
{{ model.name }}
</div>
<span
class="model-tag"
:class="model.tagClass"
>{{ model.type }}</span>
<div class="model-desc">
{{ model.desc }}
</div>
</div>
</div>
</div>
<div class="info-box">
<span class="icon">💡</span>
<p>
<strong>TTS 演进趋势</strong>
从早期的自回归模型 Tacotron到非自回归 FastSpeech再到最新的流匹配模型 F5-TTS
TTS 技术正在向更快更稳定更高质量的方向发展
</p>
</div>
</div>
</template>
<script setup>
import { ref, computed, onMounted, watch } from 'vue'
const architectures = [
{ id: 'ar', name: '自回归', icon: '📝', tag: 'AR', tagClass: 'primary' },
{ id: 'nar', name: '非自回归', icon: '⚡', tag: 'NAR', tagClass: 'success' },
{ id: 'flow', name: '流匹配', icon: '🌊', tag: 'Flow', tagClass: 'warning' }
]
const pipelineStages = {
ar: [
{ id: 'text', name: '文本处理', icon: '📝', shortDesc: '分词 & 音素', description: '将输入文本转换为音素序列', input: '原始文本', output: '音素序列', tech: 'G2P' },
{ id: 'encoder', name: '文本编码', icon: '🔢', shortDesc: '提取特征', description: '使用 Encoder 编码文本', input: '音素序列', output: '文本特征', tech: 'Transformer' },
{ id: 'decoder', name: '自回归解码', icon: '🎯', shortDesc: '逐帧生成', description: '逐个时间步生成梅尔频谱', input: '文本特征', output: '梅尔频谱', tech: 'AR Decoder' },
{ id: 'vocoder', name: '声码器', icon: '🔊', shortDesc: '频谱转波形', description: '将频谱转换为音频波形', input: '梅尔频谱', output: '音频波形', tech: 'HiFi-GAN' }
],
nar: [
{ id: 'text', name: '文本处理', icon: '📝', shortDesc: '分词 & 音素', description: '将输入文本转换为音素序列', input: '原始文本', output: '音素序列', tech: 'G2P' },
{ id: 'duration', name: '时长预测', icon: '⏱️', shortDesc: '预测时长', description: '预测每个音素的帧数', input: '音素序列', output: '时长信息', tech: 'Duration Predictor' },
{ id: 'decoder', name: '并行解码', icon: '⚡', shortDesc: '一次性生成', description: '并行生成完整梅尔频谱', input: '文本特征', output: '梅尔频谱', tech: 'Non-AR Transformer' },
{ id: 'vocoder', name: '声码器', icon: '🔊', shortDesc: '频谱转波形', description: '将频谱转换为音频波形', input: '梅尔频谱', output: '音频波形', tech: 'HiFi-GAN' }
],
flow: [
{ id: 'text', name: '文本处理', icon: '📝', shortDesc: '分词 & 音素', description: '将输入文本转换为音素序列', input: '原始文本', output: '音素序列', tech: 'G2P' },
{ id: 'embedding', name: '文本嵌入', icon: '🔢', shortDesc: '特征提取', description: '将音素转换为向量', input: '音素序列', output: '文本嵌入', tech: 'DiT' },
{ id: 'flow', name: '流匹配', icon: '🌊', shortDesc: '最优传输', description: '使用流匹配生成频谱', input: '文本嵌入', output: '梅尔频谱', tech: 'Flow Matching' },
{ id: 'vocoder', name: '声码器', icon: '🔊', shortDesc: '频谱转波形', description: '将频谱转换为音频波形', input: '梅尔频谱', output: '音频波形', tech: 'Vocoder' }
]
}
const comparisonRows = [
{ feature: '生成速度', ar: '慢', nar: '快', flow: '很快' },
{ feature: '音质', ar: '高', nar: '中高', flow: '高' },
{ feature: '稳定性', ar: '中', nar: '高', flow: '高' },
{ feature: '可控性', ar: '中', nar: '高', flow: '高' }
]
const models = [
{ name: 'Tacotron 2', arch: 'ar', type: 'AR', tagClass: 'primary', desc: '经典 AR 模型,音质优秀' },
{ name: 'FastSpeech 2', arch: 'nar', type: 'NAR', tagClass: 'success', desc: '并行生成,速度快' },
{ name: 'F5-TTS', arch: 'flow', type: 'Flow', tagClass: 'warning', desc: '最新 SOTA10 步生成' },
{ name: 'CosyVoice', arch: 'flow', type: 'Flow', tagClass: 'warning', desc: '阿里开源,支持多语言' }
]
const selectedArch = ref('flow')
const activeStage = ref(0)
const detailCanvas = ref(null)
const currentStages = computed(() => pipelineStages[selectedArch.value])
const currentStage = computed(() => currentStages.value[activeStage.value])
const selectArch = (id) => {
selectedArch.value = id
activeStage.value = 0
}
const drawVisualization = () => {
const canvas = detailCanvas.value
if (!canvas) return
const ctx = canvas.getContext('2d')
const w = canvas.width
const h = canvas.height
ctx.clearRect(0, 0, w, h)
const stage = currentStage.value
if (!stage) return
// 根据阶段绘制不同的可视化
if (stage.id === 'text') {
// 文本到音素
ctx.font = '16px sans-serif'
ctx.fillStyle = '#333'
ctx.fillText('"Hello"', 50, h/2)
ctx.strokeStyle = '#409eff'
ctx.lineWidth = 2
ctx.beginPath()
ctx.moveTo(120, h/2)
ctx.lineTo(200, h/2)
ctx.stroke()
const phonemes = ['h', 'ə', 'l', 'oʊ']
let x = 220
phonemes.forEach((p, i) => {
ctx.fillStyle = `hsl(${200 + i * 30}, 70%, 50%)`
ctx.fillRect(x, h/2 - 15, 30, 30)
ctx.fillStyle = '#fff'
ctx.fillText(p, x + 8, h/2 + 5)
x += 40
})
} else if (stage.id === 'decoder' && selectedArch.value === 'ar') {
// 自回归解码
for (let i = 0; i < 5; i++) {
const x = 80 + i * 80
for (let j = 0; j < 8; j++) {
const barH = Math.random() * 40 + 10
ctx.fillStyle = `rgba(64, 158, 255, ${0.5 + i * 0.1})`
ctx.fillRect(x + j * 8, h - 50 - barH, 6, barH)
}
if (i < 4) {
ctx.strokeStyle = '#ccc'
ctx.beginPath()
ctx.moveTo(x + 70, h/2)
ctx.lineTo(x + 80, h/2)
ctx.stroke()
}
}
ctx.fillStyle = '#666'
ctx.fillText('逐个时间步生成', 50, 30)
} else if (stage.id === 'flow') {
// 流匹配
ctx.strokeStyle = '#409eff'
ctx.lineWidth = 3
ctx.beginPath()
ctx.moveTo(50, h - 50)
for (let t = 0; t <= 1; t += 0.02) {
const x = 50 + t * 400
const y = h - 50 - t * (h - 100) + Math.sin(t * Math.PI * 4) * 20
ctx.lineTo(x, y)
}
ctx.stroke()
const steps = [0, 0.25, 0.5, 0.75, 1]
steps.forEach((t, i) => {
const x = 50 + t * 400
const y = h - 50 - t * (h - 100) + Math.sin(t * Math.PI * 4) * 20
ctx.fillStyle = '#e6a23c'
ctx.beginPath()
ctx.arc(x, y, 6, 0, Math.PI * 2)
ctx.fill()
})
}
}
onMounted(drawVisualization)
watch([selectedArch, activeStage], drawVisualization)
</script>
<style scoped>
.tts-pipeline-demo {
background: var(--vp-c-bg-soft);
border: 1px solid var(--vp-c-divider);
border-radius: 12px;
padding: 24px;
margin: 24px 0;
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
}
.header {
text-align: center;
margin-bottom: 24px;
}
.title {
font-size: 18px;
font-weight: 700;
margin-bottom: 8px;
background: linear-gradient(120deg, #409eff, #67c23a);
-webkit-background-clip: text;
-webkit-text-fill-color: transparent;
}
.subtitle {
font-size: 14px;
color: var(--vp-c-text-2);
}
.arch-selector {
display: flex;
gap: 12px;
margin-bottom: 24px;
flex-wrap: wrap;
justify-content: center;
}
.arch-btn {
padding: 12px 20px;
border: 1px solid var(--vp-c-divider);
border-radius: 6px;
background: var(--vp-c-bg);
cursor: pointer;
display: flex;
align-items: center;
gap: 8px;
transition: all 0.2s;
}
.arch-btn:hover {
border-color: var(--vp-c-brand);
}
.arch-btn.active {
border-color: var(--vp-c-brand);
background: var(--vp-c-bg-mute);
}
.arch-icon {
font-size: 20px;
}
.arch-name {
font-weight: 500;
}
.arch-tag {
font-size: 10px;
padding: 2px 6px;
border-radius: 4px;
}
.arch-tag.primary { background: #409eff33; color: #409eff; }
.arch-tag.success { background: #67c23a33; color: #67c23a; }
.arch-tag.warning { background: #e6a23c33; color: #e6a23c; }
.pipeline-flow {
display: flex;
justify-content: center;
gap: 8px;
flex-wrap: wrap;
padding: 20px;
background: var(--vp-c-bg);
border-radius: 6px;
margin-bottom: 20px;
}
.stage {
display: flex;
align-items: center;
gap: 8px;
cursor: pointer;
}
.stage-content {
background: var(--vp-c-bg-soft);
border: 2px solid var(--vp-c-divider);
border-radius: 6px;
padding: 12px 16px;
text-align: center;
transition: all 0.2s;
min-width: 100px;
}
.stage:hover .stage-content,
.stage.active .stage-content {
border-color: var(--vp-c-brand);
background: var(--vp-c-bg-mute);
}
.stage-num {
width: 24px;
height: 24px;
background: var(--vp-c-brand);
color: white;
border-radius: 50%;
display: flex;
align-items: center;
justify-content: center;
font-size: 12px;
font-weight: 600;
}
.stage-icon {
font-size: 24px;
margin-bottom: 4px;
}
.stage-name {
font-weight: 500;
font-size: 13px;
}
.stage-desc {
font-size: 11px;
color: var(--vp-c-text-3);
}
.stage-arrow {
color: var(--vp-c-text-3);
font-size: 20px;
}
.stage-detail {
background: var(--vp-c-bg);
border-radius: 6px;
padding: 20px;
margin-bottom: 20px;
}
.detail-header {
display: flex;
gap: 12px;
margin-bottom: 16px;
}
.detail-icon {
font-size: 32px;
}
.detail-name {
font-weight: 600;
margin-bottom: 4px;
}
.detail-desc {
font-size: 13px;
color: var(--vp-c-text-2);
}
.detail-canvas {
background: var(--vp-c-bg-soft);
border-radius: 6px;
margin-bottom: 16px;
}
.detail-canvas canvas {
width: 100%;
height: auto;
}
.detail-meta {
display: flex;
gap: 24px;
flex-wrap: wrap;
}
.meta-item {
font-size: 13px;
}
.meta-item .label {
color: var(--vp-c-text-3);
margin-right: 4px;
}
.comparison-table {
background: var(--vp-c-bg);
border-radius: 6px;
padding: 20px;
margin-bottom: 20px;
}
.table-title {
font-weight: 600;
margin-bottom: 16px;
text-align: center;
}
.table {
display: flex;
flex-direction: column;
gap: 1px;
background: var(--vp-c-divider);
border-radius: 6px;
overflow: hidden;
}
.table-header,
.table-row {
display: grid;
grid-template-columns: 1fr 1fr 1fr 1fr;
background: var(--vp-c-bg);
}
.table-header {
font-weight: 600;
background: var(--vp-c-bg-mute);
}
.cell {
padding: 12px;
text-align: center;
font-size: 13px;
}
.cell.feature {
text-align: left;
font-weight: 500;
}
.cell.highlight {
background: rgba(64, 158, 255, 0.1);
color: var(--vp-c-brand);
font-weight: 500;
}
.models-section {
background: var(--vp-c-bg);
border-radius: 6px;
padding: 20px;
margin-bottom: 20px;
}
.models-title {
font-weight: 600;
margin-bottom: 16px;
text-align: center;
}
.models-grid {
display: grid;
grid-template-columns: repeat(auto-fit, minmax(150px, 1fr));
gap: 12px;
}
.model-card {
background: var(--vp-c-bg-soft);
border-radius: 6px;
padding: 16px;
text-align: center;
border: 2px solid transparent;
transition: all 0.2s;
}
.model-card.active {
border-color: var(--vp-c-brand);
}
.model-name {
font-weight: 600;
margin-bottom: 8px;
}
.model-tag {
font-size: 10px;
padding: 2px 8px;
border-radius: 4px;
display: inline-block;
margin-bottom: 8px;
}
.model-tag.primary { background: #409eff33; color: #409eff; }
.model-tag.success { background: #67c23a33; color: #67c23a; }
.model-tag.warning { background: #e6a23c33; color: #e6a23c; }
.model-desc {
font-size: 12px;
color: var(--vp-c-text-3);
}
.info-box {
display: flex;
gap: 12px;
padding: 16px;
background: var(--vp-c-bg-mute);
border-radius: 6px;
font-size: 13px;
line-height: 1.6;
}
.info-box .icon {
font-size: 18px;
flex-shrink: 0;
}
@media (max-width: 640px) {
.pipeline-flow {
flex-direction: column;
}
.stage-arrow {
transform: rotate(90deg);
}
}
</style>