feat: save current work to dev branch

This commit is contained in:
sanbuphy
2026-01-15 20:10:19 +08:00
parent c9e7ece75d
commit c8567ce23f
76 changed files with 28352 additions and 6 deletions
@@ -0,0 +1,284 @@
<!--
  Audio tokenization walkthrough.
  Shows a four-step progress indicator and one stage panel per step
  (waveform, frames, codebook lookup, token list), followed by a
  text-vs-audio token comparison and an explanatory alert.
  Bar heights are derived from the loop index instead of Math.random() so
  that every render (server or client) produces the same markup.
-->
<template>
  <div class="tokenization-demo">
    <el-card shadow="never">
      <div class="controls">
        <el-button type="primary" @click="playDemo" :loading="isPlaying">
          <el-icon><VideoPlay /></el-icon> 演示处理流程
        </el-button>
      </div>

      <el-steps :active="activeStep" align-center finish-status="success" class="steps">
        <el-step title="音频信号" description="连续波形" />
        <el-step title="切片 (Chunking)" description="20ms/帧" />
        <el-step title="量化 (Quantization)" description="查字典" />
        <el-step title="Token 序列" description="离散数字" />
      </el-steps>

      <div class="stage-display">
        <!-- Stage 0: Audio -->
        <div v-if="activeStep === 0" class="stage-content audio-stage">
          <div class="waveform-viz">
            <!-- Height is a fixed function of n (30%..79%), stable across re-renders. -->
            <div
              v-for="n in 20"
              :key="n"
              class="wave-bar"
              :style="{ height: (30 + (n * 37) % 50) + '%', animationDelay: n * 0.1 + 's' }"
            ></div>
          </div>
          <div class="stage-desc">原始的连续模拟信号或高采样率数字信号</div>
        </div>

        <!-- Stage 1: Chunks -->
        <div v-if="activeStep === 1" class="stage-content chunks-stage">
          <div class="chunks-container">
            <div class="chunk-item" v-for="n in 5" :key="n">
              <span class="chunk-label">Frame {{ n }}</span>
            </div>
          </div>
          <div class="stage-desc">将音频切分为固定长度的小片段 (例如 20ms)</div>
        </div>

        <!-- Stage 2: Codebook -->
        <div v-if="activeStep === 2" class="stage-content codebook-stage">
          <div class="codebook-grid">
            <!-- currentMatch is 1-based; 0 means no entry is highlighted. -->
            <div
              v-for="n in 9"
              :key="n"
              class="codebook-entry"
              :class="{ 'highlight': n === currentMatch }"
            >
              {{ 1024 + n * 50 }}
            </div>
          </div>
          <div class="stage-desc">在预训练的"声音字典"中寻找最接近的特征向量</div>
        </div>

        <!-- Stage 3: Tokens -->
        <div v-if="activeStep === 3" class="stage-content token-stage">
          <div class="token-list">
            <el-tag
              v-for="(token, index) in tokens"
              :key="index"
              effect="dark"
              size="large"
              class="token-tag"
            >
              {{ token }}
            </el-tag>
          </div>
          <div class="stage-desc">最终转换为 GPT 可以理解的数字序列</div>
        </div>
      </div>

      <el-divider />

      <div class="comparison-box">
        <el-row :gutter="20">
          <el-col :span="12">
            <div class="compare-card">
              <div class="compare-title">文本 GPT</div>
              <!-- Example text tokens; previously these tags rendered empty. -->
              <div class="compare-content">
                <el-tag type="info">你</el-tag>
                <el-tag type="info">好</el-tag>
                <el-tag type="info">世</el-tag>
                <el-tag type="info">界</el-tag>
              </div>
            </div>
          </el-col>
          <el-col :span="12">
            <div class="compare-card highlight-border">
              <div class="compare-title">音频 GPT</div>
              <div class="compare-content">
                <el-tag type="warning">1024</el-tag>
                <el-tag type="warning">5678</el-tag>
                <el-tag type="warning">2340</el-tag>
                <el-tag type="warning">8901</el-tag>
              </div>
            </div>
          </el-col>
        </el-row>
      </div>

      <el-alert
        title="为什么要做 Tokenization?"
        type="warning"
        :closable="false"
        description="因为 GPT 本质上是一个'预测下一个数字'的机器。只有把连续的声音变成离散的数字,才能用 GPT 来生成音频。"
        show-icon
      />
    </el-card>
  </div>
</template>
<script setup>
import { ref } from 'vue'
import { VideoPlay } from '@element-plus/icons-vue'
// Index (0-3) of the stage currently shown; bound to <el-steps :active> and
// compared in each stage panel's v-if.
const activeStep = ref(0)
// True while playDemo is running; bound to the play button's loading state.
const isPlaying = ref(false)
// 1-based index of the codebook cell to highlight; 0 matches no cell.
const currentMatch = ref(0)
// Fixed example token IDs listed in the final stage (plain array, not reactive).
const tokens = [1024, 5678, 2340, 8901, 3342]
// Plays the scripted walkthrough once:
// stage 0 -> (1000 ms) -> stage 1 -> (1500 ms) -> stage 2, then flashes five
// random codebook cells for 200 ms each before switching to stage 3.
// A call made while a run is already in progress is ignored.
const playDemo = async () => {
  if (isPlaying.value) {
    return
  }

  isPlaying.value = true
  activeStep.value = 0

  // Dwell time before each of the first two transitions; entry i leads to stage i + 1.
  const dwellTimes = [1000, 1500]
  for (const [index, delay] of dwellTimes.entries()) {
    await wait(delay)
    activeStep.value = index + 1
  }

  // Simulate codebook matching: highlight a random cell in 1..9, five times.
  let remaining = 5
  while (remaining-- > 0) {
    currentMatch.value = Math.floor(Math.random() * 9) + 1
    await wait(200)
  }
  // 0 clears the highlight.
  currentMatch.value = 0

  activeStep.value = 3
  isPlaying.value = false
}
// Returns a promise that resolves (with no value) once a timer of `ms`
// milliseconds fires.
const wait = (ms) => new Promise((done) => {
  setTimeout(done, ms)
})
</script>
<style scoped>
/* Root wrapper: vertical spacing around the whole demo. */
.tokenization-demo {
margin: 20px 0;
}
/* Centered row that holds the play button. */
.controls {
text-align: center;
margin-bottom: 20px;
}
/* Step indicator, separated from the stage panel below it. */
.steps {
margin-bottom: 30px;
}
/* Stage panel: a centered column with a 200px minimum height. */
.stage-display {
background: var(--el-fill-color-light);
border-radius: 8px;
padding: 30px;
min-height: 200px;
display: flex;
align-items: center;
justify-content: center;
flex-direction: column;
}
/* Shared by all four stage bodies. */
.stage-content {
text-align: center;
width: 100%;
}
/* Caption shown under each stage's visual. */
.stage-desc {
margin-top: 15px;
color: var(--el-text-color-secondary);
font-size: 0.9em;
}
/* Audio stage: a row of vertically centered, pulsing bars. */
.waveform-viz {
height: 80px;
display: flex;
align-items: center;
justify-content: center;
gap: 3px;
}
/* One bar; each bar's animation delay is set inline by the template. */
.wave-bar {
width: 6px;
background: var(--el-color-primary);
border-radius: 3px;
animation: wave 1s ease-in-out infinite;
}
/* Pulse between 30% and 100% of the container height. */
@keyframes wave {
0%, 100% { height: 30%; opacity: 0.5; }
50% { height: 100%; opacity: 1; }
}
/* Chunks stage: a centered row of square frame tiles. */
.chunks-container {
display: flex;
gap: 5px;
justify-content: center;
}
/* One 60x60 frame tile with its label centered. */
.chunk-item {
width: 60px;
height: 60px;
background: var(--el-color-primary-light-8);
border: 1px solid var(--el-color-primary);
border-radius: 4px;
display: flex;
align-items: center;
justify-content: center;
}
.chunk-label {
font-size: 10px;
color: var(--el-color-primary);
}
/* Codebook stage: a 3-column grid, capped at 300px and centered. */
.codebook-grid {
display: grid;
grid-template-columns: repeat(3, 1fr);
gap: 10px;
max-width: 300px;
margin: 0 auto;
}
/* One codebook cell; the transition animates the highlight on and off. */
.codebook-entry {
padding: 10px;
background: var(--el-bg-color);
border: 1px solid var(--el-border-color);
border-radius: 4px;
font-family: monospace;
transition: all 0.3s;
}
/* Applied by the template to the cell whose index equals currentMatch. */
.codebook-entry.highlight {
background: var(--el-color-warning);
color: white;
transform: scale(1.1);
border-color: var(--el-color-warning);
}
/* Token stage: a wrapping, centered row of tags. */
.token-list {
display: flex;
gap: 10px;
justify-content: center;
flex-wrap: wrap;
}
.token-tag {
font-family: monospace;
font-weight: bold;
}
/* Text-vs-audio comparison section. */
.comparison-box {
margin-top: 20px;
margin-bottom: 20px;
}
/* Comparison card; the transparent border reserves space for .highlight-border. */
.compare-card {
background: var(--el-bg-color-page);
padding: 15px;
border-radius: 8px;
text-align: center;
border: 1px solid transparent;
}
/* Emphasis variant used on the audio card. */
.highlight-border {
border-color: var(--el-color-warning);
background: var(--el-color-warning-light-9);
}
.compare-title {
font-weight: bold;
margin-bottom: 10px;
font-size: 0.9em;
}
/* Wrapping, centered row of example tags inside a card. */
.compare-content {
display: flex;
gap: 5px;
justify-content: center;
flex-wrap: wrap;
}
</style>
@@ -0,0 +1,160 @@
<!--
  Static three-step illustration of analog-to-digital conversion:
  sound wave, then sampling, then digital (PCM) bits, plus a short explanation.
  The component has no script block, so all sample heights and bit values are
  literal data in the template; this keeps every render identical.
-->
<template>
  <div class="waveform-demo">
    <div class="demo-container">
      <!-- Step 1: Sound Wave -->
      <div class="step-box">
        <div class="label">🌊 声波</div>
        <div class="wave-visual">
          <!-- Decorative curve; the label and description carry the meaning. -->
          <svg viewBox="0 0 200 60" class="wave-svg" aria-hidden="true">
            <path
              d="M 0 30 Q 10 10, 20 30 T 40 30 T 60 30 T 80 30 T 100 30 T 120 30 T 140 30 T 160 30 T 180 30 T 200 30"
              fill="none"
              stroke="#22c55e"
              stroke-width="2"
            />
          </svg>
        </div>
        <div class="desc">连续模拟信号</div>
      </div>

      <div class="arrow" aria-hidden="true">→</div>

      <!-- Step 2: Sampling -->
      <div class="step-box">
        <div class="label">📊 采样</div>
        <div class="sample-visual">
          <!--
            Each bar needs an explicit height: .sample-bar declares none and the
            container bottom-aligns its children, so bars without one collapse
            to 0px and are invisible.
          -->
          <div
            v-for="(level, index) in [40, 75, 95, 80, 50, 25, 35, 65, 90, 60]"
            :key="index"
            class="sample-bar"
            :style="{ height: level + '%' }"
          ></div>
        </div>
        <div class="desc">44100 次/秒</div>
      </div>

      <div class="arrow" aria-hidden="true">→</div>

      <!-- Step 3: Digital -->
      <div class="step-box">
        <div class="label">🔢 数字化</div>
        <div class="digital-visual">
          <!-- Fixed example bit pattern (was Math.random() evaluated at render time). -->
          <div
            v-for="(bit, index) in [1, 0, 1, 1, 0, 0, 1, 0]"
            :key="index"
            class="bit"
          >{{ bit }}</div>
        </div>
        <div class="desc">PCM 数据</div>
      </div>
    </div>

    <div class="explanation">
      <p>
        <span class="icon">💡</span>
        计算机无法直接处理连续的声波,需要把它转换成数字。
        这个过程叫<strong>模数转换 (ADC)</strong>:每隔一小段时间测量一次声音的强度,记录成数字。
      </p>
    </div>
  </div>
</template>
<style scoped>
/* Outer card: bordered, padded box using the --vp-c-* theme variables. */
.waveform-demo {
border: 1px solid var(--vp-c-divider);
border-radius: 8px;
padding: 20px;
background: var(--vp-c-bg-soft);
margin: 20px 0;
}
/* Row of steps and arrows; wraps when there is not enough width. */
.demo-container {
display: flex;
align-items: center;
justify-content: space-around;
gap: 20px;
flex-wrap: wrap;
}
/* One step: label, visual and description stacked and centered. */
.step-box {
display: flex;
flex-direction: column;
align-items: center;
gap: 10px;
}
.label {
font-weight: bold;
font-size: 0.9em;
color: var(--vp-c-text-2);
}
.desc {
font-size: 0.8em;
color: var(--vp-c-text-3);
}
/* Step 1 visual: fixed-size frame around the SVG curve. */
.wave-visual {
width: 200px;
height: 60px;
background: var(--vp-c-bg);
border-radius: 6px;
display: flex;
align-items: center;
justify-content: center;
padding: 10px;
}
/* The SVG fills its frame. */
.wave-svg {
width: 100%;
height: 100%;
}
/* Step 2 visual: bars are aligned to the bottom edge (flex-end). */
.sample-visual {
display: flex;
gap: 3px;
align-items: flex-end;
height: 60px;
width: 120px;
background: var(--vp-c-bg);
border-radius: 6px;
padding: 10px;
}
/* NOTE(review): no height is declared in this rule; a bar only becomes
   visible if a height is supplied elsewhere (e.g. inline) — confirm. */
.sample-bar {
width: 8px;
background: #22c55e;
border-radius: 2px;
flex: 1;
}
/* Step 3 visual: a row of bit cells. */
.digital-visual {
display: flex;
gap: 4px;
padding: 10px 15px;
background: var(--vp-c-bg);
border-radius: 6px;
}
/* One 20x20 bit cell with its digit centered. */
.bit {
width: 20px;
height: 20px;
background: #3b82f6;
color: white;
border-radius: 3px;
display: flex;
align-items: center;
justify-content: center;
font-size: 0.75em;
font-weight: bold;
}
/* Separator between steps. */
.arrow {
font-size: 1.5em;
color: var(--vp-c-text-3);
}
/* Explanatory paragraph under the steps. */
.explanation {
margin-top: 20px;
padding: 12px;
background: var(--vp-c-bg-mute);
border-radius: 6px;
font-size: 0.9em;
line-height: 1.6;
}
.icon {
font-size: 1.2em;
}
</style>
@@ -0,0 +1,241 @@
<!--
  Side-by-side comparison of autoregressive generation (tokens appear one at
  a time) and flow matching (all bars grow together as flowProgress goes 0 to 1).
  playDemo drives both panels.
-->
<template>
  <div class="ar-comparison">
    <el-card shadow="never">
      <div class="controls">
        <!--
          Bind the imported icon component. A plain string ("VideoPlay") is
          resolved by name and only works if the icon is registered globally.
        -->
        <el-button type="primary" @click="playDemo" :loading="isPlaying" :icon="VideoPlay">
          开始对比演示
        </el-button>
      </div>

      <div class="comparison-container">
        <!-- Left: Autoregressive -->
        <el-card shadow="hover" class="method-card">
          <template #header>
            <div class="method-header">
              <el-icon :size="20" color="#F56C6C"><Timer /></el-icon>
              <span class="method-title">自回归 (Autoregressive)</span>
            </div>
          </template>
          <div class="method-body">
            <div class="visual-area">
              <div class="token-stream">
                <!-- Tokens are only ever appended, so the index is a stable key. -->
                <transition-group name="list">
                  <el-tag
                    v-for="(token, i) in displayedArTokens"
                    :key="i"
                    type="danger"
                    class="token-item"
                    effect="plain"
                  >
                    {{ token }}
                  </el-tag>
                </transition-group>
              </div>
            </div>
            <div class="stats">
              <el-descriptions :column="1" size="small" border>
                <el-descriptions-item label="生成方式">串行 (Serial)</el-descriptions-item>
                <el-descriptions-item label="速度">
                  <el-tag type="danger" size="small">慢 (Slow)</el-tag>
                </el-descriptions-item>
              </el-descriptions>
            </div>
          </div>
        </el-card>

        <!-- Right: Flow Matching -->
        <el-card shadow="hover" class="method-card">
          <template #header>
            <div class="method-header">
              <el-icon :size="20" color="#67C23A"><Lightning /></el-icon>
              <span class="method-title">流匹配 (Flow Matching)</span>
            </div>
          </template>
          <div class="method-body">
            <div class="visual-area">
              <div class="flow-field" :style="{ opacity: flowProgress }">
                <!--
                  Each bar's target height (30%..99%) is a fixed function of n.
                  With Math.random() here, every re-render (including each AR
                  token push) re-rolled all heights, so the finished bars kept
                  jumping.
                -->
                <div
                  v-for="n in 20"
                  :key="n"
                  class="flow-bar"
                  :style="{ height: flowProgress * (30 + (n * 37) % 70) + '%', transitionDelay: n * 0.02 + 's' }"
                ></div>
              </div>
              <!-- Shown only while progress is strictly between 0 and 1. -->
              <div class="flow-overlay" v-if="flowProgress < 1 && flowProgress > 0">
                <el-icon class="is-loading"><Loading /></el-icon>
                <span>Denoising...</span>
              </div>
            </div>
            <div class="stats">
              <el-descriptions :column="1" size="small" border>
                <el-descriptions-item label="生成方式">并行 (Parallel)</el-descriptions-item>
                <el-descriptions-item label="速度">
                  <el-tag type="success" size="small">极快 (Fast)</el-tag>
                </el-descriptions-item>
              </el-descriptions>
            </div>
          </div>
        </el-card>
      </div>

      <el-divider />

      <el-alert
        title="技术演进"
        type="success"
        :closable="false"
        show-icon
      >
        <template #default>
          <p>
            <strong>自回归</strong> (如 VALL-E) 像人说话一样,必须说完上一个字才能说下一个字,所以很慢。
            <br>
            <strong>流匹配</strong> (如 F5-TTS) 像画画一样,可以同时在画布的所有角落开始上色,效率提升了 10-20 倍。
          </p>
        </template>
      </el-alert>
    </el-card>
  </div>
</template>
<script setup>
import { ref, computed } from 'vue'
import { Timer, Lightning, VideoPlay, Loading } from '@element-plus/icons-vue'
// Fixed token IDs that the autoregressive panel reveals one at a time.
const arTokensSource = [1024, 2048, 3072, 4096, 5120, 6144, 7168, 8192]
// Tokens revealed so far; rendered by the v-for in the left panel.
const displayedArTokens = ref([])
// Flow-matching progress; starts at 0 and is scaled into bar heights and opacity.
const flowProgress = ref(0)
// True while playDemo is running; bound to the button's loading state.
const isPlaying = ref(false)
// Runs both animations concurrently and resolves when the slower one ends.
// - Flow matching: progress goes 0 to 1 in 20 ticks of 50 ms (about 1 s total).
// - Autoregressive: one token is appended every 400 ms.
// A call made while a run is already in progress is ignored.
const playDemo = async () => {
  if (isPlaying.value) return
  isPlaying.value = true
  displayedArTokens.value = []
  flowProgress.value = 0

  const FLOW_STEPS = 20
  const FLOW_TICK_MS = 50
  const AR_TOKEN_MS = 400

  // Start Flow Matching (fast). Progress is computed from an integer step
  // counter: repeatedly adding 0.05 accumulates floating-point error, so the
  // sum is not guaranteed to land exactly on 1 (it could stop short and need
  // an extra tick, or end above 1).
  const flowPromise = new Promise(resolve => {
    let step = 0
    const interval = setInterval(() => {
      step += 1
      flowProgress.value = Math.min(1, step / FLOW_STEPS)
      if (step >= FLOW_STEPS) {
        clearInterval(interval)
        resolve()
      }
    }, FLOW_TICK_MS)
  })

  // Start AR (slow). An async function call replaces the former
  // `new Promise(async resolve => ...)`: an error thrown inside an async
  // executor would never reject the outer promise.
  const arPromise = (async () => {
    for (const token of arTokensSource) {
      await new Promise(r => setTimeout(r, AR_TOKEN_MS))
      displayedArTokens.value.push(token)
    }
  })()

  try {
    await Promise.all([flowPromise, arPromise])
  } finally {
    // Always release the button, even if one of the animations fails.
    isPlaying.value = false
  }
}
</script>
<style scoped>
/* Root wrapper: vertical spacing around the whole demo. */
.ar-comparison {
margin: 20px 0;
}
/* Centered row that holds the play button. */
.controls {
text-align: center;
margin-bottom: 20px;
}
/* Two equal columns, one per method. */
.comparison-container {
display: grid;
grid-template-columns: 1fr 1fr;
gap: 20px;
margin-bottom: 20px;
}
/* Collapse to a single column at 768px and below. */
@media (max-width: 768px) {
.comparison-container {
grid-template-columns: 1fr;
}
}
.method-card {
height: 100%;
}
/* Card header: icon and title on one line. */
.method-header {
display: flex;
align-items: center;
gap: 10px;
font-weight: bold;
}
/* Fixed-height stage shared by both panels; position: relative anchors the
   absolutely positioned .flow-overlay, and overflow is clipped. */
.visual-area {
height: 120px;
background: var(--el-fill-color-light);
border-radius: 4px;
margin-bottom: 15px;
padding: 10px;
overflow: hidden;
position: relative;
display: flex;
align-items: center;
justify-content: center;
}
/* AR panel: tokens flow left to right and wrap from the top. */
.token-stream {
display: flex;
flex-wrap: wrap;
gap: 5px;
justify-content: flex-start;
align-content: flex-start;
width: 100%;
height: 100%;
}
.token-item {
font-family: monospace;
}
/* Enter/leave transition classes for <transition-group name="list">. */
.list-enter-active,
.list-leave-active {
transition: all 0.5s ease;
}
/* Entering/leaving tokens are transparent and shifted 10px down. */
.list-enter-from,
.list-leave-to {
opacity: 0;
transform: translateY(10px);
}
/* Flow panel: bars are aligned to the bottom edge of the stage. */
.flow-field {
width: 100%;
height: 100%;
display: flex;
align-items: flex-end;
justify-content: space-around;
gap: 2px;
}
/* One bar; height changes are animated, and the per-bar delay is set inline
   by the template. */
.flow-bar {
flex: 1;
background: linear-gradient(to top, #67C23A, #95d475);
border-radius: 2px 2px 0 0;
transition: height 0.5s ease;
}
/* Overlay covering the whole stage while the flow animation is in progress.
   NOTE(review): the tint is a fixed translucent white, not a theme
   variable — presumably not adapted to dark themes; confirm. */
.flow-overlay {
position: absolute;
top: 0;
left: 0;
width: 100%;
height: 100%;
background: rgba(255,255,255,0.5);
display: flex;
align-items: center;
justify-content: center;
gap: 5px;
color: var(--el-text-color-secondary);
}
</style>
@@ -0,0 +1,241 @@
<!--
  Waveform vs. spectrogram illustration.
  Left: animated time-domain bars. Right: a canvas heatmap painted by
  drawSpectrogram after mount. Below: a colour legend and explanation.
  Bar heights are derived from the loop index instead of Math.random() so
  that every render (server or client) produces the same markup.
-->
<template>
  <div class="spectrogram-viz">
    <el-card shadow="never">
      <div class="viz-layout">
        <!-- Left: Waveform -->
        <div class="viz-box">
          <div class="viz-header">
            <span class="viz-title">🌊 波形 (Waveform)</span>
            <el-tag size="small" type="success">Time Domain</el-tag>
          </div>
          <div class="viz-content waveform-container">
            <div class="wave-bars">
              <!-- Height is a fixed function of n (20%..79%). -->
              <div
                v-for="n in 30"
                :key="n"
                class="wave-bar"
                :style="{ height: (20 + (n * 37) % 60) + '%', animationDelay: n * 0.05 + 's' }"
              ></div>
            </div>
            <div class="axis-label x-axis">时间 (Time) →</div>
            <div class="axis-label y-axis">振幅 (Amplitude) →</div>
          </div>
        </div>

        <div class="transform-arrow">
          <div class="arrow-content">
            <span class="fft-text">FFT 变换</span>
            <el-icon><Right /></el-icon>
          </div>
        </div>

        <!-- Right: Spectrogram -->
        <div class="viz-box">
          <div class="viz-header">
            <span class="viz-title">🎨 频谱图 (Spectrogram)</span>
            <el-tag size="small" type="warning">Freq Domain</el-tag>
          </div>
          <div class="viz-content spectrogram-container">
            <!-- Painted once on mount; role and aria-label give the bitmap an accessible name. -->
            <canvas
              ref="canvasRef"
              width="200"
              height="100"
              role="img"
              aria-label="频谱图 (Spectrogram)"
            ></canvas>
            <div class="axis-label x-axis">时间 (Time) →</div>
            <div class="axis-label y-axis">频率 (Freq) →</div>
          </div>
        </div>
      </div>

      <el-divider />

      <el-alert
        title="像看乐谱一样看声音"
        type="info"
        :closable="false"
        show-icon
      >
        <template #default>
          <div class="legend">
            <div class="legend-item">
              <div class="color-box low"></div> 低能量 (安静)
            </div>
            <div class="legend-item">
              <div class="color-box high"></div> 高能量 (响亮)
            </div>
          </div>
          <p>频谱图将一维的声音信号变成了二维图像,这样我们就可以用 <strong>CNN (卷积神经网络)</strong> 等图像模型来处理声音了。</p>
        </template>
      </el-alert>
    </el-card>
  </div>
</template>
<script setup>
import { ref, onMounted } from 'vue'
import { Right } from '@element-plus/icons-vue'
// Template ref for the <canvas ref="canvasRef"> element; null until mounted.
const canvasRef = ref(null)
// Paint the heatmap once, after the component has been mounted.
onMounted(() => {
drawSpectrogram()
})
// Paints a simulated spectrogram onto the canvas as a grid of 4x4 cells.
// Intensity per cell = 0.8 * (1 - y / height) + random noise in [0, 0.2)
// + 0.2 * sin(0.1 * x), clamped to [0, 1], where y = 0 is the bottom row.
// The intensity is mapped to a hue from 240 (blue, low) to 0 (red, high).
// Does nothing if the canvas ref is not set.
const drawSpectrogram = () => {
  const canvas = canvasRef.value
  if (!canvas) {
    return
  }

  const ctx = canvas.getContext('2d')
  const { width, height } = canvas
  const cell = 4

  let x = 0
  while (x < width) {
    // Time variation depends only on the column, so compute it once per column.
    const timeVar = Math.sin(x * 0.1) * 0.2

    for (let y = 0; y < height; y += cell) {
      // Low frequencies (bottom, small y) get more base energy than high ones.
      const normalizedY = 1 - y / height
      const raw = normalizedY * 0.8 + Math.random() * 0.2 + timeVar
      const intensity = Math.min(1, Math.max(0, raw))

      // Blue (low) to Red (high)
      const hue = 240 - intensity * 240
      ctx.fillStyle = 'hsl(' + hue + ', 80%, 50%)'
      // Canvas y grows downward, so flip to put y = 0 on the bottom row.
      ctx.fillRect(x, height - y - cell, cell, cell)
    }

    x += cell
  }
}
</script>
<style scoped>
/* Root wrapper: vertical spacing around the whole visualisation. */
.spectrogram-viz {
margin: 20px 0;
}
/* Waveform box, arrow and spectrogram box in a row; wraps on narrow widths. */
.viz-layout {
display: flex;
align-items: center;
justify-content: space-around;
flex-wrap: wrap;
gap: 15px;
}
/* One panel: header above content, at least 250px wide. */
.viz-box {
flex: 1;
min-width: 250px;
display: flex;
flex-direction: column;
gap: 10px;
}
/* Title on the left, domain tag on the right. */
.viz-header {
display: flex;
justify-content: space-between;
align-items: center;
}
.viz-title {
font-weight: bold;
font-size: 0.9em;
}
/* Dark plotting area; position: relative anchors the absolute axis labels. */
.viz-content {
position: relative;
background: #1a1a1a;
border-radius: 6px;
height: 140px;
padding: 10px 10px 20px 25px; /* Space for axis labels */
overflow: hidden;
}
.waveform-container {
display: flex;
align-items: center;
justify-content: center;
}
/* Row of vertically centered bars filling the plotting area. */
.wave-bars {
display: flex;
align-items: center;
gap: 2px;
height: 100%;
width: 100%;
}
/* One bar; each bar's animation delay is set inline by the template. */
.wave-bar {
flex: 1;
background: var(--el-color-success);
border-radius: 2px;
animation: wave 1.5s ease-in-out infinite;
}
/* Pulse between 20% and 90% of the container height. */
@keyframes wave {
0%, 100% { height: 20%; opacity: 0.6; }
50% { height: 90%; opacity: 1; }
}
/* "FFT" label and arrow icon between the two panels. */
.transform-arrow {
display: flex;
flex-direction: column;
align-items: center;
color: var(--el-text-color-secondary);
}
.arrow-content {
display: flex;
flex-direction: column;
align-items: center;
font-size: 1.2em;
}
.fft-text {
font-size: 0.7em;
margin-bottom: 5px;
}
/* Stretch the canvas to fill its container via CSS. */
.spectrogram-container canvas {
width: 100%;
height: 100%;
border-radius: 4px;
}
/* Small axis captions positioned inside the plotting area. */
.axis-label {
position: absolute;
font-size: 9px;
color: #666;
}
/* Bottom-right corner. */
.x-axis {
bottom: 2px;
right: 10px;
}
/* Left edge: vertical text, rotated by 180 degrees. */
.y-axis {
top: 10px;
left: 2px;
writing-mode: vertical-rl;
transform: rotate(180deg);
}
/* Colour legend shown inside the alert. */
.legend {
display: flex;
gap: 15px;
margin-bottom: 10px;
font-size: 0.8em;
}
.legend-item {
display: flex;
align-items: center;
gap: 5px;
}
/* Colour swatch. */
.color-box {
width: 12px;
height: 12px;
border-radius: 2px;
}
/* Hue 240: the colour drawSpectrogram uses for intensity 0. */
.color-box.low {
background: hsl(240, 80%, 50%);
}
/* Hue 0: the colour drawSpectrogram uses for intensity 1. */
.color-box.high {
background: hsl(0, 80%, 50%);
}
</style>