feat: add AI and Backend evolution history with interactive demos, and refine Frontend evolution demo
This commit is contained in:
@@ -1,57 +1,110 @@
|
||||
<template>
|
||||
<div class="attn-demo">
|
||||
<div class="controls">
|
||||
<span class="hint">🖱️ 把鼠标悬停在方块上,查看它的“注意力”分配</span>
|
||||
<div class="header">
|
||||
<div class="title">Self-Attention Mechanism</div>
|
||||
<div class="subtitle">自注意力机制:全局信息交互</div>
|
||||
</div>
|
||||
|
||||
<div class="visual-area">
|
||||
<div class="image-grid" @mouseleave="hoverIndex = -1">
|
||||
<div class="visual-stage">
|
||||
<!-- Grid Layout -->
|
||||
<div class="grid-container" @mouseleave="hoverIndex = -1">
|
||||
<!-- SVG Layer for Connection Lines -->
|
||||
<svg class="connections-layer">
|
||||
<defs>
|
||||
<marker id="arrowhead" markerWidth="6" markerHeight="4" refX="18" refY="2" orient="auto">
|
||||
<polygon points="0 0, 6 2, 0 4" fill="var(--vp-c-brand)" opacity="0.6"/>
|
||||
</marker>
|
||||
</defs>
|
||||
<!-- Draw lines from hoverIndex to ALL other nodes -->
|
||||
<g v-if="hoverIndex !== -1">
|
||||
<line
|
||||
v-for="(target, tIndex) in items"
|
||||
:key="tIndex"
|
||||
v-show="tIndex !== hoverIndex"
|
||||
:x1="getCenter(hoverIndex).x"
|
||||
:y1="getCenter(hoverIndex).y"
|
||||
:x2="getCenter(tIndex).x"
|
||||
:y2="getCenter(tIndex).y"
|
||||
:stroke="getLineColor(hoverIndex, tIndex)"
|
||||
:stroke-width="getLineWidth(hoverIndex, tIndex)"
|
||||
stroke-linecap="round"
|
||||
:opacity="getLineOpacity(hoverIndex, tIndex)"
|
||||
/>
|
||||
</g>
|
||||
</svg>
|
||||
|
||||
<!-- Cells -->
|
||||
<div
|
||||
v-for="(item, index) in items"
|
||||
:key="index"
|
||||
class="grid-cell"
|
||||
:class="{ active: hoverIndex === index }"
|
||||
:class="{
|
||||
'is-source': hoverIndex === index,
|
||||
'is-target': hoverIndex !== -1 && hoverIndex !== index,
|
||||
'is-strong-attn': hoverIndex !== -1 && getAttentionScore(hoverIndex, index) > 0.5
|
||||
}"
|
||||
@mouseenter="hoverIndex = index"
|
||||
:style="{
|
||||
left: getCenter(index).x - 30 + 'px',
|
||||
top: getCenter(index).y - 30 + 'px'
|
||||
}"
|
||||
>
|
||||
{{ item.icon }}
|
||||
<div class="cell-label">{{ item.label }}</div>
|
||||
<div class="cell-content">
|
||||
<span class="cell-icon">{{ item.icon }}</span>
|
||||
<span class="cell-label">{{ item.label }}</span>
|
||||
</div>
|
||||
<!-- Attention Score Badge -->
|
||||
<div
|
||||
class="attn-badge"
|
||||
v-if="hoverIndex !== -1 && hoverIndex !== index"
|
||||
:style="{ opacity: Math.max(0.3, getAttentionScore(hoverIndex, index)) }"
|
||||
>
|
||||
{{ (getAttentionScore(hoverIndex, index) * 100).toFixed(0) }}%
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- SVG Overlay for lines -->
|
||||
<svg class="connections" v-if="hoverIndex !== -1">
|
||||
<line
|
||||
v-for="(target, tIndex) in items"
|
||||
:key="tIndex"
|
||||
v-if="tIndex !== hoverIndex"
|
||||
:x1="getCenter(hoverIndex).x"
|
||||
:y1="getCenter(hoverIndex).y"
|
||||
:x2="getCenter(tIndex).x"
|
||||
:y2="getCenter(tIndex).y"
|
||||
:stroke="getAttentionColor(hoverIndex, tIndex)"
|
||||
:stroke-width="getAttentionWidth(hoverIndex, tIndex)"
|
||||
stroke-linecap="round"
|
||||
/>
|
||||
</svg>
|
||||
</div>
|
||||
|
||||
<div class="info-panel" :class="{ visible: hoverIndex !== -1 }">
|
||||
<div class="info-title">Patch: {{ items[hoverIndex]?.label }}</div>
|
||||
<div class="info-desc">正在关注:</div>
|
||||
<ul class="attn-list" v-if="hoverIndex !== -1">
|
||||
<li
|
||||
v-for="(weight, targetIdx) in getTopAttentions(hoverIndex)"
|
||||
:key="targetIdx"
|
||||
>
|
||||
<span class="target-icon">{{ items[targetIdx].icon }}</span>
|
||||
<span class="target-name">{{ items[targetIdx].label }}</span>
|
||||
<div class="bar-bg">
|
||||
<div
|
||||
class="bar-fill"
|
||||
:style="{ width: weight * 100 + '%' }"
|
||||
></div>
|
||||
<!-- Info Panel -->
|
||||
<div class="info-panel">
|
||||
<div v-if="hoverIndex === -1" class="placeholder-text">
|
||||
<span class="cursor-icon">👆</span>
|
||||
把鼠标悬停在任意方块上,<br>观察它在"关注"谁
|
||||
</div>
|
||||
<div v-else class="active-info">
|
||||
<div class="source-info">
|
||||
<span class="label">当前 Patch:</span>
|
||||
<div class="patch-tag">
|
||||
{{ items[hoverIndex].icon }} {{ items[hoverIndex].label }}
|
||||
</div>
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
<div class="attn-list">
|
||||
<div class="list-header">Attention Weights (注意力权重)</div>
|
||||
<div
|
||||
class="attn-item"
|
||||
v-for="(score, idx) in getTopAttentions(hoverIndex)"
|
||||
:key="idx"
|
||||
>
|
||||
<div class="item-left">
|
||||
<span class="item-icon">{{ items[idx].icon }}</span>
|
||||
<span class="item-name">{{ items[idx].label }}</span>
|
||||
</div>
|
||||
<div class="item-right">
|
||||
<div class="progress-bar">
|
||||
<div class="progress-fill" :style="{ width: score * 100 + '%' }"></div>
|
||||
</div>
|
||||
<span class="score-text">{{ (score * 100).toFixed(0) }}%</span>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="insight-box">
|
||||
<span class="bulb">💡</span>
|
||||
<span class="insight-text">
|
||||
{{ getInsightText(hoverIndex) }}
|
||||
</span>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
@@ -62,207 +115,309 @@ import { ref } from 'vue'
|
||||
|
||||
// Index of the grid cell currently under the pointer; -1 means no cell is hovered.
const hoverIndex = ref(-1)
|
||||
|
||||
// 3x3 Grid Data (Cat in grass)
|
||||
// Patches of the toy 3x3 image, listed row by row; the array position is the
// cell index (0..8) that the layout and attention helpers use to address a patch.
const items = [
  { icon: '🌿', label: '草地' }, // 0
  { icon: '🌿', label: '草地' }, // 1
  { icon: '🦋', label: '蝴蝶' }, // 2
  { icon: '🌿', label: '草地' }, // 3
  { icon: '🐱', label: '猫头' }, // 4
  { icon: '🌿', label: '草地' }, // 5
  { icon: '🧶', label: '毛球' }, // 6
  { icon: '🐾', label: '猫爪' }, // 7
  { icon: '🌿', label: '草地' } // 8
]
|
||||
|
||||
// 3x3 Grid
|
||||
// Layout Logic
|
||||
// Returns the centre point { x, y } (in px, relative to the grid container's
// top-left corner) of the cell at `index`, where cells are numbered row by row
// on a 3-column grid.
// Neighbouring centres are 100px apart and the first centre sits 50px in, so
// the three columns/rows land on 50, 150 and 250 inside the 300px container.
const getCenter = (index) => {
  const columns = 3
  const pitch = 100 // distance between two neighbouring cell centres
  const inset = 50 // centre of the first cell on each axis

  const row = Math.floor(index / columns)
  const col = index % columns

  return {
    x: col * pitch + inset,
    y: row * pitch + inset
  }
}
|
||||
|
||||
// Mock attention weights
|
||||
// Attention Logic
// Hand-authored attention scores for the toy 3x3 scene (nothing here is
// computed by a model). Returns how strongly patch `source` attends to patch
// `target` as a number between 0 and 1; a patch's attention to itself is
// reported as 0 so it never shows up in the visualisation.
// Indices follow `items`: 4 = cat head, 7 = cat paws, 2 = butterfly,
// 6 = yarn ball, 0/1/3/5/8 = grass.
const getAttentionScore = (source, target) => {
  if (source === target) return 0

  // Cat head
  if (source === 4) {
    if (target === 7) return 0.95 // paws: part of the same body
    if (target === 2) return 0.8 // butterfly: object of interest
    if (target === 6) return 0.6 // yarn ball: toy
    return 0.1 // grass
  }

  // Cat paws
  if (source === 7) {
    if (target === 4) return 0.95 // head
    if (target === 6) return 0.9 // yarn ball: being touched
    return 0.1
  }

  // Butterfly
  if (source === 2) {
    if (target === 4) return 0.7 // the cat
    return 0.2
  }

  // Yarn ball: mirrors the paws -> yarn and head -> yarn rows above, so that
  // hovering the yarn shows the strong yarn/paws link described by
  // getInsightText(6) instead of a flat 0.1 towards every patch.
  if (source === 6) {
    if (target === 7) return 0.9 // paws
    if (target === 4) return 0.6 // head
    return 0.1
  }

  // Grass patches mostly attend to the other grass patches.
  const grass = [0, 1, 3, 5, 8]
  if (grass.includes(source)) {
    return grass.includes(target) ? 0.6 : 0.05
  }

  // Default fallback for any index not covered above.
  return 0.1
}
|
||||
|
||||
// Stroke colour of the connection line drawn from `source` to `target`:
// the brand colour when the attention score is above 0.5, otherwise the
// muted text colour.
const getLineColor = (source, target) => {
  const score = getAttentionScore(source, target)
  return score > 0.5 ? 'var(--vp-c-brand)' : 'var(--vp-c-text-3)'
}
|
||||
|
||||
// Stroke width of the connection line from `source` to `target`: grows
// linearly with the attention score, starting from a base width of 1
// (score 0 -> 1, score 1 -> 5).
const getLineWidth = (source, target) => {
  const score = getAttentionScore(source, target)
  return 1 + score * 4
}
|
||||
|
||||
// Opacity of the connection line from `source` to `target`: grows linearly
// with the attention score from a floor of 0.2, so even the weakest links
// stay faintly visible.
const getLineOpacity = (source, target) => {
  const score = getAttentionScore(source, target)
  return 0.2 + score * 0.8
}
|
||||
|
||||
// Returns the `limit` (default 3) patches that `source` attends to most, as a
// plain object mapping target index -> score. `source` itself is excluded;
// ties keep the lower index.
// NOTE: the keys are integer-like, so JavaScript enumerates the returned
// object in ascending index order, not in score order. The object only
// carries WHICH patches rank highest; a consumer that needs them listed by
// score has to sort the entries again.
const getTopAttentions = (source, limit = 3) => {
  const ranked = items
    .map((_, idx) => idx)
    .filter((idx) => idx !== source)
    .map((idx) => [idx, getAttentionScore(source, idx)])
    .sort(([, a], [, b]) => b - a) // descending by score (stable, so ties keep index order)
    .slice(0, limit)

  return Object.fromEntries(ranked)
}
|
||||
|
||||
// Returns the one-sentence explanation shown in the insight box for the
// hovered patch `idx`. Patches are identified by their index in `items`;
// any index without a dedicated sentence gets a generic Self-Attention remark.
const getInsightText = (idx) => {
  switch (idx) {
    case 4: // cat head
      return "猫头最关注猫爪(组成身体)和蝴蝶(捕猎目标)。"
    case 7: // cat paws
      return "猫爪最关注毛球(正在玩耍)和猫头。"
    case 2: // butterfly
      return "蝴蝶关注到了猫,可能是因为它是个威胁。"
    case 0:
    case 1:
    case 3:
    case 5:
    case 8: // grass patches
      return "草地主要关注周围的草地,确认背景纹理。"
    case 6: // yarn ball
      return "毛球和猫爪有很强的互动关系。"
    default:
      return "Self-Attention 让每个部分找到它的上下文关联。"
  }
}
|
||||
</script>
|
||||
|
||||
<style scoped>
|
||||
.attn-demo {
|
||||
padding: 20px;
|
||||
background: var(--vp-c-bg-soft);
|
||||
border-radius: 8px;
|
||||
border: 1px solid var(--vp-c-divider);
|
||||
border-radius: 12px;
|
||||
padding: 24px;
|
||||
margin: 20px 0;
|
||||
user-select: none;
|
||||
font-family: 'Menlo', 'Monaco', sans-serif;
|
||||
}
|
||||
|
||||
.controls {
|
||||
.header {
|
||||
text-align: center;
|
||||
margin-bottom: 20px;
|
||||
margin-bottom: 30px;
|
||||
}
|
||||
|
||||
.hint {
|
||||
font-size: 0.9em;
|
||||
color: var(--vp-c-text-2);
|
||||
background: var(--vp-c-bg);
|
||||
padding: 4px 12px;
|
||||
border-radius: 12px;
|
||||
border: 1px solid var(--vp-c-divider);
|
||||
.title {
|
||||
font-size: 16px;
|
||||
font-weight: bold;
|
||||
color: var(--vp-c-text-1);
|
||||
}
|
||||
|
||||
.visual-area {
|
||||
display: flex;
|
||||
justify-content: center;
|
||||
gap: 40px;
|
||||
flex-wrap: wrap;
|
||||
}
|
||||
|
||||
.image-grid {
|
||||
display: grid;
|
||||
grid-template-columns: repeat(3, 80px);
|
||||
gap: 10px;
|
||||
position: relative;
|
||||
}
|
||||
|
||||
.grid-cell {
|
||||
width: 80px;
|
||||
height: 80px;
|
||||
background: var(--vp-c-bg);
|
||||
border: 2px solid var(--vp-c-divider);
|
||||
border-radius: 8px;
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
align-items: center;
|
||||
justify-content: center;
|
||||
cursor: pointer;
|
||||
transition: all 0.2s;
|
||||
z-index: 2;
|
||||
position: relative;
|
||||
}
|
||||
|
||||
.grid-cell:hover,
|
||||
.grid-cell.active {
|
||||
border-color: var(--vp-c-brand);
|
||||
transform: scale(1.05);
|
||||
box-shadow: 0 4px 12px rgba(0, 0, 0, 0.1);
|
||||
background: var(--vp-c-bg-mute);
|
||||
}
|
||||
|
||||
.cell-label {
|
||||
font-size: 0.8em;
|
||||
.subtitle {
|
||||
font-size: 12px;
|
||||
color: var(--vp-c-text-2);
|
||||
margin-top: 4px;
|
||||
}
|
||||
|
||||
.connections {
|
||||
.visual-stage {
|
||||
display: flex;
|
||||
gap: 40px;
|
||||
justify-content: center;
|
||||
align-items: flex-start;
|
||||
flex-wrap: wrap;
|
||||
}
|
||||
|
||||
/* Grid Area */
|
||||
.grid-container {
|
||||
width: 300px;
|
||||
height: 300px;
|
||||
position: relative;
|
||||
/* background: rgba(0,0,0,0.02); */
|
||||
border-radius: 12px;
|
||||
}
|
||||
|
||||
.connections-layer {
|
||||
position: absolute;
|
||||
top: 0;
|
||||
left: 0;
|
||||
width: 100%;
|
||||
height: 100%;
|
||||
pointer-events: none;
|
||||
z-index: 1;
|
||||
pointer-events: none;
|
||||
}
|
||||
|
||||
.grid-cell {
|
||||
position: absolute;
|
||||
width: 60px;
|
||||
height: 60px;
|
||||
background: var(--vp-c-bg);
|
||||
border: 2px solid var(--vp-c-divider);
|
||||
border-radius: 12px;
|
||||
display: flex;
|
||||
align-items: center;
|
||||
justify-content: center;
|
||||
cursor: pointer;
|
||||
z-index: 2;
|
||||
transition: all 0.3s cubic-bezier(0.34, 1.56, 0.64, 1);
|
||||
box-shadow: 0 4px 6px rgba(0,0,0,0.05);
|
||||
}
|
||||
|
||||
.cell-content {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
align-items: center;
|
||||
}
|
||||
|
||||
.cell-icon {
|
||||
font-size: 24px;
|
||||
line-height: 1.2;
|
||||
}
|
||||
|
||||
.cell-label {
|
||||
font-size: 10px;
|
||||
color: var(--vp-c-text-2);
|
||||
font-weight: bold;
|
||||
}
|
||||
|
||||
/* Interaction States */
|
||||
.grid-cell:hover, .grid-cell.is-source {
|
||||
z-index: 10;
|
||||
border-color: var(--vp-c-brand);
|
||||
background: var(--vp-c-bg);
|
||||
transform: scale(1.15);
|
||||
box-shadow: 0 8px 20px rgba(0,0,0,0.15);
|
||||
}
|
||||
|
||||
.grid-cell.is-strong-attn {
|
||||
border-color: var(--vp-c-brand-light);
|
||||
background: var(--vp-c-brand-dimm);
|
||||
}
|
||||
|
||||
.attn-badge {
|
||||
position: absolute;
|
||||
top: -8px;
|
||||
right: -8px;
|
||||
background: var(--vp-c-brand);
|
||||
color: white;
|
||||
font-size: 9px;
|
||||
padding: 2px 6px;
|
||||
border-radius: 10px;
|
||||
font-weight: bold;
|
||||
box-shadow: 0 2px 4px rgba(0,0,0,0.2);
|
||||
}
|
||||
|
||||
/* Info Panel */
|
||||
.info-panel {
|
||||
width: 200px;
|
||||
width: 280px;
|
||||
min-height: 260px;
|
||||
background: var(--vp-c-bg);
|
||||
border: 1px solid var(--vp-c-divider);
|
||||
border-radius: 8px;
|
||||
padding: 15px;
|
||||
opacity: 0;
|
||||
transition: opacity 0.2s;
|
||||
pointer-events: none;
|
||||
border-radius: 12px;
|
||||
padding: 20px;
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
justify-content: center;
|
||||
}
|
||||
|
||||
.info-panel.visible {
|
||||
opacity: 1;
|
||||
pointer-events: auto;
|
||||
.placeholder-text {
|
||||
text-align: center;
|
||||
color: var(--vp-c-text-3);
|
||||
font-size: 13px;
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
align-items: center;
|
||||
gap: 10px;
|
||||
}
|
||||
|
||||
.info-title {
|
||||
font-weight: bold;
|
||||
margin-bottom: 5px;
|
||||
color: var(--vp-c-brand);
|
||||
.cursor-icon {
|
||||
font-size: 32px;
|
||||
animation: bounce 2s infinite;
|
||||
}
|
||||
|
||||
.info-desc {
|
||||
font-size: 0.85em;
|
||||
.source-info {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 10px;
|
||||
margin-bottom: 20px;
|
||||
padding-bottom: 15px;
|
||||
border-bottom: 1px dashed var(--vp-c-divider);
|
||||
}
|
||||
|
||||
.label {
|
||||
font-size: 12px;
|
||||
color: var(--vp-c-text-2);
|
||||
}
|
||||
|
||||
.patch-tag {
|
||||
background: var(--vp-c-brand-dimm);
|
||||
color: var(--vp-c-brand-dark);
|
||||
padding: 4px 12px;
|
||||
border-radius: 6px;
|
||||
font-size: 13px;
|
||||
font-weight: bold;
|
||||
}
|
||||
|
||||
.list-header {
|
||||
font-size: 11px;
|
||||
color: var(--vp-c-text-3);
|
||||
text-transform: uppercase;
|
||||
margin-bottom: 10px;
|
||||
letter-spacing: 0.5px;
|
||||
}
|
||||
|
||||
.attn-list {
|
||||
list-style: none;
|
||||
padding: 0;
|
||||
margin: 0;
|
||||
.attn-item {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
justify-content: space-between;
|
||||
margin-bottom: 12px;
|
||||
}
|
||||
|
||||
.attn-list li {
|
||||
.item-left {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 8px;
|
||||
margin-bottom: 6px;
|
||||
font-size: 0.85em;
|
||||
width: 80px;
|
||||
}
|
||||
|
||||
.target-icon {
|
||||
width: 20px;
|
||||
text-align: center;
|
||||
.item-icon { font-size: 16px; }
|
||||
.item-name { font-size: 12px; font-weight: 500; }
|
||||
|
||||
.item-right {
|
||||
flex: 1;
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 10px;
|
||||
}
|
||||
|
||||
.target-name {
|
||||
width: 40px;
|
||||
}
|
||||
|
||||
.bar-bg {
|
||||
.progress-bar {
|
||||
flex: 1;
|
||||
height: 6px;
|
||||
background: var(--vp-c-bg-soft);
|
||||
@@ -270,9 +425,50 @@ const getTopAttentions = (source) => {
|
||||
overflow: hidden;
|
||||
}
|
||||
|
||||
.bar-fill {
|
||||
.progress-fill {
|
||||
height: 100%;
|
||||
background: var(--vp-c-brand);
|
||||
border-radius: 3px;
|
||||
}
|
||||
|
||||
.score-text {
|
||||
font-size: 11px;
|
||||
color: var(--vp-c-text-2);
|
||||
width: 30px;
|
||||
text-align: right;
|
||||
font-family: monospace;
|
||||
}
|
||||
|
||||
.insight-box {
|
||||
margin-top: 15px;
|
||||
background: var(--vp-c-yellow-dimm);
|
||||
padding: 10px;
|
||||
border-radius: 8px;
|
||||
display: flex;
|
||||
gap: 8px;
|
||||
align-items: flex-start;
|
||||
}
|
||||
|
||||
.bulb { font-size: 16px; }
|
||||
.insight-text {
|
||||
font-size: 12px;
|
||||
color: var(--vp-c-text-1);
|
||||
line-height: 1.4;
|
||||
}
|
||||
|
||||
@keyframes bounce {
|
||||
0%, 100% { transform: translateY(0); }
|
||||
50% { transform: translateY(-5px); }
|
||||
}
|
||||
|
||||
@media (max-width: 768px) {
|
||||
.visual-stage {
|
||||
flex-direction: column;
|
||||
align-items: center;
|
||||
}
|
||||
.info-panel {
|
||||
width: 100%;
|
||||
min-height: auto;
|
||||
}
|
||||
}
|
||||
</style>
|
||||
|
||||
@@ -3,16 +3,16 @@
|
||||
<div class="demo-container">
|
||||
<!-- Step 1: Patch -->
|
||||
<div class="step-box">
|
||||
<div class="label">1. Patch (4x4)</div>
|
||||
<div class="label">1. Patch (16×16×3) (示意 / Toy)</div>
|
||||
<div class="grid-patch">
|
||||
<div
|
||||
v-for="n in 16"
|
||||
v-for="n in patchCellCount"
|
||||
:key="n"
|
||||
class="pixel"
|
||||
:style="{ backgroundColor: getPixelColor(n) }"
|
||||
></div>
|
||||
</div>
|
||||
<div class="desc">768 像素点</div>
|
||||
<div class="desc">16×16 像素 × 3 通道 = 768 标量值</div>
|
||||
</div>
|
||||
|
||||
<div class="arrow">➜</div>
|
||||
@@ -22,13 +22,14 @@
|
||||
<div class="label">2. Flatten</div>
|
||||
<div class="vector-container">
|
||||
<div
|
||||
v-for="n in 16"
|
||||
v-for="n in flattenSampleCount"
|
||||
:key="n"
|
||||
class="vector-cell"
|
||||
:style="{ backgroundColor: getPixelColor(n) }"
|
||||
></div>
|
||||
<div class="vector-ellipsis">…</div>
|
||||
</div>
|
||||
<div class="desc">拉平成向量</div>
|
||||
<div class="desc">得到 1×768 向量 (Vector)</div>
|
||||
</div>
|
||||
|
||||
<div class="arrow">× W</div>
|
||||
@@ -39,13 +40,16 @@
|
||||
<div class="embedding-container">
|
||||
<div v-for="n in 8" :key="n" class="embed-cell"></div>
|
||||
</div>
|
||||
<div class="desc">压缩特征 (D=8)</div>
|
||||
<div class="desc">映射到 D 维 (示意 D=8;常见 D=768)</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</template>
|
||||
|
||||
<script setup>
|
||||
// Number of cells rendered in the patch grid: one per pixel of a 16 x 16 patch.
const patchCellCount = 16 * 16
// Number of cells rendered for the flattened vector; only a sample is drawn
// and the template follows it with an ellipsis.
const flattenSampleCount = 32
|
||||
|
||||
const getPixelColor = (n) => {
|
||||
// Generate a gradient of colors
|
||||
const hue = (n * 20) % 360
|
||||
@@ -89,8 +93,8 @@ const getPixelColor = (n) => {
|
||||
|
||||
.grid-patch {
|
||||
display: grid;
|
||||
grid-template-columns: repeat(4, 1fr);
|
||||
gap: 2px;
|
||||
grid-template-columns: repeat(16, 1fr);
|
||||
gap: 1px;
|
||||
width: 80px;
|
||||
height: 80px;
|
||||
}
|
||||
@@ -105,7 +109,7 @@ const getPixelColor = (n) => {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
gap: 1px;
|
||||
height: 120px;
|
||||
height: 140px;
|
||||
width: 20px;
|
||||
justify-content: center;
|
||||
}
|
||||
@@ -115,6 +119,14 @@ const getPixelColor = (n) => {
|
||||
flex: 1;
|
||||
}
|
||||
|
||||
.vector-ellipsis {
|
||||
font-size: 12px;
|
||||
line-height: 1;
|
||||
color: var(--vp-c-text-3);
|
||||
text-align: center;
|
||||
padding-top: 4px;
|
||||
}
|
||||
|
||||
.embedding-container {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
|
||||
+273
-240
@@ -8,113 +8,135 @@
|
||||
</div>
|
||||
</div>
|
||||
<div class="toggle-label">
|
||||
<span :class="{ active: !isVLM }">Pure LLM</span>
|
||||
<span :class="{ active: !isVLM }">Pure LLM (纯文本)</span>
|
||||
<span class="arrow">→</span>
|
||||
<span :class="{ active: isVLM }">Multimodal VLM</span>
|
||||
<span :class="{ active: isVLM }">Multimodal VLM (多模态)</span>
|
||||
</div>
|
||||
</div>
|
||||
<div class="status-desc">
|
||||
{{
|
||||
isVLM
|
||||
? '给大脑装上眼睛:视觉信号经过翻译,变成 Token 混入文字流。'
|
||||
: '纯文本大脑:只能听懂 Token 语言,无法感知图像。'
|
||||
? 'Tokens from vision are translated and placed before text tokens. (视觉信息被翻译成 Token,放在文字 Token 之前。)'
|
||||
: 'Text-only tokens flow into the LLM. (只有文字 Token 流入大模型。)'
|
||||
}}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="diagram-stage" :class="{ 'vlm-mode': isVLM }">
|
||||
<!-- Vision Pipeline (Only visible in VLM mode) -->
|
||||
<div class="pipeline vision-pipeline">
|
||||
<div class="node-group">
|
||||
<div class="node input-node image-node">
|
||||
<span class="icon">🖼️</span>
|
||||
<span class="label">Image</span>
|
||||
</div>
|
||||
<div class="flow-arrow">⬇</div>
|
||||
<div
|
||||
class="node process-node vit-node"
|
||||
title="Vision Transformer: The Eye"
|
||||
>
|
||||
<span class="icon">👁️</span>
|
||||
<span class="label">ViT</span>
|
||||
</div>
|
||||
<div class="flow-arrow">⬇</div>
|
||||
<div
|
||||
class="node adapter-node projector-node"
|
||||
title="Projector: The Translator"
|
||||
>
|
||||
<span class="icon">🔌</span>
|
||||
<span class="label">Projector</span>
|
||||
</div>
|
||||
<div class="flow-arrow connector-arrow">⤵</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Text Pipeline (Always visible) -->
|
||||
<div class="pipeline text-pipeline">
|
||||
<div class="node-group horizontal">
|
||||
<div class="node input-node text-node">
|
||||
<span class="icon">⌨️</span>
|
||||
<span class="label">Prompt</span>
|
||||
</div>
|
||||
<div class="flow-arrow">➜</div>
|
||||
<div class="node process-node embed-node">
|
||||
<span class="icon">🔤</span>
|
||||
<span class="label">Embed</span>
|
||||
</div>
|
||||
|
||||
<!-- Merge Point Visualization -->
|
||||
<div class="merge-point" :class="{ active: isVLM }">
|
||||
<div class="plus-icon">+</div>
|
||||
<div class="merge-label">Concat</div>
|
||||
</div>
|
||||
|
||||
<div class="flow-arrow">➜</div>
|
||||
<div class="node core-node llm-node">
|
||||
<span class="icon">🧠</span>
|
||||
<span class="label">LLM Backbone</span>
|
||||
<div class="inner-flow">
|
||||
<span class="dot t1"></span>
|
||||
<span class="dot t2"></span>
|
||||
<span class="dot v1" v-if="isVLM"></span>
|
||||
<div class="diagram-stage">
|
||||
<div class="lanes">
|
||||
<div class="lane lane-vision" v-show="isVLM">
|
||||
<div class="lane-title">Vision Path (视觉路径)</div>
|
||||
<div class="lane-flow">
|
||||
<div class="node input-node">
|
||||
<span class="icon">🖼️</span>
|
||||
<span class="label">Image (图片)</span>
|
||||
</div>
|
||||
<span class="mini-arrow">→</span>
|
||||
<div class="node process-node vit-node">
|
||||
<span class="icon">👁️</span>
|
||||
<span class="label">ViT (视觉模型)</span>
|
||||
</div>
|
||||
<span class="mini-arrow">→</span>
|
||||
<div class="node adapter-node">
|
||||
<span class="icon">🔌</span>
|
||||
<span class="label">Projector (投影器)</span>
|
||||
</div>
|
||||
<span class="mini-arrow">→</span>
|
||||
<div class="token-box token-box-vision">
|
||||
<div class="token-box-title">Vision Tokens (视觉 Token)</div>
|
||||
<div class="tokens">
|
||||
<span class="token vision">v1</span>
|
||||
<span class="token vision">v2</span>
|
||||
<span class="token vision">v3</span>
|
||||
<span class="token vision">…</span>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="flow-arrow">➜</div>
|
||||
<div class="node output-node">
|
||||
<span class="icon">💬</span>
|
||||
<span class="label">Response</span>
|
||||
</div>
|
||||
|
||||
<div class="lane lane-text">
|
||||
<div class="lane-title">Text Path (文字路径)</div>
|
||||
<div class="lane-flow">
|
||||
<div class="node input-node">
|
||||
<span class="icon">⌨️</span>
|
||||
<span class="label">Prompt (提示词)</span>
|
||||
</div>
|
||||
<span class="mini-arrow">→</span>
|
||||
<div class="node process-node">
|
||||
<span class="icon">🔤</span>
|
||||
<span class="label">Embed (向量化)</span>
|
||||
</div>
|
||||
<span class="mini-arrow">→</span>
|
||||
<div class="token-box">
|
||||
<div class="token-box-title">Text Tokens (文字 Token)</div>
|
||||
<div class="tokens">
|
||||
<span class="token text">t1</span>
|
||||
<span class="token text">t2</span>
|
||||
<span class="token text">t3</span>
|
||||
<span class="token text">…</span>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="merge-stage">
|
||||
<div class="merge-title">Token Sequence (输入序列)</div>
|
||||
<div class="sequence">
|
||||
<div v-if="isVLM" class="sequence-row">
|
||||
<span class="sequence-tag vision">Vision (视觉)</span>
|
||||
<div class="tokens">
|
||||
<span class="token vision">v1</span>
|
||||
<span class="token vision">v2</span>
|
||||
<span class="token vision">v3</span>
|
||||
<span class="token vision">…</span>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sequence-row">
|
||||
<span class="sequence-tag text">Text (文字)</span>
|
||||
<div class="tokens">
|
||||
<span class="token text">t1</span>
|
||||
<span class="token text">t2</span>
|
||||
<span class="token text">t3</span>
|
||||
<span class="token text">…</span>
|
||||
</div>
|
||||
</div>
|
||||
<div class="sequence-hint">
|
||||
<span v-if="isVLM">Concat: [Vision Tokens] + [Text Tokens] (拼接:视觉在前,文字在后)</span>
|
||||
<span v-else>Only [Text Tokens] (只有文字 Token)</span>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="core-stage">
|
||||
<span class="big-arrow">→</span>
|
||||
<div class="node core-node">
|
||||
<span class="icon">🧠</span>
|
||||
<span class="label">LLM Backbone (大模型)</span>
|
||||
</div>
|
||||
<span class="big-arrow">→</span>
|
||||
<div class="node output-node">
|
||||
<span class="icon">💬</span>
|
||||
<span class="label">Response (回复)</span>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="interactive-info">
|
||||
<div class="info-card" v-if="!isVLM">
|
||||
<h3>Standard LLM Flow</h3>
|
||||
<p>
|
||||
Text is converted into vectors (Embeddings) and processed by the
|
||||
Transformer to predict the next word.
|
||||
</p>
|
||||
</div>
|
||||
<div class="info-card vlm-info" v-else>
|
||||
<h3>VLM = LLM + Vision Encoder</h3>
|
||||
<ul>
|
||||
<li>
|
||||
<strong>ViT (The Eye):</strong> Slices image into patches and
|
||||
extracts features.
|
||||
</li>
|
||||
<li>
|
||||
<strong>Projector (The Translator):</strong> Converts visual
|
||||
features into the same "language" (vector dimension) as text
|
||||
embeddings.
|
||||
</li>
|
||||
<li>
|
||||
<strong>Concatenation:</strong> The translated visual tokens are
|
||||
pasted <em>before</em> the text tokens. The LLM sees them as
|
||||
"foreign words" it learned to understand.
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
<transition name="fade" mode="out-in">
|
||||
<div class="info-card" v-if="!isVLM" key="llm">
|
||||
<h3>Standard LLM Flow (标准大模型流程)</h3>
|
||||
<p>Prompt → Embedding → Token Sequence → LLM → Response。</p>
|
||||
</div>
|
||||
<div class="info-card vlm-info" v-else key="vlm">
|
||||
<h3>VLM = LLM + Vision Encoder (视觉大模型原理)</h3>
|
||||
<ul>
|
||||
<li><strong>ViT (The Eye):</strong> 把图片编码成视觉特征。</li>
|
||||
<li><strong>Projector (The Translator):</strong> 把视觉特征映射到 LLM 的 Token 空间。</li>
|
||||
<li><strong>Concatenation (拼接):</strong> 把视觉 Token 放在文字 Token 之前,作为同一条输入序列。</li>
|
||||
</ul>
|
||||
</div>
|
||||
</transition>
|
||||
</div>
|
||||
</div>
|
||||
</template>
|
||||
@@ -140,12 +162,11 @@ const toggleMode = () => {
|
||||
user-select: none;
|
||||
}
|
||||
|
||||
/* Controls */
|
||||
.controls-header {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
align-items: center;
|
||||
margin-bottom: 30px;
|
||||
margin-bottom: 18px;
|
||||
gap: 12px;
|
||||
}
|
||||
|
||||
@@ -216,105 +237,160 @@ const toggleMode = () => {
|
||||
font-size: 13px;
|
||||
color: var(--vp-c-text-2);
|
||||
text-align: center;
|
||||
height: 20px;
|
||||
line-height: 1.5;
|
||||
max-width: 720px;
|
||||
}
|
||||
|
||||
/* Diagram Stage */
|
||||
.diagram-stage {
|
||||
position: relative;
|
||||
height: 240px;
|
||||
background: var(--vp-c-bg);
|
||||
border: 1px dashed var(--vp-c-divider);
|
||||
border-radius: 8px;
|
||||
overflow: hidden;
|
||||
padding: 18px;
|
||||
}
|
||||
|
||||
.lanes {
|
||||
display: flex;
|
||||
justify-content: center;
|
||||
align-items: center;
|
||||
}
|
||||
|
||||
/* Pipelines */
|
||||
.pipeline {
|
||||
transition: all 0.5s cubic-bezier(0.34, 1.56, 0.64, 1);
|
||||
}
|
||||
|
||||
.text-pipeline {
|
||||
position: absolute;
|
||||
bottom: 80px; /* Centered vertically in LLM mode */
|
||||
left: 50%;
|
||||
transform: translateX(-50%);
|
||||
width: 100%;
|
||||
display: flex;
|
||||
justify-content: center;
|
||||
}
|
||||
|
||||
.vlm-mode .text-pipeline {
|
||||
bottom: 40px; /* Move down in VLM mode */
|
||||
}
|
||||
|
||||
.vision-pipeline {
|
||||
position: absolute;
|
||||
top: 20px;
|
||||
left: 20%; /* Align with input side */
|
||||
opacity: 0;
|
||||
transform: translateY(-20px);
|
||||
pointer-events: none;
|
||||
}
|
||||
|
||||
.vlm-mode .vision-pipeline {
|
||||
opacity: 1;
|
||||
transform: translateY(0);
|
||||
pointer-events: auto;
|
||||
}
|
||||
|
||||
.node-group {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 6px;
|
||||
}
|
||||
|
||||
.node-group.horizontal {
|
||||
flex-direction: row;
|
||||
}
|
||||
|
||||
.vision-pipeline .node-group {
|
||||
flex-direction: column;
|
||||
gap: 14px;
|
||||
}
|
||||
|
||||
.lane {
|
||||
background: var(--vp-c-bg-mute);
|
||||
border: 1px solid var(--vp-c-divider);
|
||||
border-radius: 10px;
|
||||
padding: 12px;
|
||||
}
|
||||
|
||||
.lane-title {
|
||||
font-size: 12px;
|
||||
color: var(--vp-c-text-2);
|
||||
margin-bottom: 10px;
|
||||
font-weight: 700;
|
||||
}
|
||||
|
||||
.lane-flow {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 10px;
|
||||
flex-wrap: wrap;
|
||||
}
|
||||
|
||||
.merge-stage {
|
||||
background: var(--vp-c-bg);
|
||||
border: 1px solid var(--vp-c-divider);
|
||||
border-radius: 10px;
|
||||
padding: 12px;
|
||||
}
|
||||
|
||||
.merge-title {
|
||||
font-size: 12px;
|
||||
color: var(--vp-c-text-2);
|
||||
margin-bottom: 10px;
|
||||
font-weight: 700;
|
||||
}
|
||||
|
||||
.sequence {
|
||||
border: 1px solid var(--vp-c-divider);
|
||||
background: var(--vp-c-bg-soft);
|
||||
border-radius: 10px;
|
||||
padding: 10px;
|
||||
}
|
||||
|
||||
.sequence-row {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 10px;
|
||||
margin-bottom: 8px;
|
||||
flex-wrap: wrap;
|
||||
}
|
||||
|
||||
.sequence-row:last-child {
|
||||
margin-bottom: 0;
|
||||
}
|
||||
|
||||
.sequence-tag {
|
||||
font-size: 11px;
|
||||
font-weight: 800;
|
||||
padding: 2px 8px;
|
||||
border-radius: 999px;
|
||||
border: 1px solid var(--vp-c-divider);
|
||||
background: var(--vp-c-bg);
|
||||
color: var(--vp-c-text-2);
|
||||
}
|
||||
|
||||
.sequence-tag.vision {
|
||||
border-color: var(--vp-c-yellow);
|
||||
}
|
||||
|
||||
.sequence-tag.text {
|
||||
border-color: var(--vp-c-brand);
|
||||
}
|
||||
|
||||
.sequence-hint {
|
||||
margin-top: 8px;
|
||||
font-size: 11px;
|
||||
color: var(--vp-c-text-2);
|
||||
}
|
||||
|
||||
.core-stage {
|
||||
margin-top: 14px;
|
||||
display: flex;
|
||||
align-items: center;
|
||||
justify-content: center;
|
||||
gap: 10px;
|
||||
flex-wrap: wrap;
|
||||
}
|
||||
|
||||
.big-arrow {
|
||||
font-size: 18px;
|
||||
color: var(--vp-c-text-2);
|
||||
font-weight: 800;
|
||||
}
|
||||
|
||||
.mini-arrow {
|
||||
font-size: 14px;
|
||||
color: var(--vp-c-text-3);
|
||||
font-weight: 800;
|
||||
}
|
||||
|
||||
/* Nodes */
|
||||
.node {
|
||||
background: var(--vp-c-bg);
|
||||
border: 2px solid var(--vp-c-divider);
|
||||
border-radius: 8px;
|
||||
border-radius: 10px;
|
||||
padding: 8px 12px;
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
align-items: center;
|
||||
min-width: 70px;
|
||||
min-width: 110px;
|
||||
box-shadow: 0 4px 6px rgba(0, 0, 0, 0.05);
|
||||
position: relative;
|
||||
z-index: 2;
|
||||
}
|
||||
|
||||
.icon {
|
||||
font-size: 20px;
|
||||
margin-bottom: 4px;
|
||||
}
|
||||
|
||||
.label {
|
||||
font-size: 11px;
|
||||
font-weight: bold;
|
||||
font-weight: 800;
|
||||
text-align: center;
|
||||
line-height: 1.2;
|
||||
}
|
||||
|
||||
.input-node {
|
||||
border-color: #aaa;
|
||||
}
|
||||
|
||||
.process-node {
|
||||
border-color: var(--vp-c-brand-dimm);
|
||||
}
|
||||
|
||||
.core-node {
|
||||
border-color: var(--vp-c-brand);
|
||||
background: var(--vp-c-brand-dimm);
|
||||
min-width: 100px;
|
||||
min-width: 140px;
|
||||
}
|
||||
|
||||
.output-node {
|
||||
border-color: var(--vp-c-brand);
|
||||
}
|
||||
@@ -323,101 +399,64 @@ const toggleMode = () => {
|
||||
border-color: var(--vp-c-yellow);
|
||||
background: rgba(255, 197, 23, 0.05);
|
||||
}
|
||||
.projector-node {
|
||||
|
||||
.adapter-node {
|
||||
border-color: var(--vp-c-yellow);
|
||||
background: var(--vp-c-yellow-dimm);
|
||||
}
|
||||
|
||||
/* Arrows */
|
||||
.flow-arrow {
|
||||
color: var(--vp-c-text-3);
|
||||
font-size: 16px;
|
||||
.token-box {
|
||||
background: var(--vp-c-bg);
|
||||
border: 1px solid var(--vp-c-divider);
|
||||
border-radius: 10px;
|
||||
padding: 10px;
|
||||
min-width: 220px;
|
||||
}
|
||||
|
||||
.connector-arrow {
|
||||
font-size: 24px;
|
||||
color: var(--vp-c-yellow);
|
||||
margin-top: -10px;
|
||||
margin-bottom: -10px;
|
||||
transform: rotate(-45deg) translateX(10px);
|
||||
.token-box-vision {
|
||||
border-color: var(--vp-c-yellow);
|
||||
}
|
||||
|
||||
/* Merge Point */
|
||||
.merge-point {
|
||||
width: 0;
|
||||
overflow: hidden;
|
||||
transition: all 0.5s;
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
align-items: center;
|
||||
opacity: 0;
|
||||
}
|
||||
|
||||
.merge-point.active {
|
||||
width: 40px;
|
||||
opacity: 1;
|
||||
}
|
||||
|
||||
.plus-icon {
|
||||
font-weight: bold;
|
||||
.token-box-title {
|
||||
font-size: 11px;
|
||||
font-weight: 800;
|
||||
color: var(--vp-c-text-2);
|
||||
font-size: 18px;
|
||||
margin-bottom: 8px;
|
||||
}
|
||||
|
||||
.merge-label {
|
||||
font-size: 9px;
|
||||
color: var(--vp-c-text-3);
|
||||
}
|
||||
|
||||
/* Inner Flow Animation inside LLM */
|
||||
.inner-flow {
|
||||
.tokens {
|
||||
display: flex;
|
||||
gap: 4px;
|
||||
margin-top: 4px;
|
||||
height: 6px;
|
||||
gap: 6px;
|
||||
flex-wrap: wrap;
|
||||
}
|
||||
|
||||
.dot {
|
||||
width: 6px;
|
||||
height: 6px;
|
||||
border-radius: 50%;
|
||||
background: #fff;
|
||||
opacity: 0.6;
|
||||
animation: pulse 1s infinite alternate;
|
||||
.token {
|
||||
font-size: 11px;
|
||||
padding: 2px 8px;
|
||||
border-radius: 999px;
|
||||
border: 1px solid var(--vp-c-divider);
|
||||
background: var(--vp-c-bg-soft);
|
||||
color: var(--vp-c-text-1);
|
||||
}
|
||||
|
||||
.t1 {
|
||||
animation-delay: 0s;
|
||||
}
|
||||
.t2 {
|
||||
animation-delay: 0.2s;
|
||||
}
|
||||
.v1 {
|
||||
background: var(--vp-c-yellow);
|
||||
animation-delay: 0.4s;
|
||||
.token.vision {
|
||||
border-color: var(--vp-c-yellow);
|
||||
background: rgba(255, 197, 23, 0.12);
|
||||
}
|
||||
|
||||
@keyframes pulse {
|
||||
from {
|
||||
opacity: 0.3;
|
||||
transform: scale(0.8);
|
||||
}
|
||||
to {
|
||||
opacity: 1;
|
||||
transform: scale(1.1);
|
||||
}
|
||||
.token.text {
|
||||
border-color: var(--vp-c-brand);
|
||||
background: rgba(59, 130, 246, 0.12);
|
||||
}
|
||||
|
||||
/* Interactive Info */
|
||||
.interactive-info {
|
||||
margin-top: 20px;
|
||||
margin-top: 16px;
|
||||
}
|
||||
|
||||
.info-card {
|
||||
background: var(--vp-c-bg-mute);
|
||||
padding: 16px;
|
||||
border-radius: 8px;
|
||||
animation: fadeIn 0.3s;
|
||||
}
|
||||
|
||||
.info-card h3 {
|
||||
@@ -439,31 +478,25 @@ const toggleMode = () => {
|
||||
margin: 0;
|
||||
}
|
||||
|
||||
@keyframes fadeIn {
|
||||
from {
|
||||
opacity: 0;
|
||||
transform: translateY(5px);
|
||||
}
|
||||
to {
|
||||
opacity: 1;
|
||||
transform: translateY(0);
|
||||
}
|
||||
.fade-enter-active,
|
||||
.fade-leave-active {
|
||||
transition: opacity 0.3s ease;
|
||||
}
|
||||
|
||||
/* Mobile Adjustments */
|
||||
@media (max-width: 600px) {
|
||||
.fade-enter-from,
|
||||
.fade-leave-to {
|
||||
opacity: 0;
|
||||
}
|
||||
|
||||
@media (max-width: 720px) {
|
||||
.diagram-stage {
|
||||
height: 300px;
|
||||
padding: 14px;
|
||||
}
|
||||
|
||||
.text-pipeline {
|
||||
flex-wrap: wrap;
|
||||
gap: 10px;
|
||||
width: 90%;
|
||||
.node {
|
||||
min-width: 100px;
|
||||
}
|
||||
|
||||
.vision-pipeline {
|
||||
left: 10%;
|
||||
.token-box {
|
||||
min-width: 200px;
|
||||
}
|
||||
}
|
||||
</style>
|
||||
|
||||
@@ -6,67 +6,137 @@
|
||||
<div class="patchify-demo">
|
||||
<div class="control-panel">
|
||||
<div class="controls">
|
||||
<button class="action-btn" @click="toggleState">
|
||||
{{ isPatchified ? '还原图片 (Restore)' : '切分图片 (Patchify)' }}
|
||||
<button
|
||||
class="action-btn"
|
||||
@click="prevStep"
|
||||
:disabled="currentStep === 0"
|
||||
>
|
||||
⬅ 上一步 (Prev)
|
||||
</button>
|
||||
<div class="info">
|
||||
<span>Resolution: 224x224</span>
|
||||
<span>Patch Size: 16x16</span>
|
||||
<span>Total Patches: {{ 14 * 14 }}</span>
|
||||
</div>
|
||||
<span class="step-indicator">Step {{ currentStep + 1 }} / 4</span>
|
||||
<button
|
||||
class="action-btn primary"
|
||||
@click="nextStep"
|
||||
:disabled="currentStep === 3"
|
||||
>
|
||||
{{ currentStep === 3 ? '完成 (Done)' : '下一步 (Next) ➡' }}
|
||||
</button>
|
||||
</div>
|
||||
<div class="step-desc">
|
||||
{{ stepDescriptions[currentStep] }}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="visual-area">
|
||||
<!-- 原始/切分视图容器 -->
|
||||
<div class="image-container" :class="{ 'is-patchified': isPatchified }">
|
||||
<!--
|
||||
Step 0: Show container background, cells hidden
|
||||
Step 1: Show container background, grid overlay visible (cells with border)
|
||||
Step 2+: Container background hidden, cells visible with individual backgrounds
|
||||
-->
|
||||
<div
|
||||
class="image-container"
|
||||
:class="{
|
||||
'is-pixelated': currentStep >= 1,
|
||||
'is-patchified': currentStep >= 2
|
||||
}"
|
||||
>
|
||||
<div class="grid-overlay" v-if="currentStep === 1"></div>
|
||||
<div
|
||||
v-for="n in 196"
|
||||
:key="n"
|
||||
class="patch"
|
||||
:style="{
|
||||
'--delay': `${n * 0.005}s`,
|
||||
'--hue': `${(n % 14) * 20 + Math.floor(n / 14) * 20}`
|
||||
}"
|
||||
:style="getPatchStyle(n)"
|
||||
>
|
||||
<span class="patch-id" v-if="isPatchified">{{ n }}</span>
|
||||
<!-- Show number only in Pixelated stage to represent 'digitization' -->
|
||||
<span class="pixel-val" v-if="currentStep === 1">{{ Math.floor(Math.random() * 9) }}</span>
|
||||
<!-- Show ID in Patchified stage -->
|
||||
<span class="patch-id" v-if="currentStep >= 2">{{ n }}</span>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="arrow" v-if="isPatchified">⬇</div>
|
||||
<div class="arrow-down" v-if="currentStep >= 3">⬇</div>
|
||||
|
||||
<!-- 线性序列视图 -->
|
||||
<div class="sequence-container" v-if="isPatchified">
|
||||
<div class="sequence-label">Flattened Sequence (Token Input)</div>
|
||||
<div class="sequence-container" v-if="currentStep >= 3">
|
||||
<div class="sequence-label">Token Sequence: 196×D (每个 Token 是 D 维向量)</div>
|
||||
<div class="token-stream">
|
||||
<div
|
||||
v-for="n in 196"
|
||||
:key="n"
|
||||
class="mini-patch"
|
||||
:style="{ '--hue': `${(n % 14) * 20 + Math.floor(n / 14) * 20}` }"
|
||||
:style="getMiniPatchStyle(n)"
|
||||
></div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="explanation">
|
||||
<p>
|
||||
<span class="icon">💡</span>
|
||||
计算机将图片切成 <strong>14x14 = 196</strong> 个小方块(Patch)。
|
||||
然后把这些方块“拉直”成一长串序列,就像把一段话里的单词排成一排一样。
|
||||
这就是 <strong>Visual Tokenization</strong>。
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
</template>
|
||||
|
||||
<script setup>
|
||||
import { ref } from 'vue'
|
||||
import { ref, computed } from 'vue'
|
||||
|
||||
const isPatchified = ref(false)
|
||||
const currentStep = ref(0)
|
||||
|
||||
const toggleState = () => {
|
||||
isPatchified.value = !isPatchified.value
|
||||
const stepDescriptions = [
|
||||
"1. 原始图片 (Original Image): 计算机看到的原始输入。",
|
||||
"2. 数字化 (Digitization): 图片本质上是一个数字矩阵 (H x W x C)。",
|
||||
"3. 切块 (Patchify): 典型设置:224×224 按 16×16 切成 14×14=196 个 Patch(此处等比示意)。",
|
||||
"4. 序列化 (Serialize): 将二维分布的 Patch “拍扁”成一维序列 (Spatial Flatten)。现在它看起来就像一串“视觉单词”,可以被 Transformer 逐个读取。"
|
||||
]
|
||||
|
||||
/**
 * Advance the walkthrough by one step.
 * Does nothing once the last step (index 3) has been reached.
 */
const nextStep = () => {
  if (currentStep.value >= 3) return
  currentStep.value += 1
}
|
||||
|
||||
/**
 * Move the walkthrough back by one step.
 * Does nothing when already on the first step (index 0).
 */
const prevStep = () => {
  if (currentStep.value <= 0) return
  currentStep.value -= 1
}
|
||||
|
||||
// 模拟一张风景图的 CSS 渐变
|
||||
// Sky (Blue) -> Mountains (Green/Grey) -> Sun (Yellow)
|
||||
const bgImage = 'linear-gradient(to bottom, #87CEEB 0%, #87CEEB 50%, #228B22 50%, #228B22 100%)'
|
||||
// Add a sun using radial gradient
|
||||
const complexBg = 'radial-gradient(circle at 70% 20%, #FFD700 0%, #FFD700 10%, transparent 10.5%), linear-gradient(to bottom, #87CEEB 0%, #87CEEB 60%, #4CA1AF 60%, #2C3E50 100%)'
|
||||
|
||||
/**
 * Build the inline style for one cell of the 14-column patch grid.
 *
 * Every cell is 20px of a 280px scene, so the shared background is shifted by
 * the cell's column/row to make each cell show its own slice of the picture.
 * What the cell displays depends on the current step:
 *   - step 0:  cell is fully transparent (opacity 0), only the container shows
 *   - step 1:  cell is visible but paints no background image of its own
 *   - step 2+: cell paints its slice of `complexBg` and shrinks to 0.9 scale
 *
 * @param {number} n - 1-based patch index, as produced by `v-for="n in 196"`.
 * @returns {object} Style object suitable for a `:style` binding.
 */
const getPatchStyle = (n) => {
  const row = Math.floor((n - 1) / 14)
  const col = (n - 1) % 14
  const isPatchified = currentStep.value >= 2

  return {
    // Declared exactly once. The literal used to list `backgroundImage` twice,
    // and only the later (step-dependent) value ever took effect.
    backgroundImage: isPatchified ? complexBg : 'none',
    // Negative offsets slide the 280px background so this cell's slice lines up.
    backgroundPosition: `${col * -20}px ${row * -20}px`,
    backgroundSize: '280px 280px',
    opacity: currentStep.value === 0 ? 0 : 1,
    transform: isPatchified ? 'scale(0.9)' : 'scale(1)',
    transition: 'all 0.5s ease',
  }
}
|
||||
|
||||
/**
 * Build the inline style for one token in the flattened sequence view.
 *
 * Uses the same 14-column / 20px-per-cell background offsets as the grid
 * patches, so token `n` shows the same slice of the scene as grid patch `n`.
 *
 * @param {number} n - 1-based patch index, as produced by `v-for="n in 196"`.
 * @returns {object} Style object suitable for a `:style` binding.
 */
const getMiniPatchStyle = (n) => {
  const zeroBased = n - 1
  const offsetX = (zeroBased % 14) * -20
  const offsetY = Math.floor(zeroBased / 14) * -20

  return {
    backgroundImage: complexBg,
    backgroundPosition: `${offsetX}px ${offsetY}px`,
    backgroundSize: '280px 280px',
  }
}
|
||||
</script>
|
||||
|
||||
@@ -77,40 +147,68 @@ const toggleState = () => {
|
||||
padding: 20px;
|
||||
background: var(--vp-c-bg-soft);
|
||||
margin: 20px 0;
|
||||
user-select: none;
|
||||
}
|
||||
|
||||
.control-panel {
|
||||
margin-bottom: 20px;
|
||||
display: flex;
|
||||
justify-content: center;
|
||||
flex-direction: column;
|
||||
align-items: center;
|
||||
gap: 15px;
|
||||
}
|
||||
|
||||
.controls {
|
||||
display: flex;
|
||||
gap: 20px;
|
||||
gap: 15px;
|
||||
align-items: center;
|
||||
}
|
||||
|
||||
.action-btn {
|
||||
background: var(--vp-c-brand);
|
||||
color: white;
|
||||
border: none;
|
||||
.step-indicator {
|
||||
font-family: monospace;
|
||||
font-weight: bold;
|
||||
color: var(--vp-c-text-2);
|
||||
}
|
||||
|
||||
.step-desc {
|
||||
font-size: 0.9em;
|
||||
color: var(--vp-c-text-1);
|
||||
text-align: center;
|
||||
background: var(--vp-c-bg-mute);
|
||||
padding: 8px 16px;
|
||||
border-radius: 4px;
|
||||
cursor: pointer;
|
||||
font-weight: 600;
|
||||
transition: opacity 0.2s;
|
||||
}
|
||||
|
||||
.action-btn:hover {
|
||||
opacity: 0.9;
|
||||
}
|
||||
|
||||
.info {
|
||||
min-height: 40px;
|
||||
display: flex;
|
||||
gap: 15px;
|
||||
align-items: center;
|
||||
justify-content: center;
|
||||
width: 100%;
|
||||
}
|
||||
|
||||
.action-btn {
|
||||
background: var(--vp-c-bg-mute);
|
||||
color: var(--vp-c-text-1);
|
||||
border: 1px solid var(--vp-c-divider);
|
||||
padding: 6px 12px;
|
||||
border-radius: 4px;
|
||||
cursor: pointer;
|
||||
transition: all 0.2s;
|
||||
font-size: 0.9em;
|
||||
color: var(--vp-c-text-2);
|
||||
}
|
||||
|
||||
.action-btn.primary {
|
||||
background: var(--vp-c-brand);
|
||||
color: white;
|
||||
border-color: var(--vp-c-brand);
|
||||
}
|
||||
|
||||
.action-btn:disabled {
|
||||
opacity: 0.5;
|
||||
cursor: not-allowed;
|
||||
}
|
||||
|
||||
.action-btn:not(:disabled):hover {
|
||||
opacity: 0.8;
|
||||
transform: translateY(-1px);
|
||||
}
|
||||
|
||||
.visual-area {
|
||||
@@ -118,7 +216,7 @@ const toggleState = () => {
|
||||
flex-direction: column;
|
||||
align-items: center;
|
||||
gap: 20px;
|
||||
min-height: 300px;
|
||||
min-height: 350px;
|
||||
}
|
||||
|
||||
.image-container {
|
||||
@@ -126,31 +224,55 @@ const toggleState = () => {
|
||||
grid-template-columns: repeat(14, 1fr);
|
||||
width: 280px;
|
||||
height: 280px;
|
||||
gap: 0;
|
||||
background: #333;
|
||||
/* Step 0 & 1 Background */
|
||||
background-image: radial-gradient(circle at 70% 20%, #FFD700 0%, #FFD700 10%, transparent 10.5%), linear-gradient(to bottom, #87CEEB 0%, #87CEEB 60%, #4CA1AF 60%, #2C3E50 100%);
|
||||
position: relative;
|
||||
transition: all 0.5s ease;
|
||||
border: 2px solid var(--vp-c-text-1);
|
||||
box-shadow: 0 4px 12px rgba(0,0,0,0.1);
|
||||
}
|
||||
|
||||
/* Step 2+: Remove container background, let patches show */
|
||||
.image-container.is-patchified {
|
||||
background-image: none;
|
||||
background-color: transparent;
|
||||
gap: 2px;
|
||||
background: transparent;
|
||||
border-color: transparent;
|
||||
}
|
||||
|
||||
.patch {
|
||||
background-color: hsl(var(--hue), 70%, 60%);
|
||||
display: flex;
|
||||
align-items: center;
|
||||
justify-content: center;
|
||||
font-size: 8px;
|
||||
color: rgba(0, 0, 0, 0.5);
|
||||
transition: all 0.5s ease;
|
||||
color: rgba(255, 255, 255, 0.8);
|
||||
position: relative;
|
||||
}
|
||||
|
||||
.is-patchified .patch {
|
||||
/* Step 1: Pixelated Overlay Effect */
|
||||
.image-container.is-pixelated:not(.is-patchified) .patch {
|
||||
border: 1px solid rgba(255, 255, 255, 0.1);
|
||||
/* Use pseudo-element or just opacity logic in JS */
|
||||
}
|
||||
|
||||
/* Step 1: Digitization numbers */
|
||||
.pixel-val {
|
||||
font-family: monospace;
|
||||
font-size: 8px;
|
||||
color: rgba(0, 0, 0, 0.3);
|
||||
mix-blend-mode: overlay;
|
||||
}
|
||||
|
||||
.patch-id {
|
||||
background: rgba(0, 0, 0, 0.5);
|
||||
color: white;
|
||||
padding: 1px 2px;
|
||||
border-radius: 2px;
|
||||
transform: scale(0.9);
|
||||
font-size: 7px;
|
||||
}
|
||||
|
||||
.arrow-down {
|
||||
font-size: 24px;
|
||||
color: var(--vp-c-text-2);
|
||||
animation: bounce 1s infinite;
|
||||
}
|
||||
|
||||
.sequence-container {
|
||||
@@ -159,7 +281,7 @@ const toggleState = () => {
|
||||
padding: 15px;
|
||||
border-radius: 8px;
|
||||
border: 1px solid var(--vp-c-divider);
|
||||
animation: fadeIn 0.5s ease;
|
||||
animation: slideUp 0.5s ease;
|
||||
}
|
||||
|
||||
.sequence-label {
|
||||
@@ -171,50 +293,48 @@ const toggleState = () => {
|
||||
|
||||
.token-stream {
|
||||
display: flex;
|
||||
flex-wrap: wrap;
|
||||
gap: 2px;
|
||||
flex-wrap: nowrap;
|
||||
gap: 1px;
|
||||
overflow-x: auto;
|
||||
padding: 10px 5px; /* Space for brackets */
|
||||
align-items: center;
|
||||
position: relative;
|
||||
}
|
||||
|
||||
/* Add Matrix Brackets */
|
||||
.token-stream::before,
|
||||
.token-stream::after {
|
||||
content: '';
|
||||
display: block;
|
||||
width: 6px;
|
||||
height: 36px; /* Match vector height + padding */
|
||||
border: 2px solid var(--vp-c-text-3);
|
||||
flex-shrink: 0;
|
||||
}
|
||||
|
||||
.token-stream::before {
|
||||
border-right: none;
|
||||
}
|
||||
|
||||
.token-stream::after {
|
||||
border-left: none;
|
||||
}
|
||||
|
||||
.mini-patch {
|
||||
width: 10px;
|
||||
height: 10px;
|
||||
background-color: hsl(var(--hue), 70%, 60%);
|
||||
width: 6px; /* Thinner to allow more density */
|
||||
height: 32px; /* Taller to represent Vector Dimension D */
|
||||
border-radius: 1px;
|
||||
}
|
||||
|
||||
.explanation {
|
||||
margin-top: 20px;
|
||||
padding: 12px;
|
||||
background: var(--vp-c-bg-mute);
|
||||
border-radius: 6px;
|
||||
font-size: 0.9em;
|
||||
line-height: 1.6;
|
||||
}
|
||||
|
||||
.arrow {
|
||||
font-size: 24px;
|
||||
color: var(--vp-c-text-2);
|
||||
animation: bounce 1s infinite;
|
||||
flex-shrink: 0;
|
||||
opacity: 0.9;
|
||||
}
|
||||
|
||||
@keyframes bounce {
|
||||
0%,
|
||||
100% {
|
||||
transform: translateY(0);
|
||||
}
|
||||
50% {
|
||||
transform: translateY(5px);
|
||||
}
|
||||
0%, 100% { transform: translateY(0); }
|
||||
50% { transform: translateY(5px); }
|
||||
}
|
||||
|
||||
@keyframes fadeIn {
|
||||
from {
|
||||
opacity: 0;
|
||||
transform: translateY(10px);
|
||||
}
|
||||
to {
|
||||
opacity: 1;
|
||||
transform: translateY(0);
|
||||
}
|
||||
@keyframes slideUp {
|
||||
from { opacity: 0; transform: translateY(20px); }
|
||||
to { opacity: 1; transform: translateY(0); }
|
||||
}
|
||||
</style>
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
<div class="pipeline">
|
||||
<!-- 1. Transformer Output Grid -->
|
||||
<div class="stage">
|
||||
<div class="stage-label">1. Processed Patches (Grid)</div>
|
||||
<div class="stage-label">1. Patch Tokens (Shown as Grid) (Patch Token 网格示意)</div>
|
||||
<div class="grid-container">
|
||||
<div
|
||||
v-for="(item, index) in items"
|
||||
@@ -19,13 +19,13 @@
|
||||
|
||||
<div class="arrow-section">
|
||||
<div class="arrow-line"></div>
|
||||
<div class="arrow-text">Flatten & Output</div>
|
||||
<div class="arrow-text">Reshape for View: Grid ⇄ Sequence (重排显示:网格⇄序列)</div>
|
||||
</div>
|
||||
|
||||
<!-- 2. Feature Vector Sequence -->
|
||||
<div class="stage">
|
||||
<div class="stage-label">
|
||||
2. Feature Vector Sequence (The "Image Sentence")
|
||||
2. Output Token Sequence (N×D) (输出序列)
|
||||
</div>
|
||||
<div class="vector-sequence">
|
||||
<div
|
||||
|
||||
Reference in New Issue
Block a user