+
+
+
+
+
+
- {{ item.icon }}
-
{{ item.label }}
+
+ {{ item.icon }}
+ {{ item.label }}
+
+
+
+ {{ (getAttentionScore(hoverIndex, index) * 100).toFixed(0) }}%
+
-
-
-
-
-
Patch: {{ items[hoverIndex]?.label }}
-
正在关注:
-
- -
- {{ items[targetIdx].icon }}
- {{ items[targetIdx].label }}
-
-
+
+
+
+ 👆
+ 把鼠标悬停在任意方块上,
观察它在"关注"谁
+
+
+
+
当前 Patch:
+
+ {{ items[hoverIndex].icon }} {{ items[hoverIndex].label }}
-
-
+
+
+
+
+
+
+ {{ items[idx].icon }}
+ {{ items[idx].label }}
+
+
+
+
{{ (score * 100).toFixed(0) }}%
+
+
+
+
+
+ 💡
+
+ {{ getInsightText(hoverIndex) }}
+
+
+
@@ -62,207 +115,309 @@ import { ref } from 'vue'
const hoverIndex = ref(-1)
+// 3x3 Grid Data (Cat in grass)
+// Entries are stored in row-major order: getCenter derives the row as
+// floor(index / 3) and the column as index % 3. getAttentionScore and
+// getInsightText hard-code indices 2, 4, 6, 7 and the grass list
+// [0, 1, 3, 5, 8], so reordering entries here requires updating them as well.
const items = [
- { icon: '🌲', label: '背景' },
- { icon: '🌲', label: '背景' },
- { icon: '☁️', label: '天空' },
- { icon: '👂', label: '猫耳' },
- { icon: '😼', label: '猫脸' },
- { icon: '🌲', label: '背景' },
- { icon: '🐾', label: '猫爪' },
- { icon: '🧶', label: '毛线' },
- { icon: '🌱', label: '草地' }
+ { icon: '🌿', label: '草地' }, // 0
+ { icon: '🌿', label: '草地' }, // 1
+ { icon: '🦋', label: '蝴蝶' }, // 2
+ { icon: '🌿', label: '草地' }, // 3
+ { icon: '🐱', label: '猫头' }, // 4
+ { icon: '🌿', label: '草地' }, // 5
+ { icon: '🧶', label: '毛球' }, // 6
+ { icon: '🐾', label: '猫爪' }, // 7
+ { icon: '🌿', label: '草地' } // 8
]
-// 3x3 Grid
+// Layout Logic
+// Converts a flat, row-major grid index into the center point of its cell.
+// Note that `gap` is the center-to-center pitch (100), not the spacing between
+// cells; the first cell's center sits at (offsetX, offsetY) = (50, 50).
+// Units are those of whatever coordinate system the template draws the lines
+// in (presumably an SVG overlay's viewBox — TODO confirm against the markup).
const getCenter = (index) => {
const row = Math.floor(index / 3)
const col = index % 3
- // Assuming 80px cell + 10px gap
- const cellSize = 80
- const gap = 10
- const offset = cellSize / 2
+ const gap = 100
+ const offsetX = 50
+ const offsetY = 50
return {
- x: col * (cellSize + gap) + offset,
- y: row * (cellSize + gap) + offset
+ x: col * gap + offsetX,
+ y: row * gap + offsetY
}
}
-// Mock attention weights
-const getAttentionWeight = (source, target) => {
- // Self attention is ignored for visualization clarity usually, but let's say:
+// Attention Logic
+// Hand-authored (mock) attention score from patch `source` to patch `target`.
+// Both arguments are indices into `items`. Returns a number between 0 and 1;
+// a patch never attends to itself (score 0). Scores are directional:
+// score(a, b) is looked up independently of score(b, a).
+const getAttentionScore = (source, target) => {
+ if (source === target) return 0
+
+ // Cat Head (4) attends strongly to:
+ if (source === 4) {
+ if (target === 7) return 0.95 // Paws (Body parts connected)
+ if (target === 2) return 0.8 // Butterfly (Interest)
+ if (target === 6) return 0.6 // Yarn (Toy)
+ return 0.1 // Background
+ }
- // Cat parts (3, 4, 6) attend strongly to each other
- const catParts = [3, 4, 6]
- const isSourceCat = catParts.includes(source)
- const isTargetCat = catParts.includes(target)
+ // Cat Paws (7) attends strongly to:
+ if (source === 7) {
+ if (target === 4) return 0.95 // Head
+ if (target === 6) return 0.9 // Yarn (Touching)
+ return 0.1
+ }
- if (isSourceCat && isTargetCat) return 0.9 // Strong connection between cat parts
+ // Butterfly (2)
+ if (source === 2) {
+ if (target === 4) return 0.7 // Danger?
+ return 0.2
+ }
+
+ // Yarn (6): mirrors the scores the cat assigns to it, so hovering the yarn
+ // highlights the paws/head instead of showing a flat 10% for every patch
+ // (which would contradict the insight text shown for the yarn).
+ if (source === 6) {
+ if (target === 7) return 0.9 // Paws (Touching)
+ if (target === 4) return 0.6 // Head (Owner of the toy)
+ return 0.1
+ }
- // Cat interacts with Yarn (7)
- if (isSourceCat && target === 7) return 0.7
- if (source === 7 && isTargetCat) return 0.7
+ // Grass (Background)
+ // Background patches attend to each other for texture consistency
+ const bgIndices = [0, 1, 3, 5, 8]
+ if (bgIndices.includes(source)) {
+ if (bgIndices.includes(target)) return 0.6
+ return 0.05
+ }
- // Background parts attend to each other
- const bgParts = [0, 1, 2, 5, 8]
- if (bgParts.includes(source) && bgParts.includes(target)) return 0.5
-
- return 0.1 // Weak attention otherwise
+ // Default fallback (only reachable for a source index outside 0-8)
+ return 0.1
}
-const getAttentionColor = (source, target) => {
- const weight = getAttentionWeight(source, target)
- // Green for strong, gray for weak
- if (weight > 0.6) return `rgba(16, 185, 129, ${weight})`
- return `rgba(156, 163, 175, ${weight * 0.5})`
+// Color of the connection from `source` to `target`, returned as a CSS
+// variable reference: the brand color when the attention score is above 0.5,
+// otherwise the tertiary text color. Transparency is handled separately by
+// getLineOpacity, so no alpha is encoded here.
+const getLineColor = (source, target) => {
+ const score = getAttentionScore(source, target)
+ return score > 0.5 ? 'var(--vp-c-brand)' : 'var(--vp-c-text-3)'
}
-const getAttentionWidth = (source, target) => {
- const weight = getAttentionWeight(source, target)
- return weight * 5
+// Thickness of the connection from `source` to `target`: a base of 1 plus up
+// to 4 more, scaled linearly by the attention score, so even a zero score
+// still yields a visible width of 1.
+const getLineWidth = (source, target) => {
+ const score = getAttentionScore(source, target)
+ return 1 + score * 4
+}
+
+// Opacity of the connection from `source` to `target`: a floor of 0.2 plus
+// 0.8 times the attention score, i.e. 0.2 for a score of 0 and 1.0 for a
+// score of 1.
+const getLineOpacity = (source, target) => {
+ const score = getAttentionScore(source, target)
+ return 0.2 + score * 0.8
}
+// Returns an object mapping up to three target indices to their attention
+// scores from `source`, keeping the three highest-scoring targets and never
+// including `source` itself.
+// NOTE(review): the returned object has integer-like keys, and JavaScript
+// always enumerates such keys in ascending numeric order regardless of
+// insertion order. The descending sort below therefore only decides WHICH
+// three targets are kept, not the order in which they are iterated; a
+// consumer that needs highest-score-first rows would need an array instead.
const getTopAttentions = (source) => {
- const weights = {}
+ const scores = {}
items.forEach((_, idx) => {
if (idx !== source) {
- weights[idx] = getAttentionWeight(source, idx)
+ scores[idx] = getAttentionScore(source, idx)
}
})
- // Sort by weight desc
- return weights
+ // Sort descending
+ const sortedKeys = Object.keys(scores).sort((a, b) => scores[b] - scores[a])
+ const top3 = {}
+ sortedKeys.slice(0, 3).forEach(key => {
+ top3[key] = scores[key]
+ })
+ return top3
+}
+
+// Returns the one-sentence explanation shown for the patch at index `idx`.
+// Indices 0-8 each match a specific branch; any other value (for example the
+// initial hoverIndex of -1) falls through to the generic sentence at the end.
+const getInsightText = (idx) => {
+ if (idx === 4) return "猫头最关注猫爪(组成身体)和蝴蝶(捕猎目标)。"
+ if (idx === 7) return "猫爪最关注毛球(正在玩耍)和猫头。"
+ if (idx === 2) return "蝴蝶关注到了猫,可能是因为它是个威胁。"
+ if ([0,1,3,5,8].includes(idx)) return "草地主要关注周围的草地,确认背景纹理。"
+ if (idx === 6) return "毛球和猫爪有很强的互动关系。"
+ return "Self-Attention 让每个部分找到它的上下文关联。"
}
diff --git a/docs/.vitepress/theme/components/appendix/vlm-intro/LinearProjectionDemo.vue b/docs/.vitepress/theme/components/appendix/vlm-intro/LinearProjectionDemo.vue
index 5f5996b..bd9f9c1 100644
--- a/docs/.vitepress/theme/components/appendix/vlm-intro/LinearProjectionDemo.vue
+++ b/docs/.vitepress/theme/components/appendix/vlm-intro/LinearProjectionDemo.vue
@@ -3,16 +3,16 @@
-
1. Patch (4x4)
+
1. Patch (16×16×3) (示意 / Toy)
-
768 像素点
+
16×16 像素 × 3 通道 = 768 标量值
➜
@@ -22,13 +22,14 @@
2. Flatten
-
拉平成向量
+
得到 1×768 向量 (Vector)
× W
@@ -39,13 +40,16 @@
-
压缩特征 (D=8)
+
映射到 D 维 (示意 D=8;常见 D=768)