feat: 完整中文翻译 maths-cs-ai-compendium（数学·计算机科学·AI 知识大全）

翻译自英文原版 maths-cs-ai-compendium，共 20 章全部完成。

第01章向量 | 第02章矩阵 | 第03章微积分
第04章统计学 | 第05章概率论 | 第06章机器学习
第07章计算语言学 | 第08章计算机视觉 | 第09章音频与语音
第10章多模态学习 | 第11章自主系统 | 第12章图神经网络
第13章计算与操作系统 | 第14章数据结构与算法
第15章生产级软件工程 | 第16章 SIMD与GPU编程
第17章 AI推理 | 第18章 ML系统设计
第19章应用人工智能 | 第20章前沿人工智能

翻译说明：
- 所有数学公式 $...$ / $$...$$、代码块、图片引用完整保留
- mkdocs.yml 配置中文导航 + language: zh
- README.md 已翻译为中文（兼 docs/index.md）
- docs/ 目录包含指向各章文件的 symlink
- 约 29,000 行中文内容，排除 .cache/ 构建缓存
2026-05-03 10:23:20 +08:00
commit 2536c937e3
400 changed files with 49040 additions and 0 deletions
@@ -0,0 +1,83 @@
+<svg width="700" height="280" viewBox="0 0 700 280" role="img" aria-label="Vision Transformer (ViT) pipeline: image split into patches, linear projection with position embeddings, Transformer encoder, MLP head, class label" xmlns="http://www.w3.org/2000/svg">
+  <defs>
+    <marker id="vit-arrow" markerWidth="7" markerHeight="5" refX="7" refY="2.5" orient="auto">
+      <polygon points="0 0, 7 2.5, 0 5" fill="#555"/>
+    </marker>
+  </defs>
+  <text x="350" y="22" fill="#333" font-size="14" font-weight="bold" text-anchor="middle">Vision Transformer (ViT) Pipeline</text>
+
+  <!-- Stage 1: input image, drawn as an 80x80 grey square at (15,55) with dashed red patch boundaries -->
+  <text x="55" y="48" fill="#666" font-size="10" text-anchor="middle">Image</text>
+  <rect x="15" y="55" width="80" height="80" rx="2" fill="#ddd" stroke="#999" stroke-width="1.5"/>
+  <!-- 3 vertical + 3 horizontal dividers, 20 units apart, form a 4x4 grid. NOTE(review): the "16×16 patches" caption presumably means patch size in pixels, not the grid count; confirm -->
+  <line x1="35" y1="55" x2="35" y2="135" stroke="#e74c3c" stroke-width="0.8" stroke-dasharray="3,2"/>
+  <line x1="55" y1="55" x2="55" y2="135" stroke="#e74c3c" stroke-width="0.8" stroke-dasharray="3,2"/>
+  <line x1="75" y1="55" x2="75" y2="135" stroke="#e74c3c" stroke-width="0.8" stroke-dasharray="3,2"/>
+  <line x1="15" y1="75" x2="95" y2="75" stroke="#e74c3c" stroke-width="0.8" stroke-dasharray="3,2"/>
+  <line x1="15" y1="95" x2="95" y2="95" stroke="#e74c3c" stroke-width="0.8" stroke-dasharray="3,2"/>
+  <line x1="15" y1="115" x2="95" y2="115" stroke="#e74c3c" stroke-width="0.8" stroke-dasharray="3,2"/>
+  <text x="55" y="150" fill="#e74c3c" font-size="8" text-anchor="middle">16×16 patches</text>
+
+  <!-- Arrow from the image to the token stack, labelled "flatten" -->
+  <line x1="100" y1="95" x2="130" y2="95" stroke="#555" stroke-width="1.2" marker-end="url(#vit-arrow)"/>
+  <text x="115" y="88" fill="#666" font-size="7" text-anchor="middle">flatten</text>
+
+  <!-- Stage 2: token sequence drawn as a vertical stack (one purple [CLS] box, then blue patch boxes) -->
+  <text x="160" y="48" fill="#666" font-size="10" text-anchor="middle">Patches</text>
+  <!-- [CLS] token: taller, purple box at the top of the stack -->
+  <rect x="140" y="55" width="40" height="14" rx="3" fill="#9b59b6" opacity="0.3" stroke="#9b59b6" stroke-width="1.5"/>
+  <text x="160" y="65" fill="#9b59b6" font-size="7" text-anchor="middle" font-weight="bold">[CLS]</text>
+  <!-- Patch tokens: three boxes, a vertical ellipsis, then a final box to suggest N entries -->
+  <rect x="140" y="72" width="40" height="10" rx="2" fill="#3498db" opacity="0.2" stroke="#3498db" stroke-width="0.8"/>
+  <rect x="140" y="84" width="40" height="10" rx="2" fill="#3498db" opacity="0.2" stroke="#3498db" stroke-width="0.8"/>
+  <rect x="140" y="96" width="40" height="10" rx="2" fill="#3498db" opacity="0.2" stroke="#3498db" stroke-width="0.8"/>
+  <text x="160" y="116" fill="#666" font-size="7" text-anchor="middle">⋮</text>
+  <rect x="140" y="120" width="40" height="10" rx="2" fill="#3498db" opacity="0.2" stroke="#3498db" stroke-width="0.8"/>
+  <text x="160" y="145" fill="#666" font-size="8" text-anchor="middle">N+1 tokens</text>
+
+  <!-- Arrow from the token stack to the projection box (unlabelled) -->
+  <line x1="185" y1="95" x2="215" y2="95" stroke="#555" stroke-width="1.2" marker-end="url(#vit-arrow)"/>
+
+  <!-- Stage 3: orange box for the linear projection plus position embeddings -->
+  <rect x="220" y="55" width="65" height="80" rx="4" fill="#f39c12" opacity="0.12" stroke="#f39c12" stroke-width="1.5"/>
+  <text x="252" y="80" fill="#f39c12" font-size="8" text-anchor="middle" font-weight="bold">Linear</text>
+  <text x="252" y="92" fill="#f39c12" font-size="8" text-anchor="middle" font-weight="bold">Proj</text>
+  <text x="252" y="108" fill="#f39c12" font-size="8" text-anchor="middle">+ pos emb</text>
+
+  <!-- Arrow from the projection box into the encoder -->
+  <line x1="290" y1="95" x2="310" y2="95" stroke="#555" stroke-width="1.2" marker-end="url(#vit-arrow)"/>
+
+  <!-- Stage 4: Transformer encoder, an outer rounded container holding the stacked layer boxes -->
+  <rect x="315" y="45" width="200" height="110" rx="8" fill="#3498db" opacity="0.08" stroke="#3498db" stroke-width="2"/>
+  <text x="415" y="65" fill="#3498db" font-size="10" text-anchor="middle" font-weight="bold">Transformer Encoder</text>
+
+  <!-- Two identical layer boxes stand in for the repeated blocks; the "× L layers" caption gives the repeat count -->
+  <rect x="330" y="75" width="170" height="25" rx="4" fill="#3498db" opacity="0.12" stroke="#3498db" stroke-width="1"/>
+  <text x="415" y="92" fill="#3498db" font-size="8" text-anchor="middle">Multi-Head Self-Attention + FFN</text>
+  <rect x="330" y="105" width="170" height="25" rx="4" fill="#3498db" opacity="0.15" stroke="#3498db" stroke-width="1"/>
+  <text x="415" y="122" fill="#3498db" font-size="8" text-anchor="middle">Multi-Head Self-Attention + FFN</text>
+  <text x="415" y="145" fill="#666" font-size="8" text-anchor="middle">× L layers</text>
+
+  <!-- Purple arrow carrying the [CLS] output to the head; its arrowhead stays #555 because the shared marker has a fixed fill -->
+  <line x1="520" y1="85" x2="555" y2="85" stroke="#9b59b6" stroke-width="1.5" marker-end="url(#vit-arrow)"/>
+  <text x="538" y="78" fill="#9b59b6" font-size="7" text-anchor="middle">[CLS]</text>
+
+  <!-- Stage 5: green box for the MLP classification head -->
+  <rect x="560" y="65" width="70" height="40" rx="6" fill="#27ae60" opacity="0.12" stroke="#27ae60" stroke-width="1.5"/>
+  <text x="595" y="82" fill="#27ae60" font-size="9" text-anchor="middle" font-weight="bold">MLP</text>
+  <text x="595" y="95" fill="#27ae60" font-size="8" text-anchor="middle">Head</text>
+
+  <!-- Output: final arrow and the two-line "class label" caption near the right edge of the 700-unit canvas -->
+  <line x1="630" y1="85" x2="660" y2="85" stroke="#555" stroke-width="1.2" marker-end="url(#vit-arrow)"/>
+  <text x="680" y="82" fill="#333" font-size="9" text-anchor="middle" font-weight="bold">class</text>
+  <text x="680" y="94" fill="#333" font-size="9" text-anchor="middle" font-weight="bold">label</text>
+
+  <!-- Bottom panel: key details in two columns (centred at x=170 and x=500) plus one full-width line; Conv2d is written with keyword arguments so P is not read as a channel count -->
+  <rect x="30" y="175" width="640" height="90" rx="6" fill="#f5f5f5" stroke="#333" stroke-width="1"/>
+  <text x="350" y="195" fill="#333" font-size="10" text-anchor="middle" font-weight="bold">Key Details</text>
+  <text x="170" y="215" fill="#666" font-size="9" text-anchor="middle">Patch embedding ≡ Conv2d(kernel_size=P, stride=P)</text>
+  <text x="500" y="215" fill="#666" font-size="9" text-anchor="middle">Position embeddings: learnable or sinusoidal</text>
+  <text x="170" y="235" fill="#666" font-size="9" text-anchor="middle">[CLS] token aggregates global info</text>
+  <text x="500" y="235" fill="#666" font-size="9" text-anchor="middle">Self-attention cost: O(N²) in patches</text>
+  <text x="350" y="255" fill="#666" font-size="9" text-anchor="middle">Less inductive bias than CNNs — needs more data, but scales better</text>
+</svg>