feat: complete Chinese translation of maths-cs-ai-compendium (Mathematics · Computer Science · AI Compendium)
Translated from the English original maths-cs-ai-compendium; all 20 chapters complete.
Ch. 01 Vectors | Ch. 02 Matrices | Ch. 03 Calculus
Ch. 04 Statistics | Ch. 05 Probability | Ch. 06 Machine Learning
Ch. 07 Computational Linguistics | Ch. 08 Computer Vision | Ch. 09 Audio & Speech
Ch. 10 Multimodal Learning | Ch. 11 Autonomous Systems | Ch. 12 Graph Neural Networks
Ch. 13 Computing & Operating Systems | Ch. 14 Data Structures & Algorithms
Ch. 15 Production-Grade Software Engineering | Ch. 16 SIMD & GPU Programming
Ch. 17 AI Inference | Ch. 18 ML System Design
Ch. 19 Applied AI | Ch. 20 Frontier AI
Translation notes:
- All math formulas ($...$ / $$...$$), code blocks, and image references are preserved intact
- mkdocs.yml configured with Chinese navigation and language: zh
- README.md translated into Chinese (also serves as docs/index.md)
- The docs/ directory contains symlinks to the chapter files
- Roughly 29,000 lines of Chinese content, excluding the .cache/ build cache
@@ -0,0 +1,68 @@
<svg width="700" height="320" xmlns="http://www.w3.org/2000/svg">
<defs>
<marker id="tf-arrow" markerWidth="8" markerHeight="6" refX="8" refY="3" orient="auto">
<path d="M0,0 L8,3 L0,6" fill="none" stroke="#555" stroke-width="1"/>
</marker>
</defs>
<text x="350" y="22" fill="#333" font-size="14" font-weight="bold" text-anchor="middle">Transformer Encoder Block</text>

<!-- Input at bottom -->
<text x="250" y="305" fill="#666" font-size="11" text-anchor="middle">Input embeddings + positional encoding</text>
<line x1="250" y1="293" x2="250" y2="275" stroke="#555" stroke-width="1.5" marker-end="url(#tf-arrow)"/>

<!-- Multi-Head Attention -->
<rect x="170" y="240" width="160" height="35" rx="6" fill="#3498db" opacity="0.15" stroke="#3498db" stroke-width="1.5"/>
<text x="250" y="262" fill="#3498db" font-size="11" font-weight="bold" text-anchor="middle">Multi-Head Attention</text>

<!-- Q K V arrows into attention -->
<text x="190" y="238" fill="#3498db" font-size="9" text-anchor="middle">Q</text>
<text x="250" y="238" fill="#3498db" font-size="9" text-anchor="middle">K</text>
<text x="310" y="238" fill="#3498db" font-size="9" text-anchor="middle">V</text>

<!-- Add & Norm 1 -->
<line x1="250" y1="240" x2="250" y2="218" stroke="#555" stroke-width="1.5" marker-end="url(#tf-arrow)"/>
<rect x="185" y="193" width="130" height="25" rx="6" fill="#9b59b6" opacity="0.15" stroke="#9b59b6" stroke-width="1.5"/>
<text x="250" y="210" fill="#9b59b6" font-size="10" font-weight="bold" text-anchor="middle">Add &amp; LayerNorm</text>

<!-- Residual connection 1 -->
<line x1="350" y1="258" x2="350" y2="206" stroke="#999" stroke-width="1" stroke-dasharray="4,3"/>
<line x1="350" y1="206" x2="315" y2="206" stroke="#999" stroke-width="1" marker-end="url(#tf-arrow)"/>
<text x="365" y="235" fill="#999" font-size="9">residual</text>

<!-- Feed-Forward Network -->
<line x1="250" y1="193" x2="250" y2="170" stroke="#555" stroke-width="1.5" marker-end="url(#tf-arrow)"/>
<rect x="175" y="137" width="150" height="33" rx="6" fill="#e74c3c" opacity="0.15" stroke="#e74c3c" stroke-width="1.5"/>
<text x="250" y="153" fill="#e74c3c" font-size="10" font-weight="bold" text-anchor="middle">Feed-Forward Network</text>
<text x="250" y="165" fill="#e74c3c" font-size="9" text-anchor="middle">(Linear, ReLU, Linear)</text>

<!-- Add & Norm 2 -->
<line x1="250" y1="137" x2="250" y2="118" stroke="#555" stroke-width="1.5" marker-end="url(#tf-arrow)"/>
<rect x="185" y="93" width="130" height="25" rx="6" fill="#9b59b6" opacity="0.15" stroke="#9b59b6" stroke-width="1.5"/>
<text x="250" y="110" fill="#9b59b6" font-size="10" font-weight="bold" text-anchor="middle">Add &amp; LayerNorm</text>

<!-- Residual connection 2 -->
<line x1="350" y1="153" x2="350" y2="106" stroke="#999" stroke-width="1" stroke-dasharray="4,3"/>
<line x1="350" y1="106" x2="315" y2="106" stroke="#999" stroke-width="1" marker-end="url(#tf-arrow)"/>
<text x="365" y="132" fill="#999" font-size="9">residual</text>

<!-- Output -->
<line x1="250" y1="93" x2="250" y2="68" stroke="#555" stroke-width="1.5" marker-end="url(#tf-arrow)"/>
<text x="250" y="58" fill="#666" font-size="11" text-anchor="middle">Output (to next block or head)</text>

<!-- Attention detail (right side) -->
<rect x="430" y="55" width="240" height="130" rx="6" fill="#f5f5f5" stroke="#ddd" stroke-width="1"/>
<text x="550" y="75" fill="#333" font-size="11" font-weight="bold" text-anchor="middle">Scaled Dot-Product Attention</text>

<text x="445" y="98" fill="#333" font-size="10">1. Score = Q · K^T / sqrt(d_k)</text>
<text x="445" y="118" fill="#333" font-size="10">2. Weights = softmax(Score)</text>
<text x="445" y="138" fill="#333" font-size="10">3. Output = Weights · V</text>
<text x="445" y="162" fill="#666" font-size="9">Multi-head: h parallel attentions,</text>
<text x="445" y="175" fill="#666" font-size="9">concatenated and projected</text>

<!-- Key insight box -->
<rect x="430" y="200" width="240" height="65" rx="6" fill="#f5f5f5" stroke="#ddd" stroke-width="1"/>
<text x="550" y="220" fill="#333" font-size="10" font-weight="bold" text-anchor="middle">Why Transformers Work</text>
<text x="445" y="238" fill="#666" font-size="9">Every token attends to every other</text>
<text x="445" y="252" fill="#666" font-size="9">token in parallel (no sequential</text>
<text x="445" y="266" fill="#666" font-size="9">bottleneck like RNNs)</text>
</svg>
After | Width: 700px | Height: 320px | Size: 4.4 KiB
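
The attention recipe in the diagram's right-hand panel maps directly to a few lines of numpy. A minimal sketch (the function name and test shapes are illustrative, not taken from the compendium):

```python
import numpy as np

def scaled_dot_product_attention(Q, K, V):
    """The three steps listed in the diagram's right-hand panel."""
    d_k = Q.shape[-1]
    # 1. Score = Q · K^T / sqrt(d_k); the scaling keeps scores from
    #    growing with d_k and saturating the softmax.
    scores = Q @ K.T / np.sqrt(d_k)
    # 2. Weights = softmax(Score), row-wise: each query token's weights
    #    over all key tokens sum to 1 (max subtracted for stability).
    weights = np.exp(scores - scores.max(axis=-1, keepdims=True))
    weights /= weights.sum(axis=-1, keepdims=True)
    # 3. Output = Weights · V
    return weights @ V

rng = np.random.default_rng(0)
x = rng.normal(size=(5, 8))                  # 5 tokens, d_model = 8
out = scaled_dot_product_attention(x, x, x)  # self-attention: Q = K = V
print(out.shape)                             # (5, 8)
```

Multi-head attention, per the panel's last note, runs h of these in parallel on learned linear projections of Q, K, and V, concatenates the h outputs along the feature axis, and applies a final output projection.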
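
The diagram's left-hand column (Multi-Head Attention, Add & LayerNorm, Feed-Forward, Add & LayerNorm) is likewise compact in code. A minimal post-LN sketch reusing the scaled_dot_product_attention function above; Q/K/V projections, multiple heads, and LayerNorm's learned gain and bias are all omitted for brevity:

```python
import numpy as np

def layer_norm(x, eps=1e-5):
    # Normalize each token's features (last axis) to zero mean, unit variance.
    return (x - x.mean(-1, keepdims=True)) / np.sqrt(x.var(-1, keepdims=True) + eps)

def encoder_block(x, W1, b1, W2, b2):
    """Follows the diagram bottom-to-top (post-LN variant)."""
    # Attention + residual (the dashed line), then Add & LayerNorm.
    # Single head with no projections; assumes the sketch defined above.
    x = layer_norm(x + scaled_dot_product_attention(x, x, x))
    # Feed-Forward Network: Linear, ReLU, Linear (applied independently per token).
    ffn = np.maximum(0.0, x @ W1 + b1) @ W2 + b2
    # Second residual, then Add & LayerNorm; output feeds the next block.
    return layer_norm(x + ffn)

d_model, d_ff = 8, 32
rng = np.random.default_rng(1)
W1, b1 = rng.normal(size=(d_model, d_ff)) * 0.1, np.zeros(d_ff)
W2, b2 = rng.normal(size=(d_ff, d_model)) * 0.1, np.zeros(d_model)
y = encoder_block(rng.normal(size=(5, d_model)), W1, b1, W2, b2)
print(y.shape)  # (5, 8): same shape in and out, so blocks stack
```

Because the block maps a (seq_len, d_model) array to an array of the same shape, blocks stack, and every position is transformed by the same matrix operations at once rather than one step at a time, which is the "no sequential bottleneck" point in the key-insight box.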