2536c937e3
翻译自英文原版 maths-cs-ai-compendium,共 20 章全部完成。 第01章 向量 | 第02章 矩阵 | 第03章 微积分 第04章 统计学 | 第05章 概率论 | 第06章 机器学习 第07章 计算语言学 | 第08章 计算机视觉 | 第09章 音频与语音 第10章 多模态学习 | 第11章 自主系统 | 第12章 图神经网络 第13章 计算与操作系统 | 第14章 数据结构与算法 第15章 生产级软件工程 | 第16章 SIMD与GPU编程 第17章 AI推理 | 第18章 ML系统设计 第19章 应用人工智能 | 第20章 前沿人工智能 翻译说明: - 所有数学公式 $...$ / $$...$$、代码块、图片引用完整保留 - mkdocs.yml 配置中文导航 + language: zh - README.md 已翻译为中文(兼 docs/index.md) - docs/ 目录包含指向各章文件的 symlink - 约 29,000 行中文内容,排除 .cache/ 构建缓存
95 lines
5.7 KiB
XML
95 lines
5.7 KiB
XML
<svg xmlns="http://www.w3.org/2000/svg" width="700" height="300" viewBox="0 0 700 300" font-family="Arial, Helvetica, sans-serif" role="img" aria-labelledby="title-xv desc-xv">
  <title id="title-xv">x-vector TDNN Architecture</title>
  <desc id="desc-xv">Input features (MFCCs) pass through frame-level TDNN layers 1 to 5, then statistics pooling (mean and standard deviation over frames) and two fully connected layers, producing a 512-dimensional x-vector.</desc>

  <!--
    Layout on a 700x300 user-space canvas:
      * left column (x = 70 to 200): frame-level stack, drawn top to bottom
      * dashed red line at x = 260: boundary between the two stages
      * middle row (y = 150 to 194): statistics pooling and two FC layers, drawn left to right
      * green box below the second FC layer: the resulting x-vector
      * dashed inset (x = 290 to 430): temporal context illustration
      * grey box along the bottom: explanatory note
    The viewBox equals the width/height so the drawing scales when it is resized.
    Ids carry the "-xv" suffix so they stay unique if the SVG is inlined next to other diagrams.
  -->

  <defs>
    <!-- Arrowhead for the connector lines; refX = 8 puts the tip on the line end. -->
    <marker id="arrow-xv" markerWidth="8" markerHeight="6" refX="8" refY="3" orient="auto">
      <polygon points="0,0 8,3 0,6" fill="#333"/>
    </marker>
  </defs>

  <!-- Title -->
  <text x="350" y="22" text-anchor="middle" font-size="14" font-weight="bold" fill="#333">x-vector TDNN Architecture</text>

  <!-- Frame-level bracket: spans from the top of the input box (y = 38)
       to the bottom of the TDNN Layer 5 box (y = 160 + 24 = 184). -->
  <line x1="45" y1="38" x2="45" y2="184" stroke="#3498db" stroke-width="1.5"/>
  <line x1="45" y1="38" x2="55" y2="38" stroke="#3498db" stroke-width="1.5"/>
  <line x1="45" y1="184" x2="55" y2="184" stroke="#3498db" stroke-width="1.5"/>
  <!-- Label rotated about its own anchor, placed at the bracket midpoint. -->
  <text x="30" y="111" text-anchor="middle" font-size="8" fill="#3498db" transform="rotate(-90,30,111)">Frame-level</text>

  <!-- Input features -->
  <rect x="70" y="38" width="130" height="28" rx="6" fill="rgba(52,152,219,0.1)" stroke="#3498db" stroke-width="1.5"/>
  <text x="135" y="56" text-anchor="middle" font-size="9" fill="#333">Input Features (MFCCs)</text>

  <!-- Connectors stop 2 units short of the next box so the arrowhead does not overlap its stroke. -->
  <line x1="135" y1="66" x2="135" y2="78" stroke="#333" stroke-width="1.2" marker-end="url(#arrow-xv)"/>

  <!-- TDNN Layer 1 -->
  <rect x="70" y="80" width="130" height="24" rx="6" fill="rgba(52,152,219,0.15)" stroke="#3498db" stroke-width="1.5"/>
  <text x="135" y="96" text-anchor="middle" font-size="9" fill="#333">TDNN Layer 1</text>
  <text x="210" y="96" font-size="7" fill="#666">ctx=[-2,2]</text>

  <line x1="135" y1="104" x2="135" y2="112" stroke="#333" stroke-width="1.2" marker-end="url(#arrow-xv)"/>

  <!-- TDNN Layer 2 -->
  <rect x="70" y="114" width="130" height="24" rx="6" fill="rgba(52,152,219,0.15)" stroke="#3498db" stroke-width="1.5"/>
  <text x="135" y="130" text-anchor="middle" font-size="9" fill="#333">TDNN Layer 2</text>
  <text x="210" y="130" font-size="7" fill="#666">ctx=[-2,0,2]</text>

  <line x1="135" y1="138" x2="135" y2="146" stroke="#333" stroke-width="1.2" marker-end="url(#arrow-xv)"/>

  <!-- Dots: stand in for the layers not drawn between Layer 2 and Layer 5 -->
  <text x="135" y="157" text-anchor="middle" font-size="11" fill="#666">...</text>

  <!-- TDNN Layer 5 -->
  <rect x="70" y="160" width="130" height="24" rx="6" fill="rgba(52,152,219,0.15)" stroke="#3498db" stroke-width="1.5"/>
  <text x="135" y="176" text-anchor="middle" font-size="9" fill="#333">TDNN Layer 5</text>

  <!-- Arrow to stats pooling -->
  <line x1="200" y1="172" x2="275" y2="172" stroke="#333" stroke-width="1.2" marker-end="url(#arrow-xv)"/>

  <!-- Boundary line -->
  <line x1="260" y1="38" x2="260" y2="200" stroke="#e74c3c" stroke-width="1" stroke-dasharray="5,4"/>
  <text x="265" y="48" font-size="7" fill="#e74c3c">boundary</text>

  <!-- Segment-level bracket -->
  <line x1="605" y1="130" x2="605" y2="200" stroke="#e74c3c" stroke-width="1.5"/>
  <line x1="595" y1="130" x2="605" y2="130" stroke="#e74c3c" stroke-width="1.5"/>
  <line x1="595" y1="200" x2="605" y2="200" stroke="#e74c3c" stroke-width="1.5"/>
  <text x="625" y="170" text-anchor="middle" font-size="8" fill="#e74c3c" transform="rotate(-90,625,170)">Segment-level</text>

  <!-- Statistics Pooling -->
  <rect x="278" y="150" width="130" height="44" rx="6" fill="rgba(243,156,18,0.15)" stroke="#f39c12" stroke-width="1.5"/>
  <text x="343" y="168" text-anchor="middle" font-size="9" font-weight="bold" fill="#333">Statistics Pooling</text>
  <text x="343" y="182" text-anchor="middle" font-size="8" fill="#666">mean + std over frames</text>

  <line x1="408" y1="172" x2="430" y2="172" stroke="#333" stroke-width="1.2" marker-end="url(#arrow-xv)"/>

  <!-- FC 1 -->
  <rect x="433" y="158" width="70" height="28" rx="6" fill="rgba(155,89,182,0.12)" stroke="#9b59b6" stroke-width="1.5"/>
  <text x="468" y="176" text-anchor="middle" font-size="9" fill="#333">FC Layer</text>

  <line x1="503" y1="172" x2="520" y2="172" stroke="#333" stroke-width="1.2" marker-end="url(#arrow-xv)"/>

  <!-- FC 2 -->
  <rect x="523" y="158" width="70" height="28" rx="6" fill="rgba(155,89,182,0.12)" stroke="#9b59b6" stroke-width="1.5"/>
  <text x="558" y="176" text-anchor="middle" font-size="9" fill="#333">FC Layer</text>

  <!-- Arrow down to embedding -->
  <line x1="558" y1="186" x2="558" y2="205" stroke="#333" stroke-width="1.2" marker-end="url(#arrow-xv)"/>

  <!-- Speaker Embedding -->
  <rect x="490" y="207" width="136" height="28" rx="6" fill="rgba(39,174,96,0.15)" stroke="#27ae60" stroke-width="1.8"/>
  <text x="558" y="225" text-anchor="middle" font-size="10" font-weight="bold" fill="#27ae60">x-vector (512-d)</text>

  <!-- Temporal context illustration -->
  <rect x="290" y="60" width="140" height="66" rx="6" fill="rgba(52,152,219,0.06)" stroke="#3498db" stroke-width="0.8" stroke-dasharray="3,3"/>
  <text x="360" y="76" text-anchor="middle" font-size="8" font-weight="bold" fill="#3498db">Temporal Context</text>
  <!-- Small frame boxes: five 12-unit squares on a 17-unit pitch (80 units wide in total),
       centred on x = 360 like the inset and its captions. The darker, thicker middle box
       marks the current frame. -->
  <rect x="320" y="84" width="12" height="12" rx="1" fill="rgba(52,152,219,0.15)" stroke="#3498db" stroke-width="0.7"/>
  <rect x="337" y="84" width="12" height="12" rx="1" fill="rgba(52,152,219,0.15)" stroke="#3498db" stroke-width="0.7"/>
  <rect x="354" y="84" width="12" height="12" rx="1" fill="rgba(52,152,219,0.3)" stroke="#3498db" stroke-width="1.2"/>
  <rect x="371" y="84" width="12" height="12" rx="1" fill="rgba(52,152,219,0.15)" stroke="#3498db" stroke-width="0.7"/>
  <rect x="388" y="84" width="12" height="12" rx="1" fill="rgba(52,152,219,0.15)" stroke="#3498db" stroke-width="0.7"/>
  <text x="360" y="112" text-anchor="middle" font-size="7" fill="#666">Each TDNN sees a context window</text>

  <!-- Note box -->
  <rect x="30" y="250" width="640" height="36" rx="6" fill="#f5f5f5" stroke="#333" stroke-width="1"/>
  <text x="50" y="273" font-size="9" fill="#666">Statistics pooling converts variable-length frame-level features into a fixed-size segment-level embedding.</text>
</svg>