2536c937e3
翻译自英文原版 maths-cs-ai-compendium,共 20 章全部完成。 第01章 向量 | 第02章 矩阵 | 第03章 微积分 第04章 统计学 | 第05章 概率论 | 第06章 机器学习 第07章 计算语言学 | 第08章 计算机视觉 | 第09章 音频与语音 第10章 多模态学习 | 第11章 自主系统 | 第12章 图神经网络 第13章 计算与操作系统 | 第14章 数据结构与算法 第15章 生产级软件工程 | 第16章 SIMD与GPU编程 第17章 AI推理 | 第18章 ML系统设计 第19章 应用人工智能 | 第20章 前沿人工智能 翻译说明: - 所有数学公式 $...$ / $$...$$、代码块、图片引用完整保留 - mkdocs.yml 配置中文导航 + language: zh - README.md 已翻译为中文(兼 docs/index.md) - docs/ 目录包含指向各章文件的 symlink - 约 29,000 行中文内容,排除 .cache/ 构建缓存
107 lines
7.1 KiB
XML
107 lines
7.1 KiB
XML
<!--
  Bag-of-Words illustration.
  Left: an example document. Middle: its per-word count table.
  Right: the same counts laid out as a vocabulary-indexed vector.
  Bottom: three call-out boxes (advantage, limitation, dimensionality).
-->
<!-- viewBox mirrors width/height so the figure scales when sized by CSS or embedded inline. -->
<!-- role="img" plus title/desc gives assistive technology a name and a summary of the drawing. -->
<svg width="720" height="320" viewBox="0 0 720 320" xmlns="http://www.w3.org/2000/svg" role="img" aria-labelledby="bow-title bow-desc">
  <title id="bow-title">Bag-of-Words: Document → Word Count Vector</title>
  <desc id="bow-desc">The document "the cat sat on the mat" is turned into a word count table (the: 2; cat, sat, on and mat: 1 each) and then into a vector with one entry per vocabulary word, where absent words such as "a" and "dog" are 0. Word order is discarded.</desc>

  <defs>
    <!-- Arrowhead shared by both connector lines via marker-end="url(#bow-arrow)". -->
    <marker id="bow-arrow" markerWidth="7" markerHeight="5" refX="7" refY="2.5" orient="auto">
      <polygon points="0 0, 7 2.5, 0 5" fill="#555"/>
    </marker>
  </defs>

  <!-- Figure heading, centered on the 720px canvas -->
  <text x="360" y="22" fill="#333" font-size="14" font-weight="bold" text-anchor="middle">Bag-of-Words: Document → Word Count Vector</text>

  <!-- Document text -->
  <rect x="30" y="45" width="250" height="70" rx="8" fill="#f5f5f5" stroke="#333" stroke-width="1.5"/>
  <text x="45" y="65" fill="#333" font-size="12" font-weight="bold">Document:</text>
  <text x="45" y="85" fill="#555" font-size="12">"the cat sat on the mat"</text>
  <text x="45" y="103" fill="#999" font-size="10">(word order is discarded)</text>

  <!-- Arrow from the document box to the count table -->
  <line x1="280" y1="80" x2="330" y2="80" stroke="#555" stroke-width="1.5" marker-end="url(#bow-arrow)"/>
  <text x="305" y="72" fill="#666" font-size="9" text-anchor="middle">count</text>

  <!-- Word count table -->
  <!-- NOTE(review): this heading is centered at x=470 while the table below spans x=340 to x=475 (center near 407); confirm the offset is intended. -->
  <text x="470" y="45" fill="#333" font-size="11" text-anchor="middle" font-weight="bold">Word Counts</text>

  <!-- Header: "Word" column in blue, "Count" column in red -->
  <rect x="340" y="55" width="75" height="22" rx="3" fill="#3498db" opacity="0.2" stroke="#3498db" stroke-width="1"/>
  <text x="377" y="70" fill="#3498db" font-size="10" text-anchor="middle" font-weight="bold">Word</text>
  <rect x="420" y="55" width="55" height="22" rx="3" fill="#e74c3c" opacity="0.2" stroke="#e74c3c" stroke-width="1"/>
  <text x="447" y="70" fill="#e74c3c" font-size="10" text-anchor="middle" font-weight="bold">Count</text>

  <!-- Rows: one word/count pair every 20px, separated by light rules -->
  <text x="377" y="92" fill="#333" font-size="11" text-anchor="middle">the</text>
  <text x="447" y="92" fill="#e74c3c" font-size="12" text-anchor="middle" font-weight="bold">2</text>
  <line x1="340" y1="97" x2="475" y2="97" stroke="#eee" stroke-width="1"/>

  <text x="377" y="112" fill="#333" font-size="11" text-anchor="middle">cat</text>
  <text x="447" y="112" fill="#e74c3c" font-size="12" text-anchor="middle" font-weight="bold">1</text>
  <line x1="340" y1="117" x2="475" y2="117" stroke="#eee" stroke-width="1"/>

  <text x="377" y="132" fill="#333" font-size="11" text-anchor="middle">sat</text>
  <text x="447" y="132" fill="#e74c3c" font-size="12" text-anchor="middle" font-weight="bold">1</text>
  <line x1="340" y1="137" x2="475" y2="137" stroke="#eee" stroke-width="1"/>

  <text x="377" y="152" fill="#333" font-size="11" text-anchor="middle">on</text>
  <text x="447" y="152" fill="#e74c3c" font-size="12" text-anchor="middle" font-weight="bold">1</text>
  <line x1="340" y1="157" x2="475" y2="157" stroke="#eee" stroke-width="1"/>

  <text x="377" y="172" fill="#333" font-size="11" text-anchor="middle">mat</text>
  <text x="447" y="172" fill="#e74c3c" font-size="12" text-anchor="middle" font-weight="bold">1</text>

  <!-- Arrow to vector -->
  <line x1="475" y1="120" x2="520" y2="120" stroke="#555" stroke-width="1.5" marker-end="url(#bow-arrow)"/>

  <!-- Vector representation -->
  <!-- NOTE(review): this heading is centered at x=615 while the vector cells span x=545 to x=580; confirm the offset is intended. -->
  <text x="615" y="45" fill="#333" font-size="11" text-anchor="middle" font-weight="bold">BoW Vector ∈ ℝⱽ</text>

  <!-- Vector (vertical) with vocabulary labels -->
  <!-- Each entry is label + cell + value. Zero entries are greyed out (cell opacity 0.05); -->
  <!-- entries with count 1 use opacity 0.2, and the count-2 entry uses 0.35 with a thicker stroke. -->
  <text x="540" y="75" fill="#999" font-size="8" text-anchor="end">a</text>
  <rect x="545" y="65" width="35" height="16" rx="2" fill="#27ae60" opacity="0.05" stroke="#ccc" stroke-width="0.5"/>
  <text x="562" y="77" fill="#ccc" font-size="10" text-anchor="middle">0</text>

  <text x="540" y="93" fill="#333" font-size="8" text-anchor="end" font-weight="bold">cat</text>
  <rect x="545" y="83" width="35" height="16" rx="2" fill="#27ae60" opacity="0.2" stroke="#27ae60" stroke-width="1"/>
  <text x="562" y="95" fill="#27ae60" font-size="10" text-anchor="middle" font-weight="bold">1</text>

  <text x="540" y="111" fill="#999" font-size="8" text-anchor="end">dog</text>
  <rect x="545" y="101" width="35" height="16" rx="2" fill="#27ae60" opacity="0.05" stroke="#ccc" stroke-width="0.5"/>
  <text x="562" y="113" fill="#ccc" font-size="10" text-anchor="middle">0</text>

  <text x="540" y="129" fill="#333" font-size="8" text-anchor="end" font-weight="bold">mat</text>
  <rect x="545" y="119" width="35" height="16" rx="2" fill="#27ae60" opacity="0.2" stroke="#27ae60" stroke-width="1"/>
  <text x="562" y="131" fill="#27ae60" font-size="10" text-anchor="middle" font-weight="bold">1</text>

  <text x="540" y="147" fill="#333" font-size="8" text-anchor="end" font-weight="bold">on</text>
  <rect x="545" y="137" width="35" height="16" rx="2" fill="#27ae60" opacity="0.2" stroke="#27ae60" stroke-width="1"/>
  <text x="562" y="149" fill="#27ae60" font-size="10" text-anchor="middle" font-weight="bold">1</text>

  <text x="540" y="165" fill="#333" font-size="8" text-anchor="end" font-weight="bold">sat</text>
  <rect x="545" y="155" width="35" height="16" rx="2" fill="#27ae60" opacity="0.2" stroke="#27ae60" stroke-width="1"/>
  <text x="562" y="167" fill="#27ae60" font-size="10" text-anchor="middle" font-weight="bold">1</text>

  <text x="540" y="183" fill="#333" font-size="8" text-anchor="end" font-weight="bold">the</text>
  <rect x="545" y="173" width="35" height="16" rx="2" fill="#27ae60" opacity="0.35" stroke="#27ae60" stroke-width="1.5"/>
  <text x="562" y="185" fill="#27ae60" font-size="10" text-anchor="middle" font-weight="bold">2</text>

  <!-- Vertical ellipsis: the vocabulary continues beyond the entries shown -->
  <text x="562" y="203" fill="#999" font-size="10" text-anchor="middle">⋮</text>

  <!-- Bracket -->
  <!-- Drawn as large text glyphs, so their exact extent depends on the viewer's font. -->
  <text x="595" y="140" fill="#333" font-size="50" text-anchor="start" font-weight="100">]</text>
  <text x="535" y="140" fill="#333" font-size="50" text-anchor="end" font-weight="100">[</text>

  <!-- Bottom explanation boxes -->
  <rect x="30" y="230" width="200" height="80" rx="8" fill="#3498db" opacity="0.08" stroke="#3498db" stroke-width="1"/>
  <text x="130" y="250" fill="#3498db" font-size="10" text-anchor="middle" font-weight="bold">Advantage</text>
  <text x="130" y="266" fill="#555" font-size="10" text-anchor="middle">Simple, fast, effective</text>
  <text x="130" y="280" fill="#555" font-size="10" text-anchor="middle">for document classification</text>
  <text x="130" y="294" fill="#555" font-size="10" text-anchor="middle">and spam filtering</text>

  <rect x="260" y="230" width="200" height="80" rx="8" fill="#e74c3c" opacity="0.08" stroke="#e74c3c" stroke-width="1"/>
  <text x="360" y="250" fill="#e74c3c" font-size="10" text-anchor="middle" font-weight="bold">Limitation</text>
  <text x="360" y="266" fill="#555" font-size="10" text-anchor="middle">Ignores word order:</text>
  <text x="360" y="280" fill="#555" font-size="10" text-anchor="middle">"dog bites man" and</text>
  <text x="360" y="294" fill="#555" font-size="10" text-anchor="middle">"man bites dog" are identical</text>

  <rect x="490" y="230" width="200" height="80" rx="8" fill="#27ae60" opacity="0.08" stroke="#27ae60" stroke-width="1"/>
  <text x="590" y="250" fill="#27ae60" font-size="10" text-anchor="middle" font-weight="bold">Dimensionality</text>
  <text x="590" y="266" fill="#555" font-size="10" text-anchor="middle">Vector has V dimensions</text>
  <text x="590" y="280" fill="#555" font-size="10" text-anchor="middle">(one per vocab word).</text>
  <text x="590" y="294" fill="#555" font-size="10" text-anchor="middle">Very sparse: mostly zeros.</text>
</svg>