2536c937e3
翻译自英文原版 maths-cs-ai-compendium,共 20 章全部完成。 第01章 向量 | 第02章 矩阵 | 第03章 微积分 第04章 统计学 | 第05章 概率论 | 第06章 机器学习 第07章 计算语言学 | 第08章 计算机视觉 | 第09章 音频与语音 第10章 多模态学习 | 第11章 自主系统 | 第12章 图神经网络 第13章 计算与操作系统 | 第14章 数据结构与算法 第15章 生产级软件工程 | 第16章 SIMD与GPU编程 第17章 AI推理 | 第18章 ML系统设计 第19章 应用人工智能 | 第20章 前沿人工智能 翻译说明: - 所有数学公式 $...$ / $$...$$、代码块、图片引用完整保留 - mkdocs.yml 配置中文导航 + language: zh - README.md 已翻译为中文(兼 docs/index.md) - docs/ 目录包含指向各章文件的 symlink - 约 29,000 行中文内容,排除 .cache/ 构建缓存
107 lines
7.3 KiB
XML
107 lines
7.3 KiB
XML
<!--
  Mask R-CNN overview diagram.
  Left-to-right pipeline: Image, Backbone + FPN, RPN, RoIAlign, then three
  parallel output branches (classification, box regression, mask head).
  Two note boxes along the bottom summarise RoIAlign vs RoI Pooling and the
  multi-task loss.
  The viewBox mirrors width/height so the drawing scales instead of cropping
  when a container overrides the intrinsic 700x260 size.
-->
<svg width="700" height="260" viewBox="0 0 700 260" xmlns="http://www.w3.org/2000/svg" role="img" aria-labelledby="mrcnn-title mrcnn-desc">
  <title id="mrcnn-title">Mask R-CNN: Instance Segmentation</title>
  <desc id="mrcnn-desc">An image passes through a backbone with FPN, an RPN that produces proposals, and RoIAlign. Three branches follow: classification, box regression, and a mask head that is the addition to Faster R-CNN.</desc>

  <defs>
    <!-- Shared arrowhead; refX=7 places the tip exactly on the end of each line. -->
    <marker id="mrcnn-arrow" markerWidth="7" markerHeight="5" refX="7" refY="2.5" orient="auto">
      <polygon points="0 0, 7 2.5, 0 5" fill="#555"/>
    </marker>
  </defs>

  <!-- Diagram heading -->
  <text x="350" y="22" fill="#333" font-size="14" font-weight="bold" text-anchor="middle">Mask R-CNN: Instance Segmentation</text>

  <!-- Input image: a frame with two tinted rectangles standing in for objects -->
  <rect x="15" y="50" width="70" height="70" rx="4" fill="#ecf0f1" stroke="#999" stroke-width="1.5"/>
  <rect x="28" y="62" width="30" height="22" rx="2" fill="#3498db" opacity="0.3" stroke="#3498db" stroke-width="1"/>
  <rect x="42" y="88" width="25" height="18" rx="2" fill="#e74c3c" opacity="0.3" stroke="#e74c3c" stroke-width="1"/>
  <text x="50" y="135" fill="#333" font-size="8" text-anchor="middle">Image</text>

  <!-- Arrow: image to backbone -->
  <line x1="90" y1="85" x2="115" y2="85" stroke="#555" stroke-width="1.2" marker-end="url(#mrcnn-arrow)"/>

  <!-- Backbone + FPN -->
  <rect x="120" y="55" width="80" height="60" rx="6" fill="#3498db" opacity="0.12" stroke="#3498db" stroke-width="1.5"/>
  <text x="160" y="80" fill="#3498db" font-size="9" text-anchor="middle" font-weight="bold">Backbone</text>
  <text x="160" y="93" fill="#3498db" font-size="8" text-anchor="middle">+ FPN</text>

  <!-- Arrow: backbone to RPN -->
  <line x1="200" y1="85" x2="225" y2="85" stroke="#555" stroke-width="1.2" marker-end="url(#mrcnn-arrow)"/>

  <!-- RPN -->
  <rect x="230" y="55" width="60" height="60" rx="6" fill="#f39c12" opacity="0.12" stroke="#f39c12" stroke-width="1.5"/>
  <text x="260" y="80" fill="#f39c12" font-size="9" text-anchor="middle" font-weight="bold">RPN</text>
  <text x="260" y="93" fill="#f39c12" font-size="8" text-anchor="middle">proposals</text>

  <!-- Arrow: RPN to RoIAlign -->
  <line x1="290" y1="85" x2="315" y2="85" stroke="#555" stroke-width="1.2" marker-end="url(#mrcnn-arrow)"/>

  <!-- RoIAlign (drawn with a heavier stroke than the preceding stages) -->
  <rect x="320" y="50" width="80" height="70" rx="6" fill="#9b59b6" opacity="0.12" stroke="#9b59b6" stroke-width="2"/>
  <text x="360" y="78" fill="#9b59b6" font-size="9" text-anchor="middle" font-weight="bold">RoIAlign</text>
  <text x="360" y="93" fill="#9b59b6" font-size="8" text-anchor="middle">(no quantisation)</text>
  <text x="360" y="106" fill="#9b59b6" font-size="7" text-anchor="middle">bilinear interp</text>

  <!-- Three output branches fan out from the right edge of RoIAlign -->

  <!-- Branch 1: Classification (top) -->
  <line x1="400" y1="70" x2="450" y2="45" stroke="#555" stroke-width="1.2" marker-end="url(#mrcnn-arrow)"/>
  <rect x="455" y="30" width="100" height="35" rx="6" fill="#3498db" opacity="0.12" stroke="#3498db" stroke-width="1.5"/>
  <text x="505" y="48" fill="#3498db" font-size="9" text-anchor="middle" font-weight="bold">Classification</text>
  <text x="505" y="60" fill="#3498db" font-size="8" text-anchor="middle">class label</text>

  <!-- Classification output: example class scores -->
  <line x1="555" y1="47" x2="590" y2="47" stroke="#555" stroke-width="1" marker-end="url(#mrcnn-arrow)"/>
  <text x="625" y="42" fill="#333" font-size="8" text-anchor="middle" font-weight="bold">cat: 0.97</text>
  <text x="625" y="55" fill="#333" font-size="8" text-anchor="middle" font-weight="bold">dog: 0.95</text>

  <!-- Branch 2: Box regression (middle) -->
  <line x1="400" y1="85" x2="450" y2="85" stroke="#555" stroke-width="1.2" marker-end="url(#mrcnn-arrow)"/>
  <rect x="455" y="70" width="100" height="35" rx="6" fill="#27ae60" opacity="0.12" stroke="#27ae60" stroke-width="1.5"/>
  <text x="505" y="88" fill="#27ae60" font-size="9" text-anchor="middle" font-weight="bold">Box Regression</text>
  <text x="505" y="100" fill="#27ae60" font-size="8" text-anchor="middle">Δx, Δy, Δw, Δh</text>

  <!-- Box regression output: an unfilled rectangle representing the box -->
  <line x1="555" y1="87" x2="590" y2="87" stroke="#555" stroke-width="1" marker-end="url(#mrcnn-arrow)"/>
  <rect x="595" y="72" width="40" height="30" rx="2" fill="none" stroke="#27ae60" stroke-width="1.5"/>

  <!-- Branch 3: Mask (bottom) — the new part -->
  <line x1="400" y1="100" x2="450" y2="130" stroke="#555" stroke-width="1.2" marker-end="url(#mrcnn-arrow)"/>
  <rect x="455" y="115" width="100" height="40" rx="6" fill="#e74c3c" opacity="0.15" stroke="#e74c3c" stroke-width="2"/>
  <text x="505" y="133" fill="#e74c3c" font-size="9" text-anchor="middle" font-weight="bold">Mask Head</text>
  <text x="505" y="148" fill="#e74c3c" font-size="8" text-anchor="middle">m×m per class</text>

  <!-- Mask output: a small grid whose cell opacity varies to suggest a soft mask -->
  <line x1="555" y1="135" x2="590" y2="135" stroke="#555" stroke-width="1" marker-end="url(#mrcnn-arrow)"/>
  <rect x="595" y="118" width="34" height="34" rx="2" fill="#eee" stroke="#e74c3c" stroke-width="1"/>
  <!-- Mask pixels, row y=120 -->
  <rect x="597" y="120" width="6" height="6" fill="#e74c3c" opacity="0.6"/>
  <rect x="603" y="120" width="6" height="6" fill="#e74c3c" opacity="0.7"/>
  <rect x="609" y="120" width="6" height="6" fill="#e74c3c" opacity="0.5"/>
  <!-- Mask pixels, row y=126 -->
  <rect x="597" y="126" width="6" height="6" fill="#e74c3c" opacity="0.8"/>
  <rect x="603" y="126" width="6" height="6" fill="#e74c3c" opacity="0.9"/>
  <rect x="609" y="126" width="6" height="6" fill="#e74c3c" opacity="0.7"/>
  <rect x="615" y="126" width="6" height="6" fill="#e74c3c" opacity="0.4"/>
  <!-- Mask pixels, row y=132 -->
  <rect x="597" y="132" width="6" height="6" fill="#e74c3c" opacity="0.5"/>
  <rect x="603" y="132" width="6" height="6" fill="#e74c3c" opacity="0.8"/>
  <rect x="609" y="132" width="6" height="6" fill="#e74c3c" opacity="0.9"/>
  <rect x="615" y="132" width="6" height="6" fill="#e74c3c" opacity="0.6"/>
  <!-- Mask pixels, row y=138 -->
  <rect x="603" y="138" width="6" height="6" fill="#e74c3c" opacity="0.5"/>
  <rect x="609" y="138" width="6" height="6" fill="#e74c3c" opacity="0.7"/>
  <rect x="615" y="138" width="6" height="6" fill="#e74c3c" opacity="0.4"/>
  <rect x="621" y="138" width="6" height="6" fill="#e74c3c" opacity="0.3"/>
  <text x="640" y="138" fill="#e74c3c" font-size="7" text-anchor="start">28×28</text>

  <!-- Highlight: this is the extension from Faster R-CNN -->
  <rect x="440" y="110" width="130" height="50" rx="8" fill="none" stroke="#e74c3c" stroke-width="1" stroke-dasharray="4,2"/>
  <!-- The caption is centred beneath the dashed box, so its arrow points up at it. -->
  <text x="505" y="172" fill="#e74c3c" font-size="8" text-anchor="middle" font-style="italic">↑ Added to Faster R-CNN</text>

  <!-- Bottom note box: RoIAlign vs RoI Pooling -->
  <rect x="30" y="190" width="310" height="60" rx="6" fill="#f5f5f5" stroke="#333" stroke-width="1"/>
  <text x="185" y="208" fill="#333" font-size="9" text-anchor="middle" font-weight="bold">RoIAlign vs RoI Pooling</text>
  <text x="185" y="222" fill="#666" font-size="8" text-anchor="middle">RoI Pooling quantises to grid → misalignment</text>
  <text x="185" y="235" fill="#666" font-size="8" text-anchor="middle">RoIAlign uses bilinear interpolation at exact</text>
  <text x="185" y="246" fill="#666" font-size="8" text-anchor="middle">positions → precise spatial features for masks</text>

  <!-- Bottom note box: multi-task loss.
       Baselines match the left-hand box (235 / 246) so the last line's
       descenders stay clear of the border at y=250. -->
  <rect x="355" y="190" width="330" height="60" rx="6" fill="#f5f5f5" stroke="#333" stroke-width="1"/>
  <text x="520" y="208" fill="#333" font-size="9" text-anchor="middle" font-weight="bold">Multi-task Loss</text>
  <text x="520" y="222" fill="#666" font-size="8" text-anchor="middle">L = L_cls + L_box + L_mask</text>
  <text x="520" y="235" fill="#666" font-size="8" text-anchor="middle">Mask branch predicts per-class binary masks.</text>
  <text x="520" y="246" fill="#666" font-size="8" text-anchor="middle">Only the mask for the predicted class is used.</text>
</svg>
|