Files
maths-cs-ai-compendium-zh/images/deeplab_aspp.svg
T

106 lines
7.5 KiB
XML
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
<svg width="700" height="280" viewBox="0 0 700 280" xmlns="http://www.w3.org/2000/svg" role="img" aria-labelledby="dl-aspp-title">
<!-- Accessible name for the figure; repeats the visible heading text. -->
<title id="dl-aspp-title">DeepLab: Atrous Spatial Pyramid Pooling (ASPP)</title>
<!-- viewBox equals the width/height above, so the default rendering is unchanged while the drawing can now scale with its container. -->
<defs>
  <!-- Arrowhead used by every connector that sets marker-end to url(#dl-arrow).
       refX/refY place the triangle's tip (7, 2.5) on the end point of the line; orient="auto" rotates it along the line. -->
  <marker id="dl-arrow" markerWidth="7" markerHeight="5" refX="7" refY="2.5" orient="auto">
    <polygon points="0 0, 7 2.5, 0 5" fill="#555"/>
  </marker>
</defs>
<!-- Figure heading, centred on the 700px-wide canvas (x = 350, text-anchor = middle). -->
<text x="350" y="22" fill="#333" font-size="14" font-weight="bold" text-anchor="middle">DeepLab: Atrous Spatial Pyramid Pooling (ASPP)</text>
<!-- Left panel: how the dilation rate spreads the taps of a 3x3 kernel -->
<text x="130" y="48" fill="#333" font-size="11" font-weight="bold" text-anchor="middle">Atrous (Dilated) Convolution</text>
<!-- rate=1 example: a standard 3x3 kernel whose taps are adjacent -->
<text x="60" y="70" fill="#3498db" font-size="9" text-anchor="middle" font-weight="bold">rate=1</text>
<g transform="translate(25, 75)">
  <!-- 70x70 backdrop, i.e. a 5x5 grid of 14px cells -->
  <rect x="0" y="0" width="70" height="70" fill="#eee" stroke="#ccc" stroke-width="0.5"/>
  <!-- The nine taps occupy the centre 3x3 cells. Fill and stroke are inherited from this group;
       opacity is not an inherited property, so it stays on each tap (0.6 marks the centre tap). -->
  <g fill="#3498db" stroke="#3498db" stroke-width="1">
    <!-- top row -->
    <rect x="14" y="14" width="14" height="14" opacity="0.4"/>
    <rect x="28" y="14" width="14" height="14" opacity="0.4"/>
    <rect x="42" y="14" width="14" height="14" opacity="0.4"/>
    <!-- middle row -->
    <rect x="14" y="28" width="14" height="14" opacity="0.4"/>
    <rect x="28" y="28" width="14" height="14" opacity="0.6"/>
    <rect x="42" y="28" width="14" height="14" opacity="0.4"/>
    <!-- bottom row -->
    <rect x="14" y="42" width="14" height="14" opacity="0.4"/>
    <rect x="28" y="42" width="14" height="14" opacity="0.4"/>
    <rect x="42" y="42" width="14" height="14" opacity="0.4"/>
  </g>
</g>
<!-- Caption: receptive field covered by the taps above -->
<text x="60" y="158" fill="#3498db" font-size="8" text-anchor="middle">RF: 3×3</text>
<!-- rate=2 example: the same nine taps with one empty cell between neighbours -->
<text x="170" y="70" fill="#e74c3c" font-size="9" text-anchor="middle" font-weight="bold">rate=2</text>
<g transform="translate(135, 75)">
  <!-- 70x70 backdrop, i.e. a 5x5 grid of 14px cells -->
  <rect x="0" y="0" width="70" height="70" fill="#eee" stroke="#ccc" stroke-width="0.5"/>
  <!-- Taps sit at cell offsets 0, 28 and 56 on both axes, so they span the whole 5x5 grid.
       Fill and stroke are inherited from this group; opacity is not an inherited property,
       so it stays on each tap (0.6 marks the centre tap). -->
  <g fill="#e74c3c" stroke="#e74c3c" stroke-width="1">
    <!-- top row -->
    <rect x="0" y="0" width="14" height="14" opacity="0.4"/>
    <rect x="28" y="0" width="14" height="14" opacity="0.4"/>
    <rect x="56" y="0" width="14" height="14" opacity="0.4"/>
    <!-- middle row -->
    <rect x="0" y="28" width="14" height="14" opacity="0.4"/>
    <rect x="28" y="28" width="14" height="14" opacity="0.6"/>
    <rect x="56" y="28" width="14" height="14" opacity="0.4"/>
    <!-- bottom row -->
    <rect x="0" y="56" width="14" height="14" opacity="0.4"/>
    <rect x="28" y="56" width="14" height="14" opacity="0.4"/>
    <rect x="56" y="56" width="14" height="14" opacity="0.4"/>
  </g>
</g>
<!-- Caption: larger receptive field, same number of weights -->
<text x="170" y="158" fill="#e74c3c" font-size="8" text-anchor="middle">RF: 5×5, 9 params</text>
<!-- ASPP module (right side): one input feature map fans out to five parallel branches -->
<text x="475" y="48" fill="#333" font-size="11" font-weight="bold" text-anchor="middle">ASPP Module</text>
<!-- Input feature map -->
<rect x="290" y="68" width="60" height="60" rx="3" fill="#ecf0f1" stroke="#999" stroke-width="1.5"/>
<text x="320" y="102" fill="#333" font-size="8" text-anchor="middle">Features</text>
<!-- Parallel branches.
     Branch boxes are 25px tall on a 27px pitch (y = 55, 82, 109, 136, 163).
     Each input arrow ends at the vertical centre of its own box (y2 = 68, 95, 122, 149, 176),
     so every branch, including the pooling branch, visibly receives the input. -->
<!-- Branch 1: 1x1 conv -->
<line x1="350" y1="78" x2="395" y2="68" stroke="#555" stroke-width="1" marker-end="url(#dl-arrow)"/>
<rect x="400" y="55" width="70" height="25" rx="4" fill="#3498db" opacity="0.15" stroke="#3498db" stroke-width="1.5"/>
<text x="435" y="72" fill="#3498db" font-size="8" text-anchor="middle" font-weight="bold">1×1 (r=1)</text>
<!-- Branch 2: 3x3 rate 6 -->
<line x1="350" y1="88" x2="395" y2="95" stroke="#555" stroke-width="1" marker-end="url(#dl-arrow)"/>
<rect x="400" y="82" width="70" height="25" rx="4" fill="#e74c3c" opacity="0.15" stroke="#e74c3c" stroke-width="1.5"/>
<text x="435" y="99" fill="#e74c3c" font-size="8" text-anchor="middle" font-weight="bold">3×3 (r=6)</text>
<!-- Branch 3: 3x3 rate 12 -->
<line x1="350" y1="98" x2="395" y2="122" stroke="#555" stroke-width="1" marker-end="url(#dl-arrow)"/>
<rect x="400" y="109" width="70" height="25" rx="4" fill="#27ae60" opacity="0.15" stroke="#27ae60" stroke-width="1.5"/>
<text x="435" y="126" fill="#27ae60" font-size="8" text-anchor="middle" font-weight="bold">3×3 (r=12)</text>
<!-- Branch 4: 3x3 rate 18 -->
<line x1="350" y1="108" x2="395" y2="149" stroke="#555" stroke-width="1" marker-end="url(#dl-arrow)"/>
<rect x="400" y="136" width="70" height="25" rx="4" fill="#f39c12" opacity="0.15" stroke="#f39c12" stroke-width="1.5"/>
<text x="435" y="153" fill="#f39c12" font-size="8" text-anchor="middle" font-weight="bold">3×3 (r=18)</text>
<!-- Branch 5: Global Average Pool followed by 1x1 conv -->
<line x1="350" y1="118" x2="395" y2="176" stroke="#555" stroke-width="1" marker-end="url(#dl-arrow)"/>
<rect x="400" y="163" width="70" height="25" rx="4" fill="#9b59b6" opacity="0.15" stroke="#9b59b6" stroke-width="1.5"/>
<text x="435" y="180" fill="#9b59b6" font-size="8" text-anchor="middle" font-weight="bold">GAP + 1×1</text>
<!-- All branches merge: one connector per branch box, leaving its right edge (x = 470)
     and converging on a single point just left of the Concat box. No arrowheads here. -->
<g stroke="#555" stroke-width="1">
  <line x1="470" y1="68" x2="510" y2="120"/>
  <line x1="470" y1="95" x2="510" y2="120"/>
  <line x1="470" y1="122" x2="510" y2="120"/>
  <line x1="470" y1="149" x2="510" y2="120"/>
  <line x1="470" y1="176" x2="510" y2="120"/>
</g>
<!-- Concat -->
<rect x="515" y="105" width="55" height="30" rx="4" fill="#333" opacity="0.08" stroke="#333" stroke-width="1.5"/>
<text x="543" y="124" fill="#333" font-size="8" text-anchor="middle" font-weight="bold">Concat</text>
<!-- 1x1 conv fusion -->
<line x1="570" y1="120" x2="595" y2="120" stroke="#555" stroke-width="1.2" marker-end="url(#dl-arrow)"/>
<rect x="600" y="105" width="55" height="30" rx="4" fill="#333" opacity="0.08" stroke="#333" stroke-width="1.5"/>
<text x="628" y="124" fill="#333" font-size="8" text-anchor="middle" font-weight="bold">1×1 conv</text>
<!-- Output. The arrow stops at x = 673 and the start-anchored label begins at x = 678,
     leaving about 22px for the word so it stays inside the 700px-wide canvas. -->
<line x1="655" y1="120" x2="673" y2="120" stroke="#555" stroke-width="1.2" marker-end="url(#dl-arrow)"/>
<text x="678" y="124" fill="#333" font-size="8" text-anchor="start">Out</text>
<!-- Bottom notes: framed explanation box (600x70) with a bold heading and three centred lines -->
<rect x="50" y="200" width="600" height="70" rx="6" fill="#f5f5f5" stroke="#333" stroke-width="1"/>
<text x="350" y="220" fill="#333" font-size="10" text-anchor="middle" font-weight="bold">How Atrous Convolution Works</text>
<!-- A dilation rate r leaves r−1 skipped positions between neighbouring taps, which is what yields the (2r+1) extent quoted in the same line. -->
<text x="350" y="236" fill="#666" font-size="9" text-anchor="middle">Standard 3×3 filter with gaps of (rate−1) between elements. Rate r gives receptive field (2r+1)×(2r+1).</text>
<text x="350" y="252" fill="#666" font-size="9" text-anchor="middle">ASPP applies multiple rates in parallel to capture context at multiple scales — like Inception but with dilation.</text>
<text x="350" y="264" fill="#666" font-size="9" text-anchor="middle">Global average pooling branch captures image-level context (what's the overall scene?).</text>
</svg>