Files
maths-cs-ai-compendium-zh/images/depthwise_separable_conv.svg
T

85 lines
6.2 KiB
XML
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
<!--
  Diagram comparing a standard convolution (top row) with a depthwise
  separable convolution (bottom row). All coordinates are in a 700 x 280
  user-space canvas; the viewBox lets the image scale when it is embedded
  at a different size. Costs shown are multiply-accumulates per spatial
  position of the output.
-->
<svg width="700"
     height="280"
     viewBox="0 0 700 280"
     role="img"
     aria-labelledby="ds-title ds-desc"
     xmlns="http://www.w3.org/2000/svg">
  <title id="ds-title">Depthwise Separable Convolution (MobileNet)</title>
  <desc id="ds-desc">Top row: a standard convolution maps an H×W×C_in input through C_out filters of size k×k×C_in to an H×W×C_out output, costing k²·C_in·C_out per spatial position. Bottom row: a per-channel k×k depthwise convolution followed by a 1×1 pointwise convolution reaches the same output shape for k²·C_in + C_in·C_out.</desc>
  <defs>
    <!-- Arrowhead shared by every connector line; the tip sits on the line end (refX = marker width). -->
    <marker id="ds-arrow" markerWidth="7" markerHeight="5" refX="7" refY="2.5" orient="auto">
      <polygon points="0 0, 7 2.5, 0 5" fill="#555"/>
    </marker>
  </defs>
  <text x="350" y="22" fill="#333" font-size="14" font-weight="bold" text-anchor="middle">Depthwise Separable Convolution (MobileNet)</text>

  <!-- ===== Top row: standard convolution ===== -->
  <text x="350" y="48" fill="#e74c3c" font-size="11" font-weight="bold" text-anchor="middle">Standard Convolution</text>
  <!-- Input volume: three offset rects suggest a stack of channels -->
  <rect x="40" y="60" width="60" height="60" rx="2" fill="#3498db" opacity="0.12" stroke="#3498db" stroke-width="1.5"/>
  <rect x="46" y="54" width="60" height="60" rx="2" fill="#3498db" opacity="0.12" stroke="#3498db" stroke-width="1"/>
  <rect x="52" y="48" width="60" height="60" rx="2" fill="#3498db" opacity="0.12" stroke="#3498db" stroke-width="1"/>
  <text x="75" y="135" fill="#333" font-size="9" text-anchor="middle">H×W×C_in</text>
  <!-- Full 3D kernel that spans all input channels -->
  <line x1="120" y1="83" x2="165" y2="83" stroke="#555" stroke-width="1.2" marker-end="url(#ds-arrow)"/>
  <rect x="170" y="65" width="35" height="35" rx="2" fill="#e74c3c" opacity="0.2" stroke="#e74c3c" stroke-width="1.5"/>
  <rect x="174" y="61" width="35" height="35" rx="2" fill="#e74c3c" opacity="0.2" stroke="#e74c3c" stroke-width="1"/>
  <rect x="178" y="57" width="35" height="35" rx="2" fill="#e74c3c" opacity="0.3" stroke="#e74c3c" stroke-width="1"/>
  <text x="195" y="115" fill="#e74c3c" font-size="8" text-anchor="middle">k×k×C_in</text>
  <text x="195" y="127" fill="#e74c3c" font-size="8" text-anchor="middle">× C_out filters</text>
  <!-- Output volume -->
  <line x1="218" y1="83" x2="258" y2="83" stroke="#555" stroke-width="1.2" marker-end="url(#ds-arrow)"/>
  <rect x="265" y="60" width="60" height="60" rx="2" fill="#9b59b6" opacity="0.12" stroke="#9b59b6" stroke-width="1.5"/>
  <rect x="271" y="54" width="60" height="60" rx="2" fill="#9b59b6" opacity="0.12" stroke="#9b59b6" stroke-width="1"/>
  <rect x="277" y="48" width="60" height="60" rx="2" fill="#9b59b6" opacity="0.15" stroke="#9b59b6" stroke-width="1"/>
  <text x="303" y="135" fill="#333" font-size="9" text-anchor="middle">H×W×C_out</text>
  <!-- Cost of the standard convolution, with a worked example -->
  <text x="530" y="70" fill="#e74c3c" font-size="10" text-anchor="middle" font-weight="bold">Cost: k²·C_in·C_out</text>
  <text x="530" y="88" fill="#666" font-size="9" text-anchor="middle">per spatial position</text>
  <text x="530" y="108" fill="#666" font-size="9" text-anchor="middle">e.g. 3²·64·128 = 73,728</text>

  <!-- ===== Bottom row: depthwise separable convolution ===== -->
  <text x="350" y="165" fill="#27ae60" font-size="11" font-weight="bold" text-anchor="middle">Depthwise Separable Convolution</text>
  <!-- Input volume; the label is centred on the front rect (x 20..70), like the other stacks in this row -->
  <rect x="20" y="180" width="50" height="50" rx="2" fill="#3498db" opacity="0.12" stroke="#3498db" stroke-width="1.5"/>
  <rect x="26" y="174" width="50" height="50" rx="2" fill="#3498db" opacity="0.12" stroke="#3498db" stroke-width="1"/>
  <rect x="32" y="168" width="50" height="50" rx="2" fill="#3498db" opacity="0.12" stroke="#3498db" stroke-width="1"/>
  <text x="45" y="245" fill="#333" font-size="8" text-anchor="middle">H×W×C_in</text>
  <!-- Step 1: depthwise stage, one k×k filter per input channel -->
  <line x1="88" y1="200" x2="118" y2="200" stroke="#555" stroke-width="1.2" marker-end="url(#ds-arrow)"/>
  <rect x="125" y="175" width="100" height="50" rx="6" fill="#27ae60" opacity="0.12" stroke="#27ae60" stroke-width="1.5"/>
  <text x="175" y="196" fill="#27ae60" font-size="9" text-anchor="middle" font-weight="bold">Depthwise</text>
  <text x="175" y="210" fill="#27ae60" font-size="8" text-anchor="middle">k×k × 1 per channel</text>
  <text x="175" y="245" fill="#27ae60" font-size="8" text-anchor="middle">Cost: k²·C_in</text>
  <!-- Intermediate volume: channel count is still C_in after the depthwise stage -->
  <line x1="225" y1="200" x2="265" y2="200" stroke="#555" stroke-width="1.2" marker-end="url(#ds-arrow)"/>
  <rect x="270" y="180" width="50" height="50" rx="2" fill="#f39c12" opacity="0.12" stroke="#f39c12" stroke-width="1.5"/>
  <rect x="276" y="174" width="50" height="50" rx="2" fill="#f39c12" opacity="0.12" stroke="#f39c12" stroke-width="1"/>
  <rect x="282" y="168" width="50" height="50" rx="2" fill="#f39c12" opacity="0.15" stroke="#f39c12" stroke-width="1"/>
  <text x="295" y="245" fill="#333" font-size="8" text-anchor="middle">H×W×C_in</text>
  <!-- Step 2: pointwise stage, 1×1 convolution that mixes channels -->
  <line x1="338" y1="200" x2="368" y2="200" stroke="#555" stroke-width="1.2" marker-end="url(#ds-arrow)"/>
  <rect x="375" y="175" width="100" height="50" rx="6" fill="#f39c12" opacity="0.12" stroke="#f39c12" stroke-width="1.5"/>
  <text x="425" y="196" fill="#f39c12" font-size="9" text-anchor="middle" font-weight="bold">Pointwise</text>
  <text x="425" y="210" fill="#f39c12" font-size="8" text-anchor="middle">1×1 × C_in × C_out</text>
  <text x="425" y="245" fill="#f39c12" font-size="8" text-anchor="middle">Cost: C_in·C_out</text>
  <!-- Output volume -->
  <line x1="475" y1="200" x2="510" y2="200" stroke="#555" stroke-width="1.2" marker-end="url(#ds-arrow)"/>
  <rect x="515" y="180" width="50" height="50" rx="2" fill="#9b59b6" opacity="0.12" stroke="#9b59b6" stroke-width="1.5"/>
  <rect x="521" y="174" width="50" height="50" rx="2" fill="#9b59b6" opacity="0.12" stroke="#9b59b6" stroke-width="1"/>
  <rect x="527" y="168" width="50" height="50" rx="2" fill="#9b59b6" opacity="0.15" stroke="#9b59b6" stroke-width="1"/>
  <text x="540" y="245" fill="#333" font-size="8" text-anchor="middle">H×W×C_out</text>
  <!--
    Total cost box. The saving relative to the standard convolution is
    1 / (1/C_out + 1/k²): about 8.4× for the 64 to 128 channel example in
    the top row, approaching 9× for k=3 only as C_out grows large.
  -->
  <rect x="590" y="178" width="100" height="50" rx="4" fill="#f5f5f5" stroke="#27ae60" stroke-width="1.5"/>
  <text x="640" y="198" fill="#27ae60" font-size="9" text-anchor="middle" font-weight="bold">Total cost:</text>
  <text x="640" y="212" fill="#27ae60" font-size="8" text-anchor="middle">k²·C_in + C_in·C_out</text>
  <text x="640" y="224" fill="#666" font-size="8" text-anchor="middle">≈ 8–9× cheaper (k=3)</text>
  <!-- Bottom note: the factorised form matches the output shape, not the output values, of a standard convolution -->
  <rect x="60" y="258" width="580" height="18" rx="4" fill="#f5f5f5" stroke="#333" stroke-width="1"/>
  <text x="350" y="271" fill="#666" font-size="9" text-anchor="middle">Depthwise handles spatial filtering; pointwise handles channel mixing. Same output shape, far fewer parameters.</text>
</svg>