Files
maths-cs-ai-compendium-zh/images/moe_layer.svg
T

72 lines
4.8 KiB
XML

<svg width="700" height="300" viewBox="0 0 700 300" xmlns="http://www.w3.org/2000/svg" role="img" aria-labelledby="moe-title" aria-describedby="moe-desc">
  <title id="moe-title">Mixture of Experts (MoE) layer with top-2 routing</title>
  <desc id="moe-desc">A token x enters a router that computes softmax(Wg·x). The router selects two of five feed-forward experts: Expert 1 with gate score 0.7 and Expert 3 with gate score 0.3. Experts 2, 4 and 5 are skipped. The outputs of the selected experts are combined by a weighted sum to give the output g₁E₁ + g₃E₃.</desc>

  <!--
    Layout (left to right): input token, router, column of five experts,
    weighted-sum node, output box; a one-line legend runs along the bottom.
    The viewBox equals the intrinsic 700x300 size, so the drawing scales
    uniformly when a host page renders it at a different size.
    All ids carry the "moe-" prefix to avoid clashes if the SVG is inlined
    next to other diagrams.
  -->

  <defs>
    <!--
      Arrowheads. refX=7 anchors the tip of the triangle on the end point of
      the line. Marker content does not inherit the colour of the referencing
      line, so there is one marker per line colour.
    -->
    <marker id="moe-arrow" markerWidth="7" markerHeight="5" refX="7" refY="2.5" orient="auto">
      <polygon points="0 0, 7 2.5, 0 5" fill="#555"/>
    </marker>
    <marker id="moe-arrow-e1" markerWidth="7" markerHeight="5" refX="7" refY="2.5" orient="auto">
      <polygon points="0 0, 7 2.5, 0 5" fill="#e74c3c"/>
    </marker>
    <marker id="moe-arrow-e3" markerWidth="7" markerHeight="5" refX="7" refY="2.5" orient="auto">
      <polygon points="0 0, 7 2.5, 0 5" fill="#3498db"/>
    </marker>
  </defs>

  <!-- Heading, centred on the 700px-wide canvas -->
  <text x="350" y="22" fill="#333" font-size="14" font-weight="bold" text-anchor="middle">Mixture of Experts (MoE) Layer</text>

  <!-- Input token -->
  <rect x="30" y="120" width="80" height="40" rx="8" fill="#95a5a6" opacity="0.2" stroke="#333" stroke-width="1.5"/>
  <text x="70" y="145" fill="#333" font-size="12" text-anchor="middle" font-weight="bold">Token x</text>

  <!-- Router; the incoming arrow stops 5px short of the box edge at x=160 -->
  <line x1="110" y1="140" x2="155" y2="140" stroke="#555" stroke-width="1.5" marker-end="url(#moe-arrow)"/>
  <rect x="160" y="110" width="90" height="60" rx="8" fill="#f39c12" opacity="0.15" stroke="#f39c12" stroke-width="2"/>
  <text x="205" y="137" fill="#f39c12" font-size="11" text-anchor="middle" font-weight="bold">Router</text>
  <text x="205" y="153" fill="#f39c12" font-size="9" text-anchor="middle">softmax(Wg·x)</text>

  <!--
    Router outputs to experts, one line per expert from top to bottom.
    Solid coloured lines with arrowheads go to the two selected experts;
    thin dashed grey lines without arrowheads go to the skipped ones.
  -->
  <line x1="250" y1="120" x2="320" y2="55" stroke="#e74c3c" stroke-width="2" marker-end="url(#moe-arrow-e1)"/>
  <line x1="250" y1="130" x2="320" y2="105" stroke="#95a5a6" stroke-width="1" stroke-dasharray="4,3"/>
  <line x1="250" y1="140" x2="320" y2="155" stroke="#3498db" stroke-width="2" marker-end="url(#moe-arrow-e3)"/>
  <line x1="250" y1="150" x2="320" y2="205" stroke="#95a5a6" stroke-width="1" stroke-dasharray="4,3"/>
  <line x1="250" y1="160" x2="320" y2="255" stroke="#95a5a6" stroke-width="1" stroke-dasharray="4,3"/>

  <!-- Gate scores for the two selected experts, coloured like their lines -->
  <text x="278" y="72" fill="#e74c3c" font-size="9" font-weight="bold">g₁=0.7</text>
  <text x="282" y="170" fill="#3498db" font-size="9" font-weight="bold">g₃=0.3</text>

  <!-- Expert 1 (active, red) -->
  <rect x="325" y="35" width="100" height="40" rx="6" fill="#e74c3c" opacity="0.15" stroke="#e74c3c" stroke-width="2"/>
  <text x="375" y="52" fill="#e74c3c" font-size="11" text-anchor="middle" font-weight="bold">Expert 1</text>
  <text x="375" y="66" fill="#e74c3c" font-size="9" text-anchor="middle">FFN</text>

  <!-- Expert 2 (inactive, grey, dashed outline) -->
  <rect x="325" y="85" width="100" height="40" rx="6" fill="#95a5a6" opacity="0.1" stroke="#95a5a6" stroke-width="1" stroke-dasharray="4,3"/>
  <text x="375" y="102" fill="#bbb" font-size="11" text-anchor="middle">Expert 2</text>
  <text x="375" y="116" fill="#bbb" font-size="9" text-anchor="middle">FFN</text>

  <!-- Expert 3 (active, blue) -->
  <rect x="325" y="135" width="100" height="40" rx="6" fill="#3498db" opacity="0.15" stroke="#3498db" stroke-width="2"/>
  <text x="375" y="152" fill="#3498db" font-size="11" text-anchor="middle" font-weight="bold">Expert 3</text>
  <text x="375" y="166" fill="#3498db" font-size="9" text-anchor="middle">FFN</text>

  <!-- Expert 4 (inactive, grey, dashed outline) -->
  <rect x="325" y="185" width="100" height="40" rx="6" fill="#95a5a6" opacity="0.1" stroke="#95a5a6" stroke-width="1" stroke-dasharray="4,3"/>
  <text x="375" y="202" fill="#bbb" font-size="11" text-anchor="middle">Expert 4</text>
  <text x="375" y="216" fill="#bbb" font-size="9" text-anchor="middle">FFN</text>

  <!-- Expert 5 (inactive, grey, dashed outline) -->
  <rect x="325" y="235" width="100" height="40" rx="6" fill="#95a5a6" opacity="0.1" stroke="#95a5a6" stroke-width="1" stroke-dasharray="4,3"/>
  <text x="375" y="252" fill="#bbb" font-size="11" text-anchor="middle">Expert 5</text>
  <text x="375" y="266" fill="#bbb" font-size="9" text-anchor="middle">FFN</text>

  <!-- Weighted sum: only the two active experts feed the "+" node -->
  <line x1="425" y1="55" x2="490" y2="105" stroke="#e74c3c" stroke-width="1.5" marker-end="url(#moe-arrow-e1)"/>
  <line x1="425" y1="155" x2="490" y2="125" stroke="#3498db" stroke-width="1.5" marker-end="url(#moe-arrow-e3)"/>
  <circle cx="510" cy="115" r="20" fill="#27ae60" opacity="0.15" stroke="#27ae60" stroke-width="2"/>
  <text x="510" y="112" fill="#27ae60" font-size="14" text-anchor="middle" font-weight="bold">+</text>
  <text x="510" y="126" fill="#27ae60" font-size="8" text-anchor="middle">weighted</text>

  <!-- Output; the arrow starts on the right edge of the circle (510 + 20) -->
  <line x1="530" y1="115" x2="570" y2="115" stroke="#555" stroke-width="1.5" marker-end="url(#moe-arrow)"/>
  <rect x="575" y="95" width="90" height="40" rx="8" fill="#27ae60" opacity="0.12" stroke="#27ae60" stroke-width="1.5"/>
  <text x="620" y="112" fill="#27ae60" font-size="11" text-anchor="middle" font-weight="bold">Output</text>
  <text x="620" y="127" fill="#27ae60" font-size="9" text-anchor="middle">g₁E₁ + g₃E₃</text>

  <!-- Legend strip along the bottom edge -->
  <rect x="80" y="278" width="540" height="18" rx="4" fill="#f5f5f5" stroke="#ddd" stroke-width="1"/>
  <text x="350" y="291" fill="#555" font-size="10" text-anchor="middle">Top-2 routing: only 2 of 5 experts are active per token. Solid = selected, dashed = skipped.</text>
</svg>