Files

40 lines
2.5 KiB
XML
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
<!--
  TD Learning diagram: one transition from state s_t to state s_{t+1}
  (action a_t, reward r_t), the TD target built from that single step,
  and the resulting value update rule.

  Canvas is 700 x 280 user units. The viewBox mirrors width/height so the
  drawing keeps its aspect ratio if the embedding context resizes it.
  Element ids are prefixed with "td-" to stay unique when the SVG is
  inlined next to other diagrams.
-->
<svg width="700" height="280" viewBox="0 0 700 280" xmlns="http://www.w3.org/2000/svg" role="img" aria-labelledby="td-title td-desc">
  <title id="td-title">TD Learning: Bootstrapping from the Next State</title>
  <desc id="td-desc">State s_t with value estimate V(s_t) transitions to the next state s_{t+1} by taking action a_t and receiving reward r_t. The TD target r_t + γ · V(s_{t+1}) is used to update V(s_t) after a single step, without waiting for the episode to end.</desc>
  <defs>
    <!-- Open (unfilled) arrowhead; refX=8 places the tip exactly at the line end. -->
    <marker id="td-arrow" markerWidth="8" markerHeight="6" refX="8" refY="3" orient="auto">
      <path d="M0,0 L8,3 L0,6" fill="none" stroke="#555" stroke-width="1"/>
    </marker>
  </defs>
  <!-- Visible heading -->
  <text x="350" y="22" fill="#333" font-size="14" font-weight="bold" text-anchor="middle">TD Learning: Bootstrapping from the Next State</text>
  <!-- Current state s_t (blue) with its value estimate -->
  <circle cx="150" cy="100" r="35" fill="#3498db" opacity="0.15" stroke="#3498db" stroke-width="2"/>
  <text x="150" y="97" fill="#3498db" font-size="13" font-weight="bold" text-anchor="middle">s_t</text>
  <text x="150" y="113" fill="#3498db" font-size="10" text-anchor="middle">V(s_t)</text>
  <!-- Transition arrow: reward label above, action label below -->
  <line x1="185" y1="100" x2="355" y2="100" stroke="#555" stroke-width="2" marker-end="url(#td-arrow)"/>
  <text x="270" y="90" fill="#f39c12" font-size="12" font-weight="bold" text-anchor="middle">r_t</text>
  <text x="270" y="118" fill="#666" font-size="10" text-anchor="middle">take action a_t</text>
  <!-- Next state s_{t+1} (green) with its value estimate -->
  <circle cx="400" cy="100" r="35" fill="#27ae60" opacity="0.15" stroke="#27ae60" stroke-width="2"/>
  <text x="400" y="97" fill="#27ae60" font-size="13" font-weight="bold" text-anchor="middle">s_{t+1}</text>
  <text x="400" y="113" fill="#27ae60" font-size="10" text-anchor="middle">V(s_{t+1})</text>
  <!-- Ellipsis: the trajectory continues past s_{t+1} -->
  <text x="475" y="104" fill="#999" font-size="16">...</text>
  <!-- TD target bracket: spans from the reward to the next state's value -->
  <line x1="230" y1="145" x2="230" y2="160" stroke="#e74c3c" stroke-width="1.5"/>
  <line x1="230" y1="160" x2="440" y2="160" stroke="#e74c3c" stroke-width="1.5"/>
  <line x1="440" y1="145" x2="440" y2="160" stroke="#e74c3c" stroke-width="1.5"/>
  <!-- Uses the same symbols (γ, ·) as the update rule below so the two formulas read consistently. -->
  <text x="335" y="178" fill="#e74c3c" font-size="11" font-weight="bold" text-anchor="middle">TD target = r_t + γ · V(s_{t+1})</text>
  <!-- Update formula box -->
  <rect x="115" y="200" width="470" height="55" rx="6" fill="#f5f5f5" stroke="#333" stroke-width="1"/>
  <!-- The successor state is written s_{t+1}; a bare "s_t+1" would read as "s_t plus one". -->
  <text x="350" y="222" fill="#333" font-size="12" text-anchor="middle" font-weight="bold">V(s_t) ← V(s_t) + α · [ r_t + γ · V(s_{t+1}) - V(s_t) ]</text>
  <text x="350" y="244" fill="#666" font-size="10" text-anchor="middle">TD error = (actual reward + estimated future) - current estimate</text>
  <!-- Key insight -->
  <text x="350" y="272" fill="#9b59b6" font-size="10" text-anchor="middle">No need to wait for episode end. Learn from each single step (bootstrapping).</text>
</svg>