Files

80 lines
4.8 KiB
XML
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
<svg width="680" height="300" viewBox="0 0 680 300" xmlns="http://www.w3.org/2000/svg" role="img" aria-labelledby="scaling-laws-title scaling-laws-desc">
  <!--
    Explainer figure: a falling loss curve on the left (plot area x=80..400, y=55..250)
    and a "Key Findings" panel on the right (x=430..665, y=50..285).
    viewBox mirrors width/height so the drawing scales when the element is resized.
    X ticks are 80px apart, one per decade; Y ticks are 40px apart, one per 0.5 of loss.
    Shared presentation attributes live on <g> wrappers and are inherited by the children.
  -->
  <title id="scaling-laws-title">Scaling Laws: Loss Decreases as a Power Law</title>
  <desc id="scaling-laws-desc">Chart of loss against parameters, data or compute on a logarithmic horizontal axis from 10⁷ to 10¹⁰. Loss falls smoothly from about 3.6 to 2.0, following L(N) ∝ N^-α. Key findings: Kaplan et al. (2020), loss follows power laws in N, D, C and bigger models are more sample-efficient. Chinchilla (Hoffmann et al., 2022), scale params and data equally, roughly 20 tokens per parameter. Implication: performance is predictable; investing more compute gives lower loss.</desc>

  <!-- Visible heading, centred on the 680px canvas -->
  <text x="340" y="22" fill="#333" font-size="14" font-weight="bold" text-anchor="middle">Scaling Laws: Loss Decreases as a Power Law</text>

  <!-- Horizontal grid lines, one per Y tick; painted first so the axes draw over them -->
  <g stroke="#eee" stroke-width="1">
    <line x1="80" y1="230" x2="400" y2="230"/>
    <line x1="80" y1="190" x2="400" y2="190"/>
    <line x1="80" y1="150" x2="400" y2="150"/>
    <line x1="80" y1="110" x2="400" y2="110"/>
    <line x1="80" y1="70" x2="400" y2="70"/>
  </g>

  <!-- Axes: X along y=250, Y along x=80 -->
  <g stroke="#333" stroke-width="1.5">
    <line x1="80" y1="250" x2="400" y2="250"/>
    <line x1="80" y1="250" x2="80" y2="55"/>
  </g>

  <!-- Axis titles. The X title sits at y=290 so it clears the tick labels at y=268.
       The Y ticks are evenly spaced in value, so that axis is labelled linear. -->
  <g fill="#333" font-size="11" text-anchor="middle">
    <text x="240" y="290">Parameters / Data / Compute (log scale)</text>
    <text x="35" y="155" transform="rotate(-90, 35, 155)">Loss (linear scale)</text>
  </g>

  <!-- Tick marks: 5px stubs below the X axis and left of the Y axis -->
  <g stroke="#333" stroke-width="1">
    <line x1="120" y1="250" x2="120" y2="255"/>
    <line x1="200" y1="250" x2="200" y2="255"/>
    <line x1="280" y1="250" x2="280" y2="255"/>
    <line x1="360" y1="250" x2="360" y2="255"/>
    <line x1="75" y1="230" x2="80" y2="230"/>
    <line x1="75" y1="190" x2="80" y2="190"/>
    <line x1="75" y1="150" x2="80" y2="150"/>
    <line x1="75" y1="110" x2="80" y2="110"/>
    <line x1="75" y1="70" x2="80" y2="70"/>
  </g>

  <!-- X tick labels: powers of ten, centred under each stub -->
  <g fill="#666" font-size="9" text-anchor="middle">
    <text x="120" y="268">10⁷</text>
    <text x="200" y="268">10⁸</text>
    <text x="280" y="268">10⁹</text>
    <text x="360" y="268">10¹⁰</text>
  </g>

  <!-- Y tick labels: right-aligned against the stubs, nudged 4px down to centre on them -->
  <g fill="#666" font-size="9" text-anchor="end">
    <text x="68" y="234">2.0</text>
    <text x="68" y="194">2.5</text>
    <text x="68" y="154">3.0</text>
    <text x="68" y="114">3.5</text>
    <text x="68" y="74">4.0</text>
  </g>

  <!-- Loss curve: three chained quadratic segments, steep at first and flattening to the right -->
  <path d="M 100,95 Q 140,120 180,155 Q 220,180 260,200 Q 310,218 370,232" fill="none" stroke="#3498db" stroke-width="3"/>

  <!-- Sample markers placed along the curve -->
  <g fill="#3498db">
    <circle cx="110" cy="102" r="4"/>
    <circle cx="140" cy="125" r="4"/>
    <circle cx="170" cy="147" r="4"/>
    <circle cx="200" cy="165" r="4"/>
    <circle cx="240" cy="185" r="4"/>
    <circle cx="280" cy="203" r="4"/>
    <circle cx="320" cy="218" r="4"/>
    <circle cx="360" cy="230" r="4"/>
  </g>

  <!-- Curve formula; the exponent is raised with a dy-shifted tspan rather than literal LaTeX -->
  <text x="290" y="170" fill="#3498db" font-size="11" font-weight="bold">L(N) ∝ N<tspan dy="-4" font-size="8">-α</tspan></text>

  <!-- Right-hand panel: background, heading, then three stacked cards (each 205x55) -->
  <rect x="430" y="50" width="235" height="235" rx="8" fill="#f5f5f5" stroke="#ddd" stroke-width="1"/>
  <g text-anchor="middle">
    <text x="547" y="75" fill="#333" font-size="11" font-weight="bold">Key Findings</text>

    <!-- Card 1: Kaplan. fill-opacity tints only the fill, so the border keeps full strength -->
    <rect x="445" y="85" width="205" height="55" rx="5" fill="#3498db" fill-opacity="0.08" stroke="#3498db" stroke-width="1"/>
    <text x="547" y="102" fill="#3498db" font-size="10" font-weight="bold">Kaplan et al. (2020)</text>
    <g fill="#555" font-size="9">
      <text x="547" y="117">Loss follows power laws in N, D, C.</text>
      <text x="547" y="130">Bigger models are more sample-efficient.</text>
    </g>

    <!-- Card 2: Chinchilla -->
    <rect x="445" y="148" width="205" height="55" rx="5" fill="#e74c3c" fill-opacity="0.08" stroke="#e74c3c" stroke-width="1"/>
    <text x="547" y="165" fill="#e74c3c" font-size="10" font-weight="bold">Chinchilla (Hoffmann et al., 2022)</text>
    <g fill="#555" font-size="9">
      <text x="547" y="180">Scale params and data equally.</text>
      <text x="547" y="193">Rule: ~20 tokens per parameter.</text>
    </g>

    <!-- Card 3: Implication -->
    <rect x="445" y="211" width="205" height="55" rx="5" fill="#27ae60" fill-opacity="0.08" stroke="#27ae60" stroke-width="1"/>
    <text x="547" y="228" fill="#27ae60" font-size="10" font-weight="bold">Implication</text>
    <g fill="#555" font-size="9">
      <text x="547" y="243">Performance is predictable.</text>
      <text x="547" y="256">Invest more compute → lower loss.</text>
    </g>
  </g>
</svg>