Deployed 2536c93 with MkDocs version: 1.6.1

This commit is contained in:
2026-05-03 11:47:30 +08:00
commit 1ff86b66fc
418 changed files with 617336 additions and 0 deletions
@@ -0,0 +1,90 @@
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 700 280" width="700" height="280" font-family="Arial, sans-serif" role="img" aria-labelledby="od-title od-desc">
  <!--
    OCR-free document understanding pipeline diagram.
    Left to right: document image, vision encoder, text decoder, structured output.
    A crossed-out OCR box underneath signals that no separate OCR stage is involved.
  -->
  <title id="od-title">OCR-Free Document Understanding</title>
  <desc id="od-desc">A document image containing text, a table and a chart is fed to a vision encoder (Swin / ViT), then to a Transformer text decoder, which emits structured JSON output. A crossed-out OCR box indicates that OCR is not needed. Example models: Donut, Pix2Struct, Nougat.</desc>
  <defs>
    <!-- Grey arrowhead used by the grey connector lines -->
    <marker id="od-arrow" markerWidth="8" markerHeight="6" refX="8" refY="3" orient="auto">
      <path d="M0,0 L8,3 L0,6" fill="#666"/>
    </marker>
    <!-- Blue arrowhead so the blue document-to-encoder connector is a single colour -->
    <marker id="od-arrow-blue" markerWidth="8" markerHeight="6" refX="8" refY="3" orient="auto">
      <path d="M0,0 L8,3 L0,6" fill="#3498db"/>
    </marker>
  </defs>

  <!-- Title -->
  <text x="350" y="24" font-size="14" font-weight="bold" fill="#333" text-anchor="middle">OCR-Free Document Understanding</text>

  <!-- Document image -->
  <g id="od-document">
    <rect x="20" y="50" width="110" height="150" rx="4" fill="white" stroke="#ccc" stroke-width="1.5"/>
    <!-- Fake title in document -->
    <rect x="35" y="55" width="50" height="3" rx="1" fill="#999"/>
    <!-- Fake text lines in document -->
    <rect x="30" y="60" width="70" height="5" rx="1" fill="#ddd"/>
    <rect x="30" y="70" width="85" height="5" rx="1" fill="#ddd"/>
    <rect x="30" y="80" width="60" height="5" rx="1" fill="#ddd"/>
    <rect x="30" y="90" width="75" height="5" rx="1" fill="#ddd"/>
    <!-- Fake table: outer frame, three row rules, two column rules -->
    <rect x="30" y="105" width="90" height="40" rx="2" fill="none" stroke="#bbb" stroke-width="1"/>
    <line x1="30" y1="115" x2="120" y2="115" stroke="#bbb" stroke-width="0.5"/>
    <line x1="30" y1="125" x2="120" y2="125" stroke="#bbb" stroke-width="0.5"/>
    <line x1="30" y1="135" x2="120" y2="135" stroke="#bbb" stroke-width="0.5"/>
    <line x1="60" y1="105" x2="60" y2="145" stroke="#bbb" stroke-width="0.5"/>
    <line x1="90" y1="105" x2="90" y2="145" stroke="#bbb" stroke-width="0.5"/>
    <!-- Fake chart area with three bars -->
    <rect x="30" y="155" width="40" height="30" rx="2" fill="#3498db" fill-opacity="0.1" stroke="#3498db" stroke-width="0.5"/>
    <rect x="33" y="168" width="8" height="15" rx="1" fill="#3498db" fill-opacity="0.4"/>
    <rect x="43" y="162" width="8" height="21" rx="1" fill="#3498db" fill-opacity="0.4"/>
    <rect x="53" y="172" width="8" height="11" rx="1" fill="#3498db" fill-opacity="0.4"/>
    <!-- More text -->
    <rect x="75" y="158" width="45" height="4" rx="1" fill="#ddd"/>
    <rect x="75" y="167" width="35" height="4" rx="1" fill="#ddd"/>
    <rect x="75" y="176" width="40" height="4" rx="1" fill="#ddd"/>
    <!-- Caption centred under the document frame (frame spans x=20..130) -->
    <text x="75" y="218" font-size="9" fill="#666" text-anchor="middle">Document Image</text>
  </g>

  <!-- Arrow to Vision Encoder -->
  <line x1="130" y1="125" x2="175" y2="125" stroke="#3498db" stroke-width="1.5" marker-end="url(#od-arrow-blue)"/>

  <!-- Vision Encoder -->
  <rect x="180" y="85" width="120" height="55" rx="8" fill="#3498db" fill-opacity="0.12" stroke="#3498db" stroke-width="1.5"/>
  <text x="240" y="108" font-size="10" fill="#333" text-anchor="middle">Vision Encoder</text>
  <text x="240" y="124" font-size="9" fill="#666" text-anchor="middle">(Swin / ViT)</text>

  <!-- Arrow to Text Decoder -->
  <line x1="300" y1="112" x2="345" y2="112" stroke="#666" stroke-width="1.5" marker-end="url(#od-arrow)"/>

  <!-- Text Decoder -->
  <rect x="350" y="85" width="120" height="55" rx="8" fill="#e74c3c" fill-opacity="0.12" stroke="#e74c3c" stroke-width="1.5"/>
  <text x="410" y="108" font-size="10" fill="#333" text-anchor="middle">Text Decoder</text>
  <text x="410" y="124" font-size="9" fill="#666" text-anchor="middle">(Transformer)</text>

  <!-- Arrow to Output -->
  <line x1="470" y1="112" x2="505" y2="112" stroke="#666" stroke-width="1.5" marker-end="url(#od-arrow)"/>

  <!-- Structured Output -->
  <rect x="510" y="65" width="170" height="112" rx="6" fill="#27ae60" fill-opacity="0.06" stroke="#27ae60" stroke-width="1.2"/>
  <text x="595" y="82" font-size="10" fill="#27ae60" text-anchor="middle" font-weight="bold">Structured Output</text>
  <!--
    JSON sample. Leading spaces inside a text element are stripped by SVG's default
    whitespace handling, so indentation is expressed through the x attribute instead
    (about 5.4px per character for a 9px monospace font).
  -->
  <g font-size="9" fill="#333" font-family="monospace">
    <text x="525" y="100">{"title": "Report",</text>
    <text x="530" y="114">"date": "2024-01",</text>
    <text x="530" y="128">"table": [</text>
    <text x="536" y="142">["A", "B", "C"],</text>
    <text x="536" y="156">[1, 2, 3]</text>
    <text x="530" y="170">]}</text>
  </g>

  <!-- Crossed-out OCR box -->
  <rect x="220" y="165" width="70" height="32" rx="6" fill="#e74c3c" fill-opacity="0.06" stroke="#e74c3c" stroke-width="1.2" stroke-dasharray="4,3"/>
  <text x="255" y="185" font-size="11" fill="#e74c3c" text-anchor="middle">OCR</text>
  <!-- Cross-out X -->
  <line x1="225" y1="168" x2="285" y2="194" stroke="#e74c3c" stroke-width="2.5"/>
  <line x1="285" y1="168" x2="225" y2="194" stroke="#e74c3c" stroke-width="2.5"/>
  <text x="255" y="210" font-size="8" fill="#e74c3c" text-anchor="middle">Not needed!</text>

  <!--
    Direct path label. The dashed guide spans x=180..470 (encoder left edge to decoder
    right edge), so the label is centred at x=325 and raised above the guide so the
    line does not run through the text baseline.
  -->
  <path d="M 180 60 L 470 60" fill="none" stroke="#27ae60" stroke-width="1" stroke-dasharray="3,3"/>
  <text x="325" y="55" font-size="9" fill="#27ae60" text-anchor="middle" font-weight="bold">Direct: pixels to text</text>

  <!-- Model names -->
  <text x="350" y="248" font-size="11" fill="#333" text-anchor="middle" font-weight="bold">Donut / Pix2Struct / Nougat</text>
  <text x="350" y="268" font-size="9" fill="#999" text-anchor="middle">End-to-end: no OCR engine, no text detection, no post-processing</text>
</svg>

After

Width:  |  Height:  |  Size: 5.1 KiB