Files

62 lines
4.6 KiB
XML

<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 750 280" width="750" height="280" font-family="Arial, sans-serif" role="img" aria-labelledby="rag-title" aria-describedby="rag-desc">
  <title id="rag-title">RAG Architecture: Retrieval-Augmented Generation</title>
  <desc id="rag-desc">Online path: the user query is embedded (~5ms), a vector search returns the top-K chunks (~2ms) from a store of 44K chunks, and a cross-encoder reranks them (~20ms). The LLM then generates from a prompt made of the system instruction, the retrieved chunks and the original query (~500-2000ms, dominates latency) and returns a response with citations. Offline path: an ingestion pipeline chunks and embeds documents and stores them in the vector DB whenever docs are updated.</desc>
  <!--
    Layout notes:
    * font-family is set once on the root element and inherited by every text node.
    * The top row (y=45..95) is the online retrieval path, read left to right.
    * Solid grey arrows are the main data flow; dashed strokes are side links.
  -->
  <defs>
    <!-- Shared arrowhead. refX=8 / refY=3 anchor the triangle's tip on the end point of the line that uses it. -->
    <marker id="rag-arr" markerWidth="8" markerHeight="6" refX="8" refY="3" orient="auto">
      <path d="M0,0 L8,3 L0,6 Z" fill="#666"/>
    </marker>
  </defs>

  <!-- Visible heading (same wording as the accessible title) -->
  <text x="375" y="22" text-anchor="middle" font-size="14" font-weight="bold" fill="#333">RAG Architecture: Retrieval-Augmented Generation</text>

  <!-- Step 1: user query -->
  <rect x="30" y="50" width="100" height="40" rx="8" fill="#555" fill-opacity="0.1" stroke="#555" stroke-width="1.5"/>
  <text x="80" y="75" text-anchor="middle" font-size="10" font-weight="bold" fill="#555">User Query</text>
  <line x1="130" y1="70" x2="165" y2="70" stroke="#666" stroke-width="1.5" marker-end="url(#rag-arr)"/>

  <!-- Step 2: query embedding -->
  <rect x="173" y="45" width="100" height="50" rx="8" fill="#3498db" fill-opacity="0.1" stroke="#3498db" stroke-width="1.5"/>
  <text x="223" y="68" text-anchor="middle" font-size="9" font-weight="bold" fill="#3498db">Embed</text>
  <text x="223" y="82" text-anchor="middle" font-size="9" fill="#3498db">query (~5ms)</text>
  <line x1="273" y1="70" x2="308" y2="70" stroke="#666" stroke-width="1.5" marker-end="url(#rag-arr)"/>

  <!-- Step 3: vector search -->
  <rect x="316" y="45" width="110" height="50" rx="8" fill="#f39c12" fill-opacity="0.1" stroke="#f39c12" stroke-width="1.5"/>
  <text x="371" y="65" text-anchor="middle" font-size="9" font-weight="bold" fill="#f39c12">Vector Search</text>
  <text x="371" y="80" text-anchor="middle" font-size="9" fill="#f39c12">top-K chunks (~2ms)</text>

  <!-- Document store, tied to the vector search box by a dashed link (no arrowhead) -->
  <rect x="340" y="120" width="70" height="35" rx="6" fill="#f39c12" fill-opacity="0.06" stroke="#f39c12" stroke-width="1"/>
  <text x="375" y="142" text-anchor="middle" font-size="8" fill="#f39c12">44K chunks</text>
  <line x1="375" y1="95" x2="375" y2="120" stroke="#f39c12" stroke-width="1" stroke-dasharray="3,2"/>
  <line x1="426" y1="70" x2="461" y2="70" stroke="#666" stroke-width="1.5" marker-end="url(#rag-arr)"/>

  <!-- Step 4: optional rerank -->
  <rect x="469" y="45" width="90" height="50" rx="8" fill="#9b59b6" fill-opacity="0.1" stroke="#9b59b6" stroke-width="1.5"/>
  <text x="514" y="65" text-anchor="middle" font-size="9" font-weight="bold" fill="#9b59b6">Rerank</text>
  <text x="514" y="80" text-anchor="middle" font-size="8" fill="#9b59b6">cross-encoder (~20ms)</text>
  <!-- Reranked chunks flow down into the LLM box -->
  <line x1="514" y1="95" x2="514" y2="155" stroke="#666" stroke-width="1.5" marker-end="url(#rag-arr)"/>

  <!--
    The original query is also sent to the LLM. The connector leaves the
    User Query box on its left edge and runs down the free margin at x=15,
    so it stays clear of the opaque Offline Ingestion box (x=30..280,
    y=120..175) instead of disappearing underneath it.
  -->
  <path d="M 30,70 L 15,70 L 15,190 L 340,190" fill="none" stroke="#555" stroke-width="1.2" marker-end="url(#rag-arr)" stroke-dasharray="5,3"/>
  <text x="200" y="205" text-anchor="middle" font-size="8" fill="#555">original query</text>

  <!-- Step 5: LLM generation -->
  <rect x="348" y="163" width="330" height="60" rx="10" fill="#27ae60" fill-opacity="0.08" stroke="#27ae60" stroke-width="2"/>
  <text x="513" y="185" text-anchor="middle" font-size="11" font-weight="bold" fill="#27ae60">LLM Generation</text>
  <text x="513" y="200" text-anchor="middle" font-size="9" fill="#27ae60">prompt = system instruction + retrieved chunks + query</text>
  <text x="513" y="213" text-anchor="middle" font-size="8" fill="#27ae60">~500-2000ms (dominates latency)</text>

  <!-- Step 6: response -->
  <line x1="513" y1="223" x2="513" y2="253" stroke="#666" stroke-width="1.5" marker-end="url(#rag-arr)"/>
  <rect x="440" y="255" width="146" height="22" rx="6" fill="#27ae60" fill-opacity="0.15" stroke="#27ae60" stroke-width="1.5"/>
  <text x="513" y="270" text-anchor="middle" font-size="9" font-weight="bold" fill="#27ae60">Response + Citations</text>

  <!-- Offline ingestion pipeline: drawn as a standalone box, not wired into the online path -->
  <rect x="30" y="120" width="250" height="55" rx="8" fill="#eee" stroke="#ccc" stroke-width="1"/>
  <text x="155" y="138" text-anchor="middle" font-size="9" font-weight="bold" fill="#666">Offline Ingestion Pipeline</text>
  <text x="155" y="155" text-anchor="middle" font-size="8" fill="#666">Documents → Chunk → Embed → Store in Vector DB</text>
  <text x="155" y="167" text-anchor="middle" font-size="8" fill="#999">(runs when docs are updated)</text>
</svg>