Files

5529 lines
168 KiB
HTML
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
<!doctype html>
<html lang="zh" class="no-js">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width,initial-scale=1">
<meta name="description" content="一本开源的直觉优先教科书,从零开始覆盖数学、计算机科学和人工智能(中文翻译版)。">
<meta name="author" content="Henry Ndubuaku (flykhan 译)">
<link rel="canonical" href="https://flykhan.github.io/maths-cs-ai-compendium-zh/chapter%2008%3A%20computer%20vision/03.%20object%20detection%20and%20segmentation/">
<link rel="prev" href="../02.%20convolutional%20networks/">
<link rel="next" href="../04.%20vision%20transformers%20and%20generation/">
<link rel="icon" href="../../assets/images/favicon.png">
<meta name="generator" content="mkdocs-1.6.1, mkdocs-material-9.7.6">
<title>目标检测与分割 - 数学、计算机科学与 AI 百科全书</title>
<link rel="stylesheet" href="../../assets/stylesheets/main.484c7ddc.min.css">
<link rel="stylesheet" href="../../assets/stylesheets/palette.ab4e12ef.min.css">
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
<link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Roboto:300,300i,400,400i,700,700i%7CRoboto+Mono:400,400i,700,700i&display=fallback">
<style>:root{--md-text-font:"Roboto";--md-code-font:"Roboto Mono"}</style>
<script>__md_scope=new URL("../..",location),__md_hash=e=>[...e].reduce(((e,_)=>(e<<5)-e+_.charCodeAt(0)),0),__md_get=(e,_=localStorage,t=__md_scope)=>JSON.parse(_.getItem(t.pathname+"."+e)),__md_set=(e,_,t=localStorage,a=__md_scope)=>{try{t.setItem(a.pathname+"."+e,JSON.stringify(_))}catch(e){}}</script>
</head>
<body dir="ltr" data-md-color-scheme="default" data-md-color-primary="slate" data-md-color-accent="indigo">
<input class="md-toggle" data-md-toggle="drawer" type="checkbox" id="__drawer" autocomplete="off">
<input class="md-toggle" data-md-toggle="search" type="checkbox" id="__search" autocomplete="off">
<label class="md-overlay" for="__drawer"></label>
<div data-md-component="skip">
<a href="#_1" class="md-skip">
跳转至
</a>
</div>
<div data-md-component="announce">
</div>
<header class="md-header" data-md-component="header">
<nav class="md-header__inner md-grid" aria-label="页眉">
<a href="../.." title="数学、计算机科学与 AI 百科全书" class="md-header__button md-logo" aria-label="数学、计算机科学与 AI 百科全书" data-md-component="logo">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M12 8a3 3 0 0 0 3-3 3 3 0 0 0-3-3 3 3 0 0 0-3 3 3 3 0 0 0 3 3m0 3.54C9.64 9.35 6.5 8 3 8v11c3.5 0 6.64 1.35 9 3.54 2.36-2.19 5.5-3.54 9-3.54V8c-3.5 0-6.64 1.35-9 3.54"/></svg>
</a>
<label class="md-header__button md-icon" for="__drawer">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M3 6h18v2H3zm0 5h18v2H3zm0 5h18v2H3z"/></svg>
</label>
<div class="md-header__title" data-md-component="header-title">
<div class="md-header__ellipsis">
<div class="md-header__topic">
<span class="md-ellipsis">
数学、计算机科学与 AI 百科全书
</span>
</div>
<div class="md-header__topic" data-md-component="header-topic">
<span class="md-ellipsis">
目标检测与分割
</span>
</div>
</div>
</div>
<form class="md-header__option" data-md-component="palette">
<input class="md-option" data-md-color-media="" data-md-color-scheme="default" data-md-color-primary="slate" data-md-color-accent="indigo" aria-label="切换到深色模式" type="radio" name="__palette" id="__palette_0">
<label class="md-header__button md-icon" title="切换到深色模式" for="__palette_1" hidden>
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M12 8a4 4 0 0 0-4 4 4 4 0 0 0 4 4 4 4 0 0 0 4-4 4 4 0 0 0-4-4m0 10a6 6 0 0 1-6-6 6 6 0 0 1 6-6 6 6 0 0 1 6 6 6 6 0 0 1-6 6m8-9.31V4h-4.69L12 .69 8.69 4H4v4.69L.69 12 4 15.31V20h4.69L12 23.31 15.31 20H20v-4.69L23.31 12z"/></svg>
</label>
<input class="md-option" data-md-color-media="" data-md-color-scheme="slate" data-md-color-primary="slate" data-md-color-accent="indigo" aria-label="切换到浅色模式" type="radio" name="__palette" id="__palette_1">
<label class="md-header__button md-icon" title="切换到浅色模式" for="__palette_0" hidden>
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M12 18c-.89 0-1.74-.2-2.5-.55C11.56 16.5 13 14.42 13 12s-1.44-4.5-3.5-5.45C10.26 6.2 11.11 6 12 6a6 6 0 0 1 6 6 6 6 0 0 1-6 6m8-9.31V4h-4.69L12 .69 8.69 4H4v4.69L.69 12 4 15.31V20h4.69L12 23.31 15.31 20H20v-4.69L23.31 12z"/></svg>
</label>
</form>
<script>var palette=__md_get("__palette");if(palette&&palette.color){if("(prefers-color-scheme)"===palette.color.media){var media=matchMedia("(prefers-color-scheme: light)"),input=document.querySelector(media.matches?"[data-md-color-media='(prefers-color-scheme: light)']":"[data-md-color-media='(prefers-color-scheme: dark)']");palette.color.media=input.getAttribute("data-md-color-media"),palette.color.scheme=input.getAttribute("data-md-color-scheme"),palette.color.primary=input.getAttribute("data-md-color-primary"),palette.color.accent=input.getAttribute("data-md-color-accent")}for(var[key,value]of Object.entries(palette.color))document.body.setAttribute("data-md-color-"+key,value)}</script>
<label class="md-header__button md-icon" for="__search">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M9.5 3A6.5 6.5 0 0 1 16 9.5c0 1.61-.59 3.09-1.56 4.23l.27.27h.79l5 5-1.5 1.5-5-5v-.79l-.27-.27A6.52 6.52 0 0 1 9.5 16 6.5 6.5 0 0 1 3 9.5 6.5 6.5 0 0 1 9.5 3m0 2C7 5 5 7 5 9.5S7 14 9.5 14 14 12 14 9.5 12 5 9.5 5"/></svg>
</label>
<div class="md-search" data-md-component="search" role="dialog">
<label class="md-search__overlay" for="__search"></label>
<div class="md-search__inner" role="search">
<form class="md-search__form" name="search">
<input type="text" class="md-search__input" name="query" aria-label="搜索" placeholder="搜索" autocapitalize="off" autocorrect="off" autocomplete="off" spellcheck="false" data-md-component="search-query" required>
<label class="md-search__icon md-icon" for="__search">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M9.5 3A6.5 6.5 0 0 1 16 9.5c0 1.61-.59 3.09-1.56 4.23l.27.27h.79l5 5-1.5 1.5-5-5v-.79l-.27-.27A6.52 6.52 0 0 1 9.5 16 6.5 6.5 0 0 1 3 9.5 6.5 6.5 0 0 1 9.5 3m0 2C7 5 5 7 5 9.5S7 14 9.5 14 14 12 14 9.5 12 5 9.5 5"/></svg>
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M20 11v2H8l5.5 5.5-1.42 1.42L4.16 12l7.92-7.92L13.5 5.5 8 11z"/></svg>
</label>
<nav class="md-search__options" aria-label="查找">
<button type="reset" class="md-search__icon md-icon" title="清空当前内容" aria-label="清空当前内容" tabindex="-1">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M19 6.41 17.59 5 12 10.59 6.41 5 5 6.41 10.59 12 5 17.59 6.41 19 12 13.41 17.59 19 19 17.59 13.41 12z"/></svg>
</button>
</nav>
<div class="md-search__suggest" data-md-component="search-suggest"></div>
</form>
<div class="md-search__output">
<div class="md-search__scrollwrap" tabindex="0" data-md-scrollfix>
<div class="md-search-result" data-md-component="search-result">
<div class="md-search-result__meta">
正在初始化搜索引擎
</div>
<ol class="md-search-result__list" role="presentation"></ol>
</div>
</div>
</div>
</div>
</div>
<div class="md-header__source">
<a href="https://github.com/flykhan/maths-cs-ai-compendium-zh" title="前往仓库" class="md-source" data-md-component="source">
<div class="md-source__icon md-icon">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512"><!--! Font Awesome Free 7.1.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2025 Fonticons, Inc.--><path d="M439.6 236.1 244 40.5c-5.4-5.5-12.8-8.5-20.4-8.5s-15 3-20.4 8.4L162.5 81l51.5 51.5c27.1-9.1 52.7 16.8 43.4 43.7l49.7 49.7c34.2-11.8 61.2 31 35.5 56.7-26.5 26.5-70.2-2.9-56-37.3L240.3 199v121.9c25.3 12.5 22.3 41.8 9.1 55-6.4 6.4-15.2 10.1-24.3 10.1s-17.8-3.6-24.3-10.1c-17.6-17.6-11.1-46.9 11.2-56v-123c-20.8-8.5-24.6-30.7-18.6-45L142.6 101 8.5 235.1C3 240.6 0 247.9 0 255.5s3 15 8.5 20.4l195.6 195.7c5.4 5.4 12.7 8.4 20.4 8.4s15-3 20.4-8.4l194.7-194.7c5.4-5.4 8.4-12.8 8.4-20.4s-3-15-8.4-20.4"/></svg>
</div>
<div class="md-source__repository">
flykhan/maths-cs-ai-compendium-zh
</div>
</a>
</div>
</nav>
</header>
<div class="md-container" data-md-component="container">
<nav class="md-tabs" aria-label="标签" data-md-component="tabs">
<div class="md-grid">
<ul class="md-tabs__list">
<li class="md-tabs__item">
<a href="../.." class="md-tabs__link">
首页
</a>
</li>
<li class="md-tabs__item">
<a href="../../chapter%2001%3A%20vectors/01.%20vector%20spaces/" class="md-tabs__link">
向量
</a>
</li>
<li class="md-tabs__item">
<a href="../../chapter%2002%3A%20matrices/01.%20matrix%20properties/" class="md-tabs__link">
矩阵
</a>
</li>
<li class="md-tabs__item">
<a href="../../chapter%2003%3A%20calculus/01.%20differential%20calculus/" class="md-tabs__link">
微积分
</a>
</li>
<li class="md-tabs__item">
<a href="../../chapter%2004%3A%20statistics/01.%20fundamentals/" class="md-tabs__link">
统计学
</a>
</li>
<li class="md-tabs__item">
<a href="../../chapter%2005%3A%20probability/01.%20counting/" class="md-tabs__link">
概率论
</a>
</li>
<li class="md-tabs__item">
<a href="../../chapter%2006%3A%20machine%20learning/01.%20classical%20machine%20learning/" class="md-tabs__link">
机器学习
</a>
</li>
<li class="md-tabs__item">
<a href="../../chapter%2007%3A%20computational%20linguistics/01.%20linguistic%20foundations/" class="md-tabs__link">
计算语言学
</a>
</li>
<li class="md-tabs__item md-tabs__item--active">
<a href="../01.%20image%20fundamentals/" class="md-tabs__link">
计算机视觉
</a>
</li>
<li class="md-tabs__item">
<a href="../../chapter%2009%3A%20audio%20and%20speech/01.%20digital%20signal%20processing/" class="md-tabs__link">
音频与语音
</a>
</li>
<li class="md-tabs__item">
<a href="../../chapter%2010%3A%20multimodal%20learning/01.%20multimodal%20representations/" class="md-tabs__link">
多模态学习
</a>
</li>
<li class="md-tabs__item">
<a href="../../chapter%2011%3A%20autonomous%20systems/01.%20perception/" class="md-tabs__link">
自主系统
</a>
</li>
<li class="md-tabs__item">
<a href="../../chapter%2012%3A%20graph%20neural%20networks/01.%20geometric%20deep%20learning/" class="md-tabs__link">
图神经网络
</a>
</li>
<li class="md-tabs__item">
<a href="../../chapter%2013%3A%20computing%20and%20OS/01.%20discrete%20maths/" class="md-tabs__link">
计算机与操作系统
</a>
</li>
<li class="md-tabs__item">
<a href="../../chapter%2014%3A%20data%20structures%20and%20algorithms/00.%20foundations/" class="md-tabs__link">
数据结构与算法
</a>
</li>
<li class="md-tabs__item">
<a href="../../chapter%2015%3A%20production%20software%20engineering/01.%20linux%20and%20CMD/" class="md-tabs__link">
生产级软件工程
</a>
</li>
<li class="md-tabs__item">
<a href="../../chapter%2016%3A%20SIMD%20and%20GPU%20programming/00.%20why%20C%2B%2B%20and%20how%20ML%20frameworks%20work/" class="md-tabs__link">
SIMD 与 GPU 编程
</a>
</li>
<li class="md-tabs__item">
<a href="../../chapter%2017%3A%20AI%20inference/01.%20quantisation/" class="md-tabs__link">
AI 推理
</a>
</li>
<li class="md-tabs__item">
<a href="../../chapter%2018%3A%20ML%20systems%20design/01.%20systems%20design%20fundamentals/" class="md-tabs__link">
ML 系统设计
</a>
</li>
<li class="md-tabs__item">
<a href="../../chapter%2019%3A%20applied%20AI/01.%20AI%20for%20finance/" class="md-tabs__link">
应用 AI
</a>
</li>
<li class="md-tabs__item">
<a href="../../chapter%2020%3A%20bleeding%20edge%20AI/01.%20quantum%20machine%20learning/" class="md-tabs__link">
前沿 AI
</a>
</li>
</ul>
</div>
</nav>
<main class="md-main" data-md-component="main">
<div class="md-main__inner md-grid">
<div class="md-sidebar md-sidebar--primary" data-md-component="sidebar" data-md-type="navigation" >
<div class="md-sidebar__scrollwrap">
<div class="md-sidebar__inner">
<nav class="md-nav md-nav--primary md-nav--lifted" aria-label="导航栏" data-md-level="0">
<label class="md-nav__title" for="__drawer">
<a href="../.." title="数学、计算机科学与 AI 百科全书" class="md-nav__button md-logo" aria-label="数学、计算机科学与 AI 百科全书" data-md-component="logo">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M12 8a3 3 0 0 0 3-3 3 3 0 0 0-3-3 3 3 0 0 0-3 3 3 3 0 0 0 3 3m0 3.54C9.64 9.35 6.5 8 3 8v11c3.5 0 6.64 1.35 9 3.54 2.36-2.19 5.5-3.54 9-3.54V8c-3.5 0-6.64 1.35-9 3.54"/></svg>
</a>
数学、计算机科学与 AI 百科全书
</label>
<div class="md-nav__source">
<a href="https://github.com/flykhan/maths-cs-ai-compendium-zh" title="前往仓库" class="md-source" data-md-component="source">
<div class="md-source__icon md-icon">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512"><!--! Font Awesome Free 7.1.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2025 Fonticons, Inc.--><path d="M439.6 236.1 244 40.5c-5.4-5.5-12.8-8.5-20.4-8.5s-15 3-20.4 8.4L162.5 81l51.5 51.5c27.1-9.1 52.7 16.8 43.4 43.7l49.7 49.7c34.2-11.8 61.2 31 35.5 56.7-26.5 26.5-70.2-2.9-56-37.3L240.3 199v121.9c25.3 12.5 22.3 41.8 9.1 55-6.4 6.4-15.2 10.1-24.3 10.1s-17.8-3.6-24.3-10.1c-17.6-17.6-11.1-46.9 11.2-56v-123c-20.8-8.5-24.6-30.7-18.6-45L142.6 101 8.5 235.1C3 240.6 0 247.9 0 255.5s3 15 8.5 20.4l195.6 195.7c5.4 5.4 12.7 8.4 20.4 8.4s15-3 20.4-8.4l194.7-194.7c5.4-5.4 8.4-12.8 8.4-20.4s-3-15-8.4-20.4"/></svg>
</div>
<div class="md-source__repository">
flykhan/maths-cs-ai-compendium-zh
</div>
</a>
</div>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../.." class="md-nav__link">
<span class="md-ellipsis">
首页
</span>
</a>
</li>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_2" >
<label class="md-nav__link" for="__nav_2" id="__nav_2_label" tabindex="0">
<span class="md-ellipsis">
向量
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_2_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_2">
<span class="md-nav__icon md-icon"></span>
向量
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../../chapter%2001%3A%20vectors/01.%20vector%20spaces/" class="md-nav__link">
<span class="md-ellipsis">
向量空间
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../chapter%2001%3A%20vectors/02.%20vector%20properties/" class="md-nav__link">
<span class="md-ellipsis">
向量性质
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../chapter%2001%3A%20vectors/03.%20norms%20and%20metrics/" class="md-nav__link">
<span class="md-ellipsis">
范数与度量
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../chapter%2001%3A%20vectors/04.%20products/" class="md-nav__link">
<span class="md-ellipsis">
向量积
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../chapter%2001%3A%20vectors/05.%20basis%20and%20duality/" class="md-nav__link">
<span class="md-ellipsis">
基与对偶性
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_3" >
<label class="md-nav__link" for="__nav_3" id="__nav_3_label" tabindex="0">
<span class="md-ellipsis">
矩阵
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_3_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_3">
<span class="md-nav__icon md-icon"></span>
矩阵
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../../chapter%2002%3A%20matrices/01.%20matrix%20properties/" class="md-nav__link">
<span class="md-ellipsis">
矩阵性质
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../chapter%2002%3A%20matrices/02.%20matrix%20types/" class="md-nav__link">
<span class="md-ellipsis">
矩阵类型
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../chapter%2002%3A%20matrices/03.%20operations/" class="md-nav__link">
<span class="md-ellipsis">
矩阵运算
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../chapter%2002%3A%20matrices/04.%20linear%20transformations/" class="md-nav__link">
<span class="md-ellipsis">
线性变换
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../chapter%2002%3A%20matrices/05.%20decompositions/" class="md-nav__link">
<span class="md-ellipsis">
矩阵分解
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_4" >
<label class="md-nav__link" for="__nav_4" id="__nav_4_label" tabindex="0">
<span class="md-ellipsis">
微积分
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_4_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_4">
<span class="md-nav__icon md-icon"></span>
微积分
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../../chapter%2003%3A%20calculus/01.%20differential%20calculus/" class="md-nav__link">
<span class="md-ellipsis">
微分
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../chapter%2003%3A%20calculus/02.%20integral%20calculus/" class="md-nav__link">
<span class="md-ellipsis">
积分
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../chapter%2003%3A%20calculus/03.%20multivariate%20calculus/" class="md-nav__link">
<span class="md-ellipsis">
多元微积分
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../chapter%2003%3A%20calculus/04.%20function%20approximation/" class="md-nav__link">
<span class="md-ellipsis">
函数逼近
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../chapter%2003%3A%20calculus/05.%20optimisation/" class="md-nav__link">
<span class="md-ellipsis">
优化
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_5" >
<label class="md-nav__link" for="__nav_5" id="__nav_5_label" tabindex="0">
<span class="md-ellipsis">
统计学
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_5_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_5">
<span class="md-nav__icon md-icon"></span>
统计学
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../../chapter%2004%3A%20statistics/01.%20fundamentals/" class="md-nav__link">
<span class="md-ellipsis">
基础
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../chapter%2004%3A%20statistics/02.%20measures/" class="md-nav__link">
<span class="md-ellipsis">
统计量
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../chapter%2004%3A%20statistics/03.%20sampling/" class="md-nav__link">
<span class="md-ellipsis">
抽样
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../chapter%2004%3A%20statistics/04.%20hypothesis%20testing/" class="md-nav__link">
<span class="md-ellipsis">
假设检验
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../chapter%2004%3A%20statistics/05.%20inference/" class="md-nav__link">
<span class="md-ellipsis">
推断
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_6" >
<label class="md-nav__link" for="__nav_6" id="__nav_6_label" tabindex="0">
<span class="md-ellipsis">
概率论
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_6_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_6">
<span class="md-nav__icon md-icon"></span>
概率论
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../../chapter%2005%3A%20probability/01.%20counting/" class="md-nav__link">
<span class="md-ellipsis">
计数
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../chapter%2005%3A%20probability/02.%20probability%20concepts/" class="md-nav__link">
<span class="md-ellipsis">
概率概念
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../chapter%2005%3A%20probability/03.%20distributions/" class="md-nav__link">
<span class="md-ellipsis">
分布
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../chapter%2005%3A%20probability/04.%20bayesian/" class="md-nav__link">
<span class="md-ellipsis">
贝叶斯
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../chapter%2005%3A%20probability/05.%20information%20theory/" class="md-nav__link">
<span class="md-ellipsis">
信息论
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_7" >
<label class="md-nav__link" for="__nav_7" id="__nav_7_label" tabindex="0">
<span class="md-ellipsis">
机器学习
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_7_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_7">
<span class="md-nav__icon md-icon"></span>
机器学习
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../../chapter%2006%3A%20machine%20learning/01.%20classical%20machine%20learning/" class="md-nav__link">
<span class="md-ellipsis">
经典机器学习
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../chapter%2006%3A%20machine%20learning/02.%20gradient%20machine%20learning/" class="md-nav__link">
<span class="md-ellipsis">
梯度机器学习
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../chapter%2006%3A%20machine%20learning/03.%20deep%20learning/" class="md-nav__link">
<span class="md-ellipsis">
深度学习
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../chapter%2006%3A%20machine%20learning/04.%20reinforcement%20learning/" class="md-nav__link">
<span class="md-ellipsis">
强化学习
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../chapter%2006%3A%20machine%20learning/05.%20distributed%20deep%20learning/" class="md-nav__link">
<span class="md-ellipsis">
分布式深度学习
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_8" >
<label class="md-nav__link" for="__nav_8" id="__nav_8_label" tabindex="0">
<span class="md-ellipsis">
计算语言学
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_8_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_8">
<span class="md-nav__icon md-icon"></span>
计算语言学
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../../chapter%2007%3A%20computational%20linguistics/01.%20linguistic%20foundations/" class="md-nav__link">
<span class="md-ellipsis">
语言学基础
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../chapter%2007%3A%20computational%20linguistics/02.%20text%20processing%20and%20classic%20NLP/" class="md-nav__link">
<span class="md-ellipsis">
文本处理与经典 NLP
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../chapter%2007%3A%20computational%20linguistics/03.%20embeddings%20and%20sequence%20models/" class="md-nav__link">
<span class="md-ellipsis">
嵌入与序列模型
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../chapter%2007%3A%20computational%20linguistics/04.%20transformers%20and%20language%20models/" class="md-nav__link">
<span class="md-ellipsis">
Transformer 与语言模型
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../chapter%2007%3A%20computational%20linguistics/05.%20advanced%20text%20generation/" class="md-nav__link">
<span class="md-ellipsis">
高级文本生成
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item md-nav__item--active md-nav__item--section md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_9" checked>
<label class="md-nav__link" for="__nav_9" id="__nav_9_label" tabindex="">
<span class="md-ellipsis">
计算机视觉
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_9_label" aria-expanded="true">
<label class="md-nav__title" for="__nav_9">
<span class="md-nav__icon md-icon"></span>
计算机视觉
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../01.%20image%20fundamentals/" class="md-nav__link">
<span class="md-ellipsis">
图像基础
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../02.%20convolutional%20networks/" class="md-nav__link">
<span class="md-ellipsis">
卷积网络
</span>
</a>
</li>
<li class="md-nav__item md-nav__item--active">
<input class="md-nav__toggle md-toggle" type="checkbox" id="__toc">
<label class="md-nav__link md-nav__link--active" for="__toc">
<span class="md-ellipsis">
目标检测与分割
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<a href="./" class="md-nav__link md-nav__link--active">
<span class="md-ellipsis">
目标检测与分割
</span>
</a>
<nav class="md-nav md-nav--secondary" aria-label="目录">
<label class="md-nav__title" for="__toc">
<span class="md-nav__icon md-icon"></span>
目录
</label>
<ul class="md-nav__list" data-md-component="toc" data-md-scrollfix>
<li class="md-nav__item">
<a href="#colabnotebook" class="md-nav__link">
<span class="md-ellipsis">
编程练习(使用CoLab或notebook
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item">
<a href="../04.%20vision%20transformers%20and%20generation/" class="md-nav__link">
<span class="md-ellipsis">
ViT 与生成模型
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../05.%20video%20and%203D%20vision/" class="md-nav__link">
<span class="md-ellipsis">
视频与 3D 视觉
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_10" >
<label class="md-nav__link" for="__nav_10" id="__nav_10_label" tabindex="0">
<span class="md-ellipsis">
音频与语音
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_10_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_10">
<span class="md-nav__icon md-icon"></span>
音频与语音
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../../chapter%2009%3A%20audio%20and%20speech/01.%20digital%20signal%20processing/" class="md-nav__link">
<span class="md-ellipsis">
数字信号处理
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../chapter%2009%3A%20audio%20and%20speech/02.%20automatic%20speech%20recognition/" class="md-nav__link">
<span class="md-ellipsis">
自动语音识别
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../chapter%2009%3A%20audio%20and%20speech/03.%20text%20to%20speech%20and%20voice/" class="md-nav__link">
<span class="md-ellipsis">
语音合成
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../chapter%2009%3A%20audio%20and%20speech/04.%20speaker%20and%20audio%20analysis/" class="md-nav__link">
<span class="md-ellipsis">
说话人与音频分析
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../chapter%2009%3A%20audio%20and%20speech/05.%20source%20separation%20and%20noise/" class="md-nav__link">
<span class="md-ellipsis">
源分离与降噪
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_11" >
<label class="md-nav__link" for="__nav_11" id="__nav_11_label" tabindex="0">
<span class="md-ellipsis">
多模态学习
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_11_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_11">
<span class="md-nav__icon md-icon"></span>
多模态学习
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../../chapter%2010%3A%20multimodal%20learning/01.%20multimodal%20representations/" class="md-nav__link">
<span class="md-ellipsis">
多模态表征
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../chapter%2010%3A%20multimodal%20learning/02.%20vision%20language%20models/" class="md-nav__link">
<span class="md-ellipsis">
视觉语言模型
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../chapter%2010%3A%20multimodal%20learning/03.%20image%20and%20video%20tokenisation/" class="md-nav__link">
<span class="md-ellipsis">
图像与视频 Token 化
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../chapter%2010%3A%20multimodal%20learning/04.%20cross-modal%20generation/" class="md-nav__link">
<span class="md-ellipsis">
跨模态生成
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../chapter%2010%3A%20multimodal%20learning/05.%20unified%20multimodal%20architectures/" class="md-nav__link">
<span class="md-ellipsis">
统一多模态架构
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_12" >
<label class="md-nav__link" for="__nav_12" id="__nav_12_label" tabindex="0">
<span class="md-ellipsis">
自主系统
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_12_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_12">
<span class="md-nav__icon md-icon"></span>
自主系统
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../../chapter%2011%3A%20autonomous%20systems/01.%20perception/" class="md-nav__link">
<span class="md-ellipsis">
感知
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../chapter%2011%3A%20autonomous%20systems/02.%20robot%20learning/" class="md-nav__link">
<span class="md-ellipsis">
机器人学习
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../chapter%2011%3A%20autonomous%20systems/03.%20vision-language-action%20models/" class="md-nav__link">
<span class="md-ellipsis">
视觉-语言-动作模型
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../chapter%2011%3A%20autonomous%20systems/04.%20self-driving/" class="md-nav__link">
<span class="md-ellipsis">
自动驾驶
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../chapter%2011%3A%20autonomous%20systems/05.%20space%20and%20extreme%20robotics/" class="md-nav__link">
<span class="md-ellipsis">
太空与极端机器人
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_13" >
<label class="md-nav__link" for="__nav_13" id="__nav_13_label" tabindex="0">
<span class="md-ellipsis">
图神经网络
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_13_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_13">
<span class="md-nav__icon md-icon"></span>
图神经网络
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../../chapter%2012%3A%20graph%20neural%20networks/01.%20geometric%20deep%20learning/" class="md-nav__link">
<span class="md-ellipsis">
几何深度学习
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../chapter%2012%3A%20graph%20neural%20networks/02.%20graph%20theory/" class="md-nav__link">
<span class="md-ellipsis">
图论
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../chapter%2012%3A%20graph%20neural%20networks/03.%20graph%20neural%20networks/" class="md-nav__link">
<span class="md-ellipsis">
图神经网络
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../chapter%2012%3A%20graph%20neural%20networks/04.%20graph%20attention%20networks/" class="md-nav__link">
<span class="md-ellipsis">
图注意力网络
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../chapter%2012%3A%20graph%20neural%20networks/05.%203d%20graph%20networks/" class="md-nav__link">
<span class="md-ellipsis">
3D 图网络
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_14" >
<label class="md-nav__link" for="__nav_14" id="__nav_14_label" tabindex="0">
<span class="md-ellipsis">
计算机与操作系统
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_14_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_14">
<span class="md-nav__icon md-icon"></span>
计算机与操作系统
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../../chapter%2013%3A%20computing%20and%20OS/01.%20discrete%20maths/" class="md-nav__link">
<span class="md-ellipsis">
离散数学
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../chapter%2013%3A%20computing%20and%20OS/02.%20computer%20architecture/" class="md-nav__link">
<span class="md-ellipsis">
计算机体系结构
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../chapter%2013%3A%20computing%20and%20OS/03.%20operating%20systems/" class="md-nav__link">
<span class="md-ellipsis">
操作系统
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../chapter%2013%3A%20computing%20and%20OS/04.%20concurrency%20and%20parallelism/" class="md-nav__link">
<span class="md-ellipsis">
并发与并行
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../chapter%2013%3A%20computing%20and%20OS/05.%20programming%20languages/" class="md-nav__link">
<span class="md-ellipsis">
编程语言
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_15" >
<label class="md-nav__link" for="__nav_15" id="__nav_15_label" tabindex="0">
<span class="md-ellipsis">
数据结构与算法
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_15_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_15">
<span class="md-nav__icon md-icon"></span>
数据结构与算法
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../../chapter%2014%3A%20data%20structures%20and%20algorithms/00.%20foundations/" class="md-nav__link">
<span class="md-ellipsis">
基础
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../chapter%2014%3A%20data%20structures%20and%20algorithms/01.%20arrays%20and%20hashing/" class="md-nav__link">
<span class="md-ellipsis">
数组与哈希
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../chapter%2014%3A%20data%20structures%20and%20algorithms/02.%20linked%20lists%2C%20stacks%2C%20and%20queues/" class="md-nav__link">
<span class="md-ellipsis">
链表、栈与队列
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../chapter%2014%3A%20data%20structures%20and%20algorithms/03.%20trees/" class="md-nav__link">
<span class="md-ellipsis">
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../chapter%2014%3A%20data%20structures%20and%20algorithms/04.%20graphs/" class="md-nav__link">
<span class="md-ellipsis">
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../chapter%2014%3A%20data%20structures%20and%20algorithms/05.%20sorting%20and%20search/" class="md-nav__link">
<span class="md-ellipsis">
排序与搜索
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_16" >
<label class="md-nav__link" for="__nav_16" id="__nav_16_label" tabindex="0">
<span class="md-ellipsis">
生产级软件工程
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_16_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_16">
<span class="md-nav__icon md-icon"></span>
生产级软件工程
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../../chapter%2015%3A%20production%20software%20engineering/01.%20linux%20and%20CMD/" class="md-nav__link">
<span class="md-ellipsis">
Linux 与命令行
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../chapter%2015%3A%20production%20software%20engineering/02.%20git%20and%20repository%20management/" class="md-nav__link">
<span class="md-ellipsis">
Git 与仓库管理
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../chapter%2015%3A%20production%20software%20engineering/03.%20codebase%20design/" class="md-nav__link">
<span class="md-ellipsis">
代码设计
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../chapter%2015%3A%20production%20software%20engineering/04.%20testing%20and%20quality%20assurance/" class="md-nav__link">
<span class="md-ellipsis">
测试与质量保障
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../chapter%2015%3A%20production%20software%20engineering/05.%20deployment%20and%20devops/" class="md-nav__link">
<span class="md-ellipsis">
部署与 DevOps
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_17" >
<label class="md-nav__link" for="__nav_17" id="__nav_17_label" tabindex="0">
<span class="md-ellipsis">
SIMD 与 GPU 编程
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_17_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_17">
<span class="md-nav__icon md-icon"></span>
SIMD 与 GPU 编程
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../../chapter%2016%3A%20SIMD%20and%20GPU%20programming/00.%20why%20C%2B%2B%20and%20how%20ML%20frameworks%20work/" class="md-nav__link">
<span class="md-ellipsis">
为什么是 C++ 及 ML 框架原理
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../chapter%2016%3A%20SIMD%20and%20GPU%20programming/01.%20hardware%20fundamentals/" class="md-nav__link">
<span class="md-ellipsis">
硬件基础
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../chapter%2016%3A%20SIMD%20and%20GPU%20programming/02.%20ARM%20and%20NEON/" class="md-nav__link">
<span class="md-ellipsis">
ARM 与 NEON
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../chapter%2016%3A%20SIMD%20and%20GPU%20programming/03.%20x86%20and%20AVX/" class="md-nav__link">
<span class="md-ellipsis">
x86 与 AVX
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../chapter%2016%3A%20SIMD%20and%20GPU%20programming/04.%20GPU%20architecture%20and%20CUDA/" class="md-nav__link">
<span class="md-ellipsis">
GPU 架构与 CUDA
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../chapter%2016%3A%20SIMD%20and%20GPU%20programming/05.%20triton%2C%20TPUs%20and%20pallax/" class="md-nav__link">
<span class="md-ellipsis">
Triton、TPU 与 Pallas
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../chapter%2016%3A%20SIMD%20and%20GPU%20programming/06.%20RISC-V%20and%20embedded%20systems/" class="md-nav__link">
<span class="md-ellipsis">
RISC-V 与嵌入式系统
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../chapter%2016%3A%20SIMD%20and%20GPU%20programming/07.%20vulkan%20compute%20and%20cross-platform%20GPU/" class="md-nav__link">
<span class="md-ellipsis">
Vulkan Compute 与跨平台 GPU
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_18" >
<label class="md-nav__link" for="__nav_18" id="__nav_18_label" tabindex="0">
<span class="md-ellipsis">
AI 推理
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_18_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_18">
<span class="md-nav__icon md-icon"></span>
AI 推理
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../../chapter%2017%3A%20AI%20inference/01.%20quantisation/" class="md-nav__link">
<span class="md-ellipsis">
量化
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../chapter%2017%3A%20AI%20inference/02.%20efficient%20architectures/" class="md-nav__link">
<span class="md-ellipsis">
高效架构
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../chapter%2017%3A%20AI%20inference/03.%20serving%20and%20batching/" class="md-nav__link">
<span class="md-ellipsis">
服务与批处理
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../chapter%2017%3A%20AI%20inference/04.%20edge%20inference/" class="md-nav__link">
<span class="md-ellipsis">
边缘推理
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../chapter%2017%3A%20AI%20inference/05.%20scaling%20and%20deployment/" class="md-nav__link">
<span class="md-ellipsis">
扩缩与部署
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_19" >
<label class="md-nav__link" for="__nav_19" id="__nav_19_label" tabindex="0">
<span class="md-ellipsis">
ML 系统设计
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_19_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_19">
<span class="md-nav__icon md-icon"></span>
ML 系统设计
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../../chapter%2018%3A%20ML%20systems%20design/01.%20systems%20design%20fundamentals/" class="md-nav__link">
<span class="md-ellipsis">
系统设计基础
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../chapter%2018%3A%20ML%20systems%20design/02.%20cloud%20computing/" class="md-nav__link">
<span class="md-ellipsis">
云计算
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../chapter%2018%3A%20ML%20systems%20design/03.%20large%20scale%20infrastructure/" class="md-nav__link">
<span class="md-ellipsis">
大规模基础设施
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../chapter%2018%3A%20ML%20systems%20design/04.%20ML%20systems%20design/" class="md-nav__link">
<span class="md-ellipsis">
ML 系统设计
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../chapter%2018%3A%20ML%20systems%20design/05.%20ML%20design%20examples/" class="md-nav__link">
<span class="md-ellipsis">
ML 设计案例
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_20" >
<label class="md-nav__link" for="__nav_20" id="__nav_20_label" tabindex="0">
<span class="md-ellipsis">
应用 AI
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_20_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_20">
<span class="md-nav__icon md-icon"></span>
应用 AI
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../../chapter%2019%3A%20applied%20AI/01.%20AI%20for%20finance/" class="md-nav__link">
<span class="md-ellipsis">
AI 金融
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../chapter%2019%3A%20applied%20AI/02.%20protein%20design/" class="md-nav__link">
<span class="md-ellipsis">
蛋白质设计
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../chapter%2019%3A%20applied%20AI/03.%20drug%20discovery/" class="md-nav__link">
<span class="md-ellipsis">
药物发现
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../chapter%2019%3A%20applied%20AI/04.%20agentic%20systems/" class="md-nav__link">
<span class="md-ellipsis">
智能体系统
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../chapter%2019%3A%20applied%20AI/05.%20healthcare/" class="md-nav__link">
<span class="md-ellipsis">
医疗健康
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_21" >
<label class="md-nav__link" for="__nav_21" id="__nav_21_label" tabindex="0">
<span class="md-ellipsis">
前沿 AI
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_21_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_21">
<span class="md-nav__icon md-icon"></span>
前沿 AI
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../../chapter%2020%3A%20bleeding%20edge%20AI/01.%20quantum%20machine%20learning/" class="md-nav__link">
<span class="md-ellipsis">
量子机器学习
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../chapter%2020%3A%20bleeding%20edge%20AI/02.%20neuromorphic%20computing/" class="md-nav__link">
<span class="md-ellipsis">
神经形态计算
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../chapter%2020%3A%20bleeding%20edge%20AI/03.%20datacentres%20in%20space/" class="md-nav__link">
<span class="md-ellipsis">
太空数据中心
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../chapter%2020%3A%20bleeding%20edge%20AI/04.%20decentralised%20AI/" class="md-nav__link">
<span class="md-ellipsis">
去中心化 AI
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../chapter%2020%3A%20bleeding%20edge%20AI/05.%20brain%20machine%20interfaces/" class="md-nav__link">
<span class="md-ellipsis">
脑机接口
</span>
</a>
</li>
</ul>
</nav>
</li>
</ul>
</nav>
</div>
</div>
</div>
<div class="md-sidebar md-sidebar--secondary" data-md-component="sidebar" data-md-type="toc" >
<div class="md-sidebar__scrollwrap">
<div class="md-sidebar__inner">
<nav class="md-nav md-nav--secondary" aria-label="目录">
<label class="md-nav__title" for="__toc">
<span class="md-nav__icon md-icon"></span>
目录
</label>
<ul class="md-nav__list" data-md-component="toc" data-md-scrollfix>
<li class="md-nav__item">
<a href="#colabnotebook" class="md-nav__link">
<span class="md-ellipsis">
编程练习(使用CoLab或notebook
</span>
</a>
</li>
</ul>
</nav>
</div>
</div>
</div>
<div class="md-content" data-md-component="content">
<article class="md-content__inner md-typeset">
<h1 id="_1">目标检测与分割<a class="headerlink" href="#_1" title="Permanent link">&para;</a></h1>
<p><em>目标检测定位并分类图像中的每个物体;分割为每个像素分配一个标签。本文件涵盖交并比(IoU)、平均精度均值(mAP)、锚框、R-CNN系列、YOLO、SSD、特征金字塔网络(FPN)、语义/实例/全景分割(U-Net、Mask R-CNN、SAM)以及用于基准测试的评估指标。</em></p>
<ul>
<li>
<p>图像分类(文件02)回答了"这张图像里有什么?"目标检测提出了一个更难的问题:"这张图像里有哪些物体,它们在哪里?"</p>
</li>
<li>
<p>分割则更进一步:"哪些像素属于哪个物体或类别?"这些任务形成了一个空间理解精度逐步提高的层次结构。</p>
</li>
<li>
<p><strong>目标检测</strong>模型输出一组<strong>边界框</strong>,每个边界框由四个坐标(左上角 <span class="arithmatex">\(x, y\)</span>、宽度、高度)以及一个带有置信度分数的类别标签定义。一张图像可能包含零个、一个或数百个来自多个类别的物体。</p>
</li>
</ul>
<p><img alt="输入图像中包含多个物体,每个物体由一个彩色边界框和带有置信度分数的类别标签包围" src="../../images/detection_boxes.svg" /></p>
<ul>
<li><strong>交并比(IoU</strong>衡量预测边界框与真实标注的匹配程度。它是重叠面积除以并集面积:</li>
</ul>
<div class="arithmatex">\[\text{IoU} = \frac{\text{交集面积}}{\text{并集面积}}\]</div>
<ul>
<li>
<p>IoU为1表示完全重叠,IoU为0表示完全不重叠。"正确"检测的标准阈值为IoU <span class="arithmatex">\(\geq 0.5\)</span>,但也使用更严格的阈值(0.75、0.9)。</p>
</li>
<li>
<p>如果预测框与真实框的IoU超过阈值且类别正确,则检测结果为<strong>真正例(TP</strong></p>
</li>
<li>
<p><strong>假正例(FP</strong>是未匹配到任何真实标注的预测框。</p>
</li>
<li>
<p><strong>假负例(FN</strong>是没有任何预测框匹配到的真实物体。这些与第06章中的精确率和召回率概念相同。</p>
</li>
<li>
<p><strong>平均精度(AP</strong>总结单个类别的检测质量。对于每个类别,按置信度分数对所有检测结果排序,计算每个排序位置的精确率和召回率,然后计算精确率-召回率曲线下的面积:</p>
</li>
</ul>
<div class="arithmatex">\[\text{AP} = \int_0^1 p(r) \, dr\]</div>
<ul>
<li>
<p>在实践中,曲线是插值处理的:在每个召回率水平上,精确率被设置为所有召回率 <span class="arithmatex">\(\geq r\)</span> 处的最大精确率。这使曲线平滑并使其单调递减。</p>
</li>
<li>
<p><strong>平均精度均值(mAP</strong>对所有类别的AP进行平均。"mAP@0.5"使用IoU阈值0.5。"mAP@[.5:.95]"COCO标准)在从0.5到0.95的十个IoU阈值上(步长0.05)对mAP进行平均,同时奖励检测能力和精确的定位能力。</p>
</li>
<li>
<p><strong>非极大值抑制(NMS</strong>移除重复的检测结果。当模型为同一个物体预测出多个重叠的边界框时,NMS保留置信度最高的框,并移除所有与其重叠超过IoU阈值的其他框。这是在模型生成原始预测之后,按每个类别分别进行的。</p>
</li>
<li>
<p><strong>两阶段检测器</strong>首先提出候选区域,然后对每个提案进行分类和精细化调整。</p>
</li>
<li>
<p><strong>R-CNN</strong>Girshick 等人,2014年)是第一个成功的深度学习检测器。它使用选择性搜索(一种经典算法)提出约2,000个候选区域,将每个区域变形为固定尺寸,独立通过CNN运行,并使用SVM(第06章)进行分类。R-CNN准确但极其缓慢:每张图像需要运行CNN 2,000次。</p>
</li>
<li>
<p><strong>Fast R-CNN</strong>Girshick,2015年)解决了冗余问题:它在整张图像上运行一次CNN以生成共享特征图,然后使用<strong>RoI池化</strong>(感兴趣区域池化)从该共享特征图中为每个提案提取特征。</p>
</li>
<li>
<p>RoI池化从特征图中取出一个可变大小的区域,通过将该区域划分为一个网格并在每个单元格内进行最大池化,生成固定大小的输出。这种方法快得多,因为昂贵的CNN计算只进行一次。</p>
</li>
<li>
<p><strong>Faster R-CNN</strong>Ren 等人,2015年)引入了<strong>区域提议网络(RPN</strong>,从而消除了外部区域提议算法。RPN是一个小型CNN,运行在共享特征图之上,直接预测提案。RPN在特征图上滑动一个小窗口,在每个位置上预测 <span class="arithmatex">\(k\)</span> 个提案(每个<strong>锚框</strong>对应一个提案)。</p>
</li>
</ul>
<p><img alt="Faster R-CNN流程:输入图像 → 骨干CNN → 共享特征图 → RPN生成提案 → RoI池化 → 分类和边界框回归头" src="../../images/faster_rcnn.svg" /></p>
<ul>
<li>
<p><strong>锚框</strong>是特征图上每个空间位置处预定义的边界框,覆盖不同的尺度和长宽比(例如,三个尺度 <span class="arithmatex">\(\times\)</span> 三个比例 = 每个位置9个锚框)。RPN为每个锚框预测两样东西:物体性分数(物体vs背景)以及用于将锚框精炼为更紧凑提案的坐标偏移量。这种参数化使回归问题更容易:网络不需要预测绝对坐标,只需预测对合理初始框的小幅调整。</p>
</li>
<li>
<p>锚框偏移量的参数化公式为:</p>
</li>
</ul>
<div class="arithmatex">\[t_x = \frac{x - x_a}{w_a}, \quad t_y = \frac{y - y_a}{h_a}, \quad t_w = \log\frac{w}{w_a}, \quad t_h = \log\frac{h}{h_a}\]</div>
<ul>
<li>
<p>其中 <span class="arithmatex">\((x, y, w, h)\)</span> 是预测框的中心和尺寸,<span class="arithmatex">\((x_a, y_a, w_a, h_a)\)</span> 是锚框。宽度和高度的对数变换确保预测框始终为正数,并使回归具有尺度不变性。</p>
</li>
<li>
<p>Faster R-CNN使用多任务损失进行训练:类别标签的分类损失(第05章的交叉熵),以及用于边界框回归的<strong>平滑L1损失</strong>。平滑L1对异常值不如L2敏感:</p>
</li>
</ul>
<div class="arithmatex">\[
\text{smooth}_{L1}(x) = \begin{cases} 0.5x^2 & \text{if } |x| < 1 \\ |x| - 0.5 & \text{otherwise} \end{cases}
\]</div>
<ul>
<li>
<p><strong>特征金字塔网络(FPN</strong>(Lin 等人,2017年)通过构建一个带有侧边连接的自顶向下路径来解决多尺度问题,该路径将高层语义信息与低层空间细节融合。骨干网络生成多个尺度的特征图(每个池化层将分辨率减半)。FPN添加了一个自顶向下的路径,其中每个层级接收来自上一层级的上采样特征,并通过侧边1x1卷积与对应的自底向上层级合并。结果是一个特征图金字塔,每个层级的特征图既具有强语义信息又具有良好的空间分辨率。</p>
</li>
<li>
<p>小物体从金字塔的高分辨率层级检测;大物体从低分辨率层级检测。FPN现在已成为大多数现代检测架构的标准组件。</p>
</li>
<li>
<p><strong>单阶段检测器</strong>完全跳过了提案步骤,在一次前向传播中直接预测类别标签和边界框。这种方法更快,但在历史上准确率低于两阶段检测器,直到焦点损失(focal loss)缩小了这一差距。</p>
</li>
<li>
<p><strong>YOLO</strong>You Only Look OnceRedmon 等人,2016年)将图像划分为一个 <span class="arithmatex">\(S \times S\)</span> 的网格。每个网格单元预测 <span class="arithmatex">\(B\)</span> 个边界框和 <span class="arithmatex">\(C\)</span> 个类别概率。如果一个物体的中心落在一个网格单元内,该单元负责检测该物体。YOLO极其快速,因为整个检测过程只有一次前向传播,没有提案阶段。</p>
</li>
<li>
<p><strong>YOLOv2</strong>添加了锚框、批归一化和多尺度训练。<strong>YOLOv3</strong>使用了特征金字塔网络并在三个尺度上进行预测。<strong>YOLOv4-v8</strong>继续改进,采用了更好的骨干网络、路径聚合网络和马赛克数据增强(在训练中将四张图像拼接在一起以增加上下文多样性)。</p>
</li>
<li>
<p><strong>SSD</strong>Single Shot MultiBox DetectorLiu 等人,2016年)在骨干网络内的多个特征图尺度上进行预测,在每个尺度上使用锚框。早期(高分辨率)特征图检测小物体;后期(低分辨率)特征图检测大物体。SSD比Faster R-CNN更快,且具有竞争力的准确率。</p>
</li>
<li>
<p><strong>RetinaNet</strong>(Lin 等人,2017年)指出了单阶段检测器的核心问题:类别不平衡。绝大多数锚框对应的是背景,这产生了大量容易的负样本,它们主导了损失函数并压倒了来自稀有正样本的梯度。</p>
</li>
<li>
<p><strong>焦点损失(Focal Loss</strong>通过降低容易样本的权重来解决这个问题:</p>
</li>
</ul>
<div class="arithmatex">\[\text{FL}(p_t) = -\alpha_t (1 - p_t)^\gamma \log(p_t)\]</div>
<ul>
<li>
<p>其中 <span class="arithmatex">\(p_t\)</span> 是正确类别的预测概率。当模型自信且正确时(<span class="arithmatex">\(p_t\)</span> 很高),<span class="arithmatex">\((1 - p_t)^\gamma\)</span> 很小,从而减少了容易负样本对损失的贡献。超参数 <span class="arithmatex">\(\gamma\)</span> (通常为2)控制降权的强度。当 <span class="arithmatex">\(\gamma = 0\)</span> 时,焦点损失退化为标准交叉熵。凭借焦点损失,RetinaNet以单阶段的速度实现了与两阶段检测器相当的准确率。</p>
</li>
<li>
<p><strong>无锚框检测</strong>完全消除了锚框,减少了超参数调优并简化了流程。</p>
</li>
<li>
<p><strong>FCOS</strong>(全卷积单阶段检测器,Tian 等人,2019年)在特征图的每个空间位置预测从该位置到最近边界框四条边(左、上、右、下)的距离以及一个类别标签。<strong>中心性(centerness</strong>分数降低了远离物体中心的预测的权重,从而提高了质量。FCOS使用FPN来处理多尺度问题。</p>
</li>
<li>
<p><strong>CenterNet</strong>(Zhou 等人,2019年)将物体检测为点:它预测一个热力图,其中的峰值对应物体中心,然后在每个峰值处回归宽度和高度。检测变成了关键点估计。这种方法优雅且无需锚框,但需要仔细的热力图后处理。</p>
</li>
<li>
<p><strong>CornerNet</strong>将物体检测为一对角点(左上角和右下角)。它预测两个热力图(每个角类型一个),并使用<strong>关联嵌入(associative embedding</strong>将对应的角点匹配成边界框。这避免了对锚框的需求,并处理了任意形状的物体。</p>
</li>
<li>
<p><strong>语义分割</strong>为图像中的每个像素分配一个类别标签。与检测(输出边界框)不同,分割生成密集的像素级映射。一条街景可能会将每个像素标记为道路、人行道、汽车、行人、建筑、天空等。</p>
</li>
</ul>
<p><img alt="语义分割:输入街景及其像素级标签图,每种颜色代表一个类别" src="../../images/semantic_segmentation.svg" /></p>
<ul>
<li>
<p><strong>全卷积网络(FCN</strong>(Long 等人,2015年)通过将全连接层替换为卷积层,使分类CNN适用于分割任务,从而使网络能够输出空间映射而非单个类别。上采样(通过转置卷积或双线性插值)将输出恢复到输入分辨率。来自早期层的跳跃连接添加了在下采样过程中丢失的空间细节。</p>
</li>
<li>
<p><strong>转置卷积</strong>(有时称为"反卷积")是卷积的上采样对应操作。步幅卷积减少空间维度,而转置卷积增加空间维度。它在输入元素之间插入零,然后应用标准卷积,从而有效地学习如何上采样。</p>
</li>
<li>
<p><strong>U-Net</strong>Ronneberger 等人,2015年)引入了一种对称的编码器-解码器架构,在每一层都有跳跃连接。编码器(收缩路径)在增加通道数的同时降低空间分辨率,与分类CNN完全相同。解码器(扩展路径)将结果上采样回全分辨率。跳跃连接在每一层将编码器特征图与解码器特征图拼接起来,为解码器提供精细的空间细节。这种高层语义与低层细节的结合产生了清晰、准确的分割边界。</p>
</li>
</ul>
<p><img alt="U-Net架构:左侧为带下采样的编码器路径,右侧为带下采样的解码器路径,以及连接对应层级的跳跃连接" src="../../images/unet_architecture.svg" /></p>
<ul>
<li>
<p>U-Net最初是为生物医学图像分割设计的(其中训练数据稀缺),其架构已成为许多后续模型的基础,包括潜在扩散模型中的U-Net(文件04)。</p>
</li>
<li>
<p><strong>DeepLab</strong>Chen 等人,2014-2018年)为分割引入了两个关键创新:</p>
<ul>
<li>
<p><strong>空洞(扩张)卷积</strong>:在滤波器元素之间插入间隙的标准卷积,由扩张率 <span class="arithmatex">\(r\)</span> 控制。一个扩张率为 <span class="arithmatex">\(r\)</span> 的3x3滤波器的感受野为 <span class="arithmatex">\((2r + 1) \times (2r + 1)\)</span>,而仅使用9个参数。这在不进行下采样的情况下捕获多尺度上下文,同时保持空间分辨率。</p>
</li>
<li>
<p><strong>空洞空间金字塔池化(ASPP</strong>:并行应用多个具有不同扩张率的空洞卷积(例如,扩张率1、6、12、18),拼接结果,并通过1x1卷积融合。ASPP同时捕获多个尺度的上下文,其精神类似于Inception模块(文件02),但使用扩张而非不同大小的卷积核。</p>
</li>
</ul>
</li>
<li>
<p>DeepLab还使用<strong>条件随机场(CRF</strong>(第05章)作为后处理步骤,通过鼓励空间上相邻且颜色相似的像素共享相同的标签来优化分割边界。</p>
</li>
<li>
<p><strong>实例分割</strong>结合了检测和分割:它识别每个单独的物体实例,并为每个实例生成像素级掩码。场景中的两辆车会得到两个独立的掩码,而不仅仅是"车"。</p>
</li>
<li>
<p><strong>Mask R-CNN</strong>(He 等人,2017年)通过添加一个小型分割头来扩展Faster R-CNN,该分割头为每个检测到的物体预测一个二值掩码。其架构为Faster R-CNN加上一个掩码分支:掩码分支接收RoI池化后的特征,并为每个类别输出一个 <span class="arithmatex">\(m \times m\)</span> 的二值掩码。它使用<strong>RoIAlign</strong>代替RoI池化:在精确定位的采样点处进行双线性插值,而非在量化的网格单元格内进行,这避免了量化引起的空间错位。这一小改动显著提高了掩码质量。</p>
</li>
<li>
<p>Mask R-CNN使用多任务损失进行训练:分类损失 + 边界框回归损失 + 掩码损失(逐像素二值交叉熵)。掩码分支独立地为每个类别预测一个掩码;仅使用与预测类别对应的掩码,这使掩码预测与分类解耦,并同时改进了两者。</p>
</li>
<li>
<p><strong>全景分割</strong>将语义分割和实例分割统一为单个任务。每个像素同时获得一个类别标签(语义)和一个实例ID(用于"物体"类别,如汽车和人)。"背景"类别(天空、道路、草地)只获得语义标签,因为它们是无形区域,没有可计数的实例。</p>
</li>
<li>
<p>全景质量(PQ)指标通过分解为分割质量(匹配片段的平均IoU)和识别质量(匹配片段的F1分数)来评估:</p>
</li>
</ul>
<div class="arithmatex">\[\text{PQ} = \underbrace{\frac{\sum_{(p,g) \in \text{TP}} \text{IoU}(p,g)}{|\text{TP}|}}_{\text{SQ}} \times \underbrace{\frac{|\text{TP}|}{|\text{TP}| + \frac{1}{2}|\text{FP}| + \frac{1}{2}|\text{FN}|}}_{\text{RQ}}\]</div>
<ul>
<li>
<p><strong>实时分割</strong>对于自动驾驶和增强现实等应用至关重要,这些应用对延迟预算要求严格(通常每帧不超过30毫秒)。</p>
</li>
<li>
<p><strong>BiSeNet</strong>(双边分割网络,Yu 等人,2018年)使用两条并行路径:一条<strong>空间路径</strong>,具有宽而浅的层以保留空间细节;一条<strong>上下文路径</strong>,具有深而窄的层以捕获语义信息。输出被融合,兼顾速度和准确率。</p>
</li>
<li>
<p><strong>DDRNet</strong>(深度双分辨率网络,Hong 等人,2021年)在整个网络中以不同分辨率维持两个分支,并在它们之间反复交换信息。高分辨率分支保留空间细节,而低分辨率分支捕获全局上下文。多个双边融合模块在两个方向上合并信息。</p>
</li>
<li>
<p>实时分割的总体趋势是避免沉重的编码器-解码器模式,而是通过网络全程维持足够的空间分辨率,以一定的准确率为代价换取显著更低的延迟。</p>
</li>
</ul>
<h2 id="colabnotebook">编程练习(使用CoLab或notebook<a class="headerlink" href="#colabnotebook" title="Permanent link">&para;</a></h2>
<ol>
<li>
<p>从头实现IoU计算和非极大值抑制。对一组重叠的边界框应用NMS并可视化结果。
<div class="highlight"><pre><span></span><code><a id="__codelineno-0-1" name="__codelineno-0-1" href="#__codelineno-0-1"></a><span class="kn">import</span><span class="w"> </span><span class="nn">jax.numpy</span><span class="w"> </span><span class="k">as</span><span class="w"> </span><span class="nn">jnp</span>
<a id="__codelineno-0-2" name="__codelineno-0-2" href="#__codelineno-0-2"></a><span class="kn">import</span><span class="w"> </span><span class="nn">matplotlib.pyplot</span><span class="w"> </span><span class="k">as</span><span class="w"> </span><span class="nn">plt</span>
<a id="__codelineno-0-3" name="__codelineno-0-3" href="#__codelineno-0-3"></a><span class="kn">import</span><span class="w"> </span><span class="nn">matplotlib.patches</span><span class="w"> </span><span class="k">as</span><span class="w"> </span><span class="nn">patches</span>
<a id="__codelineno-0-4" name="__codelineno-0-4" href="#__codelineno-0-4"></a>
<a id="__codelineno-0-5" name="__codelineno-0-5" href="#__codelineno-0-5"></a><span class="k">def</span><span class="w"> </span><span class="nf">compute_iou</span><span class="p">(</span><span class="n">box1</span><span class="p">,</span> <span class="n">box2</span><span class="p">):</span>
<a id="__codelineno-0-6" name="__codelineno-0-6" href="#__codelineno-0-6"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;计算两个框[x1, y1, x2, y2]之间的IoU。&quot;&quot;&quot;</span>
<a id="__codelineno-0-7" name="__codelineno-0-7" href="#__codelineno-0-7"></a> <span class="n">x1</span> <span class="o">=</span> <span class="n">jnp</span><span class="o">.</span><span class="n">maximum</span><span class="p">(</span><span class="n">box1</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="n">box2</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span>
<a id="__codelineno-0-8" name="__codelineno-0-8" href="#__codelineno-0-8"></a> <span class="n">y1</span> <span class="o">=</span> <span class="n">jnp</span><span class="o">.</span><span class="n">maximum</span><span class="p">(</span><span class="n">box1</span><span class="p">[</span><span class="mi">1</span><span class="p">],</span> <span class="n">box2</span><span class="p">[</span><span class="mi">1</span><span class="p">])</span>
<a id="__codelineno-0-9" name="__codelineno-0-9" href="#__codelineno-0-9"></a> <span class="n">x2</span> <span class="o">=</span> <span class="n">jnp</span><span class="o">.</span><span class="n">minimum</span><span class="p">(</span><span class="n">box1</span><span class="p">[</span><span class="mi">2</span><span class="p">],</span> <span class="n">box2</span><span class="p">[</span><span class="mi">2</span><span class="p">])</span>
<a id="__codelineno-0-10" name="__codelineno-0-10" href="#__codelineno-0-10"></a> <span class="n">y2</span> <span class="o">=</span> <span class="n">jnp</span><span class="o">.</span><span class="n">minimum</span><span class="p">(</span><span class="n">box1</span><span class="p">[</span><span class="mi">3</span><span class="p">],</span> <span class="n">box2</span><span class="p">[</span><span class="mi">3</span><span class="p">])</span>
<a id="__codelineno-0-11" name="__codelineno-0-11" href="#__codelineno-0-11"></a>
<a id="__codelineno-0-12" name="__codelineno-0-12" href="#__codelineno-0-12"></a> <span class="n">intersection</span> <span class="o">=</span> <span class="n">jnp</span><span class="o">.</span><span class="n">maximum</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="n">x2</span> <span class="o">-</span> <span class="n">x1</span><span class="p">)</span> <span class="o">*</span> <span class="n">jnp</span><span class="o">.</span><span class="n">maximum</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="n">y2</span> <span class="o">-</span> <span class="n">y1</span><span class="p">)</span>
<a id="__codelineno-0-13" name="__codelineno-0-13" href="#__codelineno-0-13"></a> <span class="n">area1</span> <span class="o">=</span> <span class="p">(</span><span class="n">box1</span><span class="p">[</span><span class="mi">2</span><span class="p">]</span> <span class="o">-</span> <span class="n">box1</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span> <span class="o">*</span> <span class="p">(</span><span class="n">box1</span><span class="p">[</span><span class="mi">3</span><span class="p">]</span> <span class="o">-</span> <span class="n">box1</span><span class="p">[</span><span class="mi">1</span><span class="p">])</span>
<a id="__codelineno-0-14" name="__codelineno-0-14" href="#__codelineno-0-14"></a> <span class="n">area2</span> <span class="o">=</span> <span class="p">(</span><span class="n">box2</span><span class="p">[</span><span class="mi">2</span><span class="p">]</span> <span class="o">-</span> <span class="n">box2</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span> <span class="o">*</span> <span class="p">(</span><span class="n">box2</span><span class="p">[</span><span class="mi">3</span><span class="p">]</span> <span class="o">-</span> <span class="n">box2</span><span class="p">[</span><span class="mi">1</span><span class="p">])</span>
<a id="__codelineno-0-15" name="__codelineno-0-15" href="#__codelineno-0-15"></a> <span class="n">union</span> <span class="o">=</span> <span class="n">area1</span> <span class="o">+</span> <span class="n">area2</span> <span class="o">-</span> <span class="n">intersection</span>
<a id="__codelineno-0-16" name="__codelineno-0-16" href="#__codelineno-0-16"></a>
<a id="__codelineno-0-17" name="__codelineno-0-17" href="#__codelineno-0-17"></a> <span class="k">return</span> <span class="n">intersection</span> <span class="o">/</span> <span class="p">(</span><span class="n">union</span> <span class="o">+</span> <span class="mf">1e-6</span><span class="p">)</span>
<a id="__codelineno-0-18" name="__codelineno-0-18" href="#__codelineno-0-18"></a>
<a id="__codelineno-0-19" name="__codelineno-0-19" href="#__codelineno-0-19"></a><span class="k">def</span><span class="w"> </span><span class="nf">nms</span><span class="p">(</span><span class="n">boxes</span><span class="p">,</span> <span class="n">scores</span><span class="p">,</span> <span class="n">iou_threshold</span><span class="o">=</span><span class="mf">0.5</span><span class="p">):</span>
<a id="__codelineno-0-20" name="__codelineno-0-20" href="#__codelineno-0-20"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;非极大值抑制。&quot;&quot;&quot;</span>
<a id="__codelineno-0-21" name="__codelineno-0-21" href="#__codelineno-0-21"></a> <span class="n">order</span> <span class="o">=</span> <span class="n">jnp</span><span class="o">.</span><span class="n">argsort</span><span class="p">(</span><span class="o">-</span><span class="n">scores</span><span class="p">)</span> <span class="c1"># 按置信度降序排列</span>
<a id="__codelineno-0-22" name="__codelineno-0-22" href="#__codelineno-0-22"></a> <span class="n">keep</span> <span class="o">=</span> <span class="p">[]</span>
<a id="__codelineno-0-23" name="__codelineno-0-23" href="#__codelineno-0-23"></a>
<a id="__codelineno-0-24" name="__codelineno-0-24" href="#__codelineno-0-24"></a> <span class="n">remaining</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="nb">range</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">scores</span><span class="p">)))</span>
<a id="__codelineno-0-25" name="__codelineno-0-25" href="#__codelineno-0-25"></a> <span class="n">order_list</span> <span class="o">=</span> <span class="n">order</span><span class="o">.</span><span class="n">tolist</span><span class="p">()</span>
<a id="__codelineno-0-26" name="__codelineno-0-26" href="#__codelineno-0-26"></a>
<a id="__codelineno-0-27" name="__codelineno-0-27" href="#__codelineno-0-27"></a> <span class="k">while</span> <span class="n">order_list</span><span class="p">:</span>
<a id="__codelineno-0-28" name="__codelineno-0-28" href="#__codelineno-0-28"></a> <span class="n">idx</span> <span class="o">=</span> <span class="n">order_list</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
<a id="__codelineno-0-29" name="__codelineno-0-29" href="#__codelineno-0-29"></a> <span class="n">keep</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">idx</span><span class="p">)</span>
<a id="__codelineno-0-30" name="__codelineno-0-30" href="#__codelineno-0-30"></a> <span class="n">order_list</span> <span class="o">=</span> <span class="n">order_list</span><span class="p">[</span><span class="mi">1</span><span class="p">:]</span>
<a id="__codelineno-0-31" name="__codelineno-0-31" href="#__codelineno-0-31"></a>
<a id="__codelineno-0-32" name="__codelineno-0-32" href="#__codelineno-0-32"></a> <span class="n">new_order</span> <span class="o">=</span> <span class="p">[]</span>
<a id="__codelineno-0-33" name="__codelineno-0-33" href="#__codelineno-0-33"></a> <span class="k">for</span> <span class="n">j</span> <span class="ow">in</span> <span class="n">order_list</span><span class="p">:</span>
<a id="__codelineno-0-34" name="__codelineno-0-34" href="#__codelineno-0-34"></a> <span class="n">iou</span> <span class="o">=</span> <span class="n">compute_iou</span><span class="p">(</span><span class="n">boxes</span><span class="p">[</span><span class="n">idx</span><span class="p">],</span> <span class="n">boxes</span><span class="p">[</span><span class="n">j</span><span class="p">])</span>
<a id="__codelineno-0-35" name="__codelineno-0-35" href="#__codelineno-0-35"></a> <span class="k">if</span> <span class="n">iou</span> <span class="o">&lt;</span> <span class="n">iou_threshold</span><span class="p">:</span>
<a id="__codelineno-0-36" name="__codelineno-0-36" href="#__codelineno-0-36"></a> <span class="n">new_order</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">j</span><span class="p">)</span>
<a id="__codelineno-0-37" name="__codelineno-0-37" href="#__codelineno-0-37"></a> <span class="n">order_list</span> <span class="o">=</span> <span class="n">new_order</span>
<a id="__codelineno-0-38" name="__codelineno-0-38" href="#__codelineno-0-38"></a>
<a id="__codelineno-0-39" name="__codelineno-0-39" href="#__codelineno-0-39"></a> <span class="k">return</span> <span class="n">keep</span>
<a id="__codelineno-0-40" name="__codelineno-0-40" href="#__codelineno-0-40"></a>
<a id="__codelineno-0-41" name="__codelineno-0-41" href="#__codelineno-0-41"></a><span class="c1"># 示例:同一物体的重叠检测</span>
<a id="__codelineno-0-42" name="__codelineno-0-42" href="#__codelineno-0-42"></a><span class="n">boxes</span> <span class="o">=</span> <span class="n">jnp</span><span class="o">.</span><span class="n">array</span><span class="p">([</span>
<a id="__codelineno-0-43" name="__codelineno-0-43" href="#__codelineno-0-43"></a> <span class="p">[</span><span class="mi">50</span><span class="p">,</span> <span class="mi">60</span><span class="p">,</span> <span class="mi">150</span><span class="p">,</span> <span class="mi">160</span><span class="p">],</span> <span class="c1"># 高置信度</span>
<a id="__codelineno-0-44" name="__codelineno-0-44" href="#__codelineno-0-44"></a> <span class="p">[</span><span class="mi">55</span><span class="p">,</span> <span class="mi">65</span><span class="p">,</span> <span class="mi">155</span><span class="p">,</span> <span class="mi">165</span><span class="p">],</span> <span class="c1"># 重叠的重复框</span>
<a id="__codelineno-0-45" name="__codelineno-0-45" href="#__codelineno-0-45"></a> <span class="p">[</span><span class="mi">52</span><span class="p">,</span> <span class="mi">58</span><span class="p">,</span> <span class="mi">148</span><span class="p">,</span> <span class="mi">158</span><span class="p">],</span> <span class="c1"># 重叠的重复框</span>
<a id="__codelineno-0-46" name="__codelineno-0-46" href="#__codelineno-0-46"></a> <span class="p">[</span><span class="mi">200</span><span class="p">,</span> <span class="mi">100</span><span class="p">,</span> <span class="mi">300</span><span class="p">,</span> <span class="mi">200</span><span class="p">],</span> <span class="c1"># 不同物体</span>
<a id="__codelineno-0-47" name="__codelineno-0-47" href="#__codelineno-0-47"></a> <span class="p">[</span><span class="mi">205</span><span class="p">,</span> <span class="mi">105</span><span class="p">,</span> <span class="mi">305</span><span class="p">,</span> <span class="mi">205</span><span class="p">],</span> <span class="c1"># 重叠的重复框</span>
<a id="__codelineno-0-48" name="__codelineno-0-48" href="#__codelineno-0-48"></a><span class="p">])</span>
<a id="__codelineno-0-49" name="__codelineno-0-49" href="#__codelineno-0-49"></a><span class="n">scores</span> <span class="o">=</span> <span class="n">jnp</span><span class="o">.</span><span class="n">array</span><span class="p">([</span><span class="mf">0.95</span><span class="p">,</span> <span class="mf">0.80</span><span class="p">,</span> <span class="mf">0.70</span><span class="p">,</span> <span class="mf">0.90</span><span class="p">,</span> <span class="mf">0.60</span><span class="p">])</span>
<a id="__codelineno-0-50" name="__codelineno-0-50" href="#__codelineno-0-50"></a>
<a id="__codelineno-0-51" name="__codelineno-0-51" href="#__codelineno-0-51"></a><span class="n">keep</span> <span class="o">=</span> <span class="n">nms</span><span class="p">(</span><span class="n">boxes</span><span class="p">,</span> <span class="n">scores</span><span class="p">,</span> <span class="n">iou_threshold</span><span class="o">=</span><span class="mf">0.5</span><span class="p">)</span>
<a id="__codelineno-0-52" name="__codelineno-0-52" href="#__codelineno-0-52"></a>
<a id="__codelineno-0-53" name="__codelineno-0-53" href="#__codelineno-0-53"></a><span class="n">fig</span><span class="p">,</span> <span class="n">axes</span> <span class="o">=</span> <span class="n">plt</span><span class="o">.</span><span class="n">subplots</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="n">figsize</span><span class="o">=</span><span class="p">(</span><span class="mi">14</span><span class="p">,</span> <span class="mi">5</span><span class="p">))</span>
<a id="__codelineno-0-54" name="__codelineno-0-54" href="#__codelineno-0-54"></a><span class="n">colors</span> <span class="o">=</span> <span class="p">[</span><span class="s1">&#39;#3498db&#39;</span><span class="p">,</span> <span class="s1">&#39;#e74c3c&#39;</span><span class="p">,</span> <span class="s1">&#39;#27ae60&#39;</span><span class="p">,</span> <span class="s1">&#39;#9b59b6&#39;</span><span class="p">,</span> <span class="s1">&#39;#f39c12&#39;</span><span class="p">]</span>
<a id="__codelineno-0-55" name="__codelineno-0-55" href="#__codelineno-0-55"></a>
<a id="__codelineno-0-56" name="__codelineno-0-56" href="#__codelineno-0-56"></a><span class="k">for</span> <span class="n">ax</span><span class="p">,</span> <span class="n">title</span><span class="p">,</span> <span class="n">indices</span> <span class="ow">in</span> <span class="nb">zip</span><span class="p">(</span><span class="n">axes</span><span class="p">,</span> <span class="p">[</span><span class="s1">&#39;NMS之前&#39;</span><span class="p">,</span> <span class="s1">&#39;NMS之后&#39;</span><span class="p">],</span>
<a id="__codelineno-0-57" name="__codelineno-0-57" href="#__codelineno-0-57"></a> <span class="p">[</span><span class="nb">range</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">boxes</span><span class="p">)),</span> <span class="n">keep</span><span class="p">]):</span>
<a id="__codelineno-0-58" name="__codelineno-0-58" href="#__codelineno-0-58"></a> <span class="n">ax</span><span class="o">.</span><span class="n">set_xlim</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">400</span><span class="p">);</span> <span class="n">ax</span><span class="o">.</span><span class="n">set_ylim</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">300</span><span class="p">)</span>
<a id="__codelineno-0-59" name="__codelineno-0-59" href="#__codelineno-0-59"></a> <span class="n">ax</span><span class="o">.</span><span class="n">set_aspect</span><span class="p">(</span><span class="s1">&#39;equal&#39;</span><span class="p">);</span> <span class="n">ax</span><span class="o">.</span><span class="n">invert_yaxis</span><span class="p">()</span>
<a id="__codelineno-0-60" name="__codelineno-0-60" href="#__codelineno-0-60"></a> <span class="n">ax</span><span class="o">.</span><span class="n">set_title</span><span class="p">(</span><span class="n">title</span><span class="p">)</span>
<a id="__codelineno-0-61" name="__codelineno-0-61" href="#__codelineno-0-61"></a> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="n">indices</span><span class="p">:</span>
<a id="__codelineno-0-62" name="__codelineno-0-62" href="#__codelineno-0-62"></a> <span class="n">b</span> <span class="o">=</span> <span class="n">boxes</span><span class="p">[</span><span class="n">i</span><span class="p">]</span>
<a id="__codelineno-0-63" name="__codelineno-0-63" href="#__codelineno-0-63"></a> <span class="n">rect</span> <span class="o">=</span> <span class="n">patches</span><span class="o">.</span><span class="n">Rectangle</span><span class="p">((</span><span class="n">b</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="n">b</span><span class="p">[</span><span class="mi">1</span><span class="p">]),</span> <span class="n">b</span><span class="p">[</span><span class="mi">2</span><span class="p">]</span><span class="o">-</span><span class="n">b</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="n">b</span><span class="p">[</span><span class="mi">3</span><span class="p">]</span><span class="o">-</span><span class="n">b</span><span class="p">[</span><span class="mi">1</span><span class="p">],</span>
<a id="__codelineno-0-64" name="__codelineno-0-64" href="#__codelineno-0-64"></a> <span class="n">linewidth</span><span class="o">=</span><span class="mi">2</span><span class="p">,</span> <span class="n">edgecolor</span><span class="o">=</span><span class="n">colors</span><span class="p">[</span><span class="n">i</span><span class="p">],</span>
<a id="__codelineno-0-65" name="__codelineno-0-65" href="#__codelineno-0-65"></a> <span class="n">facecolor</span><span class="o">=</span><span class="s1">&#39;none&#39;</span><span class="p">)</span>
<a id="__codelineno-0-66" name="__codelineno-0-66" href="#__codelineno-0-66"></a> <span class="n">ax</span><span class="o">.</span><span class="n">add_patch</span><span class="p">(</span><span class="n">rect</span><span class="p">)</span>
<a id="__codelineno-0-67" name="__codelineno-0-67" href="#__codelineno-0-67"></a> <span class="n">ax</span><span class="o">.</span><span class="n">text</span><span class="p">(</span><span class="n">b</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="n">b</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span><span class="o">-</span><span class="mi">5</span><span class="p">,</span> <span class="sa">f</span><span class="s1">&#39;</span><span class="si">{</span><span class="n">scores</span><span class="p">[</span><span class="n">i</span><span class="p">]</span><span class="si">:</span><span class="s1">.2f</span><span class="si">}</span><span class="s1">&#39;</span><span class="p">,</span> <span class="n">color</span><span class="o">=</span><span class="n">colors</span><span class="p">[</span><span class="n">i</span><span class="p">],</span> <span class="n">fontsize</span><span class="o">=</span><span class="mi">10</span><span class="p">)</span>
<a id="__codelineno-0-68" name="__codelineno-0-68" href="#__codelineno-0-68"></a>
<a id="__codelineno-0-69" name="__codelineno-0-69" href="#__codelineno-0-69"></a><span class="n">plt</span><span class="o">.</span><span class="n">tight_layout</span><span class="p">();</span> <span class="n">plt</span><span class="o">.</span><span class="n">show</span><span class="p">()</span>
<a id="__codelineno-0-70" name="__codelineno-0-70" href="#__codelineno-0-70"></a><span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;NMS后保留了</span><span class="si">{</span><span class="nb">len</span><span class="p">(</span><span class="n">keep</span><span class="p">)</span><span class="si">}</span><span class="s2">个框,共</span><span class="si">{</span><span class="nb">len</span><span class="p">(</span><span class="n">boxes</span><span class="p">)</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span>
</code></pre></div></p>
</li>
<li>
<p>实现一个简化的区域提议网络(RPN)。给定一个特征图,生成具有多种尺度和长宽比的锚框,并预测物体性分数和边界框偏移量。
<div class="highlight"><pre><span></span><code><a id="__codelineno-1-1" name="__codelineno-1-1" href="#__codelineno-1-1"></a><span class="kn">import</span><span class="w"> </span><span class="nn">jax</span>
<a id="__codelineno-1-2" name="__codelineno-1-2" href="#__codelineno-1-2"></a><span class="kn">import</span><span class="w"> </span><span class="nn">jax.numpy</span><span class="w"> </span><span class="k">as</span><span class="w"> </span><span class="nn">jnp</span>
<a id="__codelineno-1-3" name="__codelineno-1-3" href="#__codelineno-1-3"></a><span class="kn">import</span><span class="w"> </span><span class="nn">matplotlib.pyplot</span><span class="w"> </span><span class="k">as</span><span class="w"> </span><span class="nn">plt</span>
<a id="__codelineno-1-4" name="__codelineno-1-4" href="#__codelineno-1-4"></a><span class="kn">import</span><span class="w"> </span><span class="nn">matplotlib.patches</span><span class="w"> </span><span class="k">as</span><span class="w"> </span><span class="nn">patches</span>
<a id="__codelineno-1-5" name="__codelineno-1-5" href="#__codelineno-1-5"></a>
<a id="__codelineno-1-6" name="__codelineno-1-6" href="#__codelineno-1-6"></a><span class="k">def</span><span class="w"> </span><span class="nf">generate_anchors</span><span class="p">(</span><span class="n">feature_h</span><span class="p">,</span> <span class="n">feature_w</span><span class="p">,</span> <span class="n">stride</span><span class="p">,</span> <span class="n">scales</span><span class="p">,</span> <span class="n">ratios</span><span class="p">):</span>
<a id="__codelineno-1-7" name="__codelineno-1-7" href="#__codelineno-1-7"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;为特征图上的每个位置生成锚框。&quot;&quot;&quot;</span>
<a id="__codelineno-1-8" name="__codelineno-1-8" href="#__codelineno-1-8"></a> <span class="n">anchors</span> <span class="o">=</span> <span class="p">[]</span>
<a id="__codelineno-1-9" name="__codelineno-1-9" href="#__codelineno-1-9"></a> <span class="k">for</span> <span class="n">y</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">feature_h</span><span class="p">):</span>
<a id="__codelineno-1-10" name="__codelineno-1-10" href="#__codelineno-1-10"></a> <span class="k">for</span> <span class="n">x</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">feature_w</span><span class="p">):</span>
<a id="__codelineno-1-11" name="__codelineno-1-11" href="#__codelineno-1-11"></a> <span class="n">cx</span> <span class="o">=</span> <span class="p">(</span><span class="n">x</span> <span class="o">+</span> <span class="mf">0.5</span><span class="p">)</span> <span class="o">*</span> <span class="n">stride</span>
<a id="__codelineno-1-12" name="__codelineno-1-12" href="#__codelineno-1-12"></a> <span class="n">cy</span> <span class="o">=</span> <span class="p">(</span><span class="n">y</span> <span class="o">+</span> <span class="mf">0.5</span><span class="p">)</span> <span class="o">*</span> <span class="n">stride</span>
<a id="__codelineno-1-13" name="__codelineno-1-13" href="#__codelineno-1-13"></a> <span class="k">for</span> <span class="n">s</span> <span class="ow">in</span> <span class="n">scales</span><span class="p">:</span>
<a id="__codelineno-1-14" name="__codelineno-1-14" href="#__codelineno-1-14"></a> <span class="k">for</span> <span class="n">r</span> <span class="ow">in</span> <span class="n">ratios</span><span class="p">:</span>
<a id="__codelineno-1-15" name="__codelineno-1-15" href="#__codelineno-1-15"></a> <span class="n">w</span> <span class="o">=</span> <span class="n">s</span> <span class="o">*</span> <span class="n">jnp</span><span class="o">.</span><span class="n">sqrt</span><span class="p">(</span><span class="n">r</span><span class="p">)</span>
<a id="__codelineno-1-16" name="__codelineno-1-16" href="#__codelineno-1-16"></a> <span class="n">h</span> <span class="o">=</span> <span class="n">s</span> <span class="o">/</span> <span class="n">jnp</span><span class="o">.</span><span class="n">sqrt</span><span class="p">(</span><span class="n">r</span><span class="p">)</span>
<a id="__codelineno-1-17" name="__codelineno-1-17" href="#__codelineno-1-17"></a> <span class="n">anchors</span><span class="o">.</span><span class="n">append</span><span class="p">([</span><span class="n">cx</span> <span class="o">-</span> <span class="n">w</span><span class="o">/</span><span class="mi">2</span><span class="p">,</span> <span class="n">cy</span> <span class="o">-</span> <span class="n">h</span><span class="o">/</span><span class="mi">2</span><span class="p">,</span> <span class="n">cx</span> <span class="o">+</span> <span class="n">w</span><span class="o">/</span><span class="mi">2</span><span class="p">,</span> <span class="n">cy</span> <span class="o">+</span> <span class="n">h</span><span class="o">/</span><span class="mi">2</span><span class="p">])</span>
<a id="__codelineno-1-18" name="__codelineno-1-18" href="#__codelineno-1-18"></a> <span class="k">return</span> <span class="n">jnp</span><span class="o">.</span><span class="n">array</span><span class="p">(</span><span class="n">anchors</span><span class="p">)</span>
<a id="__codelineno-1-19" name="__codelineno-1-19" href="#__codelineno-1-19"></a>
<a id="__codelineno-1-20" name="__codelineno-1-20" href="#__codelineno-1-20"></a><span class="k">def</span><span class="w"> </span><span class="nf">rpn_forward</span><span class="p">(</span><span class="n">feature_map</span><span class="p">,</span> <span class="n">params</span><span class="p">):</span>
<a id="__codelineno-1-21" name="__codelineno-1-21" href="#__codelineno-1-21"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;简化版RPN:预测每个锚框的物体性和框偏移量。&quot;&quot;&quot;</span>
<a id="__codelineno-1-22" name="__codelineno-1-22" href="#__codelineno-1-22"></a> <span class="n">H</span><span class="p">,</span> <span class="n">W</span><span class="p">,</span> <span class="n">C</span> <span class="o">=</span> <span class="n">feature_map</span><span class="o">.</span><span class="n">shape</span>
<a id="__codelineno-1-23" name="__codelineno-1-23" href="#__codelineno-1-23"></a> <span class="n">n_anchors</span> <span class="o">=</span> <span class="n">params</span><span class="p">[</span><span class="s1">&#39;cls_w&#39;</span><span class="p">]</span><span class="o">.</span><span class="n">shape</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span>
<a id="__codelineno-1-24" name="__codelineno-1-24" href="#__codelineno-1-24"></a>
<a id="__codelineno-1-25" name="__codelineno-1-25" href="#__codelineno-1-25"></a> <span class="c1"># 在特征图上滑动1x1卷积(简化版)</span>
<a id="__codelineno-1-26" name="__codelineno-1-26" href="#__codelineno-1-26"></a> <span class="n">cls_scores</span> <span class="o">=</span> <span class="n">feature_map</span><span class="o">.</span><span class="n">reshape</span><span class="p">(</span><span class="o">-</span><span class="mi">1</span><span class="p">,</span> <span class="n">C</span><span class="p">)</span> <span class="o">@</span> <span class="n">params</span><span class="p">[</span><span class="s1">&#39;cls_w&#39;</span><span class="p">]</span> <span class="c1"># (H*W, n_anchors)</span>
<a id="__codelineno-1-27" name="__codelineno-1-27" href="#__codelineno-1-27"></a> <span class="n">box_offsets</span> <span class="o">=</span> <span class="n">feature_map</span><span class="o">.</span><span class="n">reshape</span><span class="p">(</span><span class="o">-</span><span class="mi">1</span><span class="p">,</span> <span class="n">C</span><span class="p">)</span> <span class="o">@</span> <span class="n">params</span><span class="p">[</span><span class="s1">&#39;reg_w&#39;</span><span class="p">]</span> <span class="c1"># (H*W, n_anchors*4)</span>
<a id="__codelineno-1-28" name="__codelineno-1-28" href="#__codelineno-1-28"></a>
<a id="__codelineno-1-29" name="__codelineno-1-29" href="#__codelineno-1-29"></a> <span class="n">cls_scores</span> <span class="o">=</span> <span class="n">jax</span><span class="o">.</span><span class="n">nn</span><span class="o">.</span><span class="n">sigmoid</span><span class="p">(</span><span class="n">cls_scores</span><span class="p">)</span>
<a id="__codelineno-1-30" name="__codelineno-1-30" href="#__codelineno-1-30"></a> <span class="k">return</span> <span class="n">cls_scores</span><span class="o">.</span><span class="n">ravel</span><span class="p">(),</span> <span class="n">box_offsets</span><span class="o">.</span><span class="n">reshape</span><span class="p">(</span><span class="o">-</span><span class="mi">1</span><span class="p">,</span> <span class="mi">4</span><span class="p">)</span>
<a id="__codelineno-1-31" name="__codelineno-1-31" href="#__codelineno-1-31"></a>
<a id="__codelineno-1-32" name="__codelineno-1-32" href="#__codelineno-1-32"></a><span class="c1"># 设置</span>
<a id="__codelineno-1-33" name="__codelineno-1-33" href="#__codelineno-1-33"></a><span class="n">feature_h</span><span class="p">,</span> <span class="n">feature_w</span><span class="p">,</span> <span class="n">channels</span> <span class="o">=</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">16</span>
<a id="__codelineno-1-34" name="__codelineno-1-34" href="#__codelineno-1-34"></a><span class="n">stride</span> <span class="o">=</span> <span class="mi">16</span> <span class="c1"># 每个特征图单元格覆盖16x16像素</span>
<a id="__codelineno-1-35" name="__codelineno-1-35" href="#__codelineno-1-35"></a><span class="n">scales</span> <span class="o">=</span> <span class="p">[</span><span class="mi">32</span><span class="p">,</span> <span class="mi">64</span><span class="p">,</span> <span class="mi">128</span><span class="p">]</span>
<a id="__codelineno-1-36" name="__codelineno-1-36" href="#__codelineno-1-36"></a><span class="n">ratios</span> <span class="o">=</span> <span class="p">[</span><span class="mf">0.5</span><span class="p">,</span> <span class="mf">1.0</span><span class="p">,</span> <span class="mf">2.0</span><span class="p">]</span>
<a id="__codelineno-1-37" name="__codelineno-1-37" href="#__codelineno-1-37"></a><span class="n">n_anchors_per_pos</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="n">scales</span><span class="p">)</span> <span class="o">*</span> <span class="nb">len</span><span class="p">(</span><span class="n">ratios</span><span class="p">)</span>
<a id="__codelineno-1-38" name="__codelineno-1-38" href="#__codelineno-1-38"></a>
<a id="__codelineno-1-39" name="__codelineno-1-39" href="#__codelineno-1-39"></a><span class="n">key</span> <span class="o">=</span> <span class="n">jax</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">PRNGKey</span><span class="p">(</span><span class="mi">42</span><span class="p">)</span>
<a id="__codelineno-1-40" name="__codelineno-1-40" href="#__codelineno-1-40"></a><span class="n">k1</span><span class="p">,</span> <span class="n">k2</span><span class="p">,</span> <span class="n">k3</span> <span class="o">=</span> <span class="n">jax</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="n">key</span><span class="p">,</span> <span class="mi">3</span><span class="p">)</span>
<a id="__codelineno-1-41" name="__codelineno-1-41" href="#__codelineno-1-41"></a>
<a id="__codelineno-1-42" name="__codelineno-1-42" href="#__codelineno-1-42"></a><span class="n">feature_map</span> <span class="o">=</span> <span class="n">jax</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">normal</span><span class="p">(</span><span class="n">k1</span><span class="p">,</span> <span class="p">(</span><span class="n">feature_h</span><span class="p">,</span> <span class="n">feature_w</span><span class="p">,</span> <span class="n">channels</span><span class="p">))</span>
<a id="__codelineno-1-43" name="__codelineno-1-43" href="#__codelineno-1-43"></a><span class="n">params</span> <span class="o">=</span> <span class="p">{</span>
<a id="__codelineno-1-44" name="__codelineno-1-44" href="#__codelineno-1-44"></a> <span class="s1">&#39;cls_w&#39;</span><span class="p">:</span> <span class="n">jax</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">normal</span><span class="p">(</span><span class="n">k2</span><span class="p">,</span> <span class="p">(</span><span class="n">channels</span><span class="p">,</span> <span class="n">n_anchors_per_pos</span><span class="p">))</span> <span class="o">*</span> <span class="mf">0.01</span><span class="p">,</span>
<a id="__codelineno-1-45" name="__codelineno-1-45" href="#__codelineno-1-45"></a> <span class="s1">&#39;reg_w&#39;</span><span class="p">:</span> <span class="n">jax</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">normal</span><span class="p">(</span><span class="n">k3</span><span class="p">,</span> <span class="p">(</span><span class="n">channels</span><span class="p">,</span> <span class="n">n_anchors_per_pos</span> <span class="o">*</span> <span class="mi">4</span><span class="p">))</span> <span class="o">*</span> <span class="mf">0.01</span><span class="p">,</span>
<a id="__codelineno-1-46" name="__codelineno-1-46" href="#__codelineno-1-46"></a><span class="p">}</span>
<a id="__codelineno-1-47" name="__codelineno-1-47" href="#__codelineno-1-47"></a>
<a id="__codelineno-1-48" name="__codelineno-1-48" href="#__codelineno-1-48"></a><span class="n">anchors</span> <span class="o">=</span> <span class="n">generate_anchors</span><span class="p">(</span><span class="n">feature_h</span><span class="p">,</span> <span class="n">feature_w</span><span class="p">,</span> <span class="n">stride</span><span class="p">,</span> <span class="n">scales</span><span class="p">,</span> <span class="n">ratios</span><span class="p">)</span>
<a id="__codelineno-1-49" name="__codelineno-1-49" href="#__codelineno-1-49"></a><span class="n">scores</span><span class="p">,</span> <span class="n">offsets</span> <span class="o">=</span> <span class="n">rpn_forward</span><span class="p">(</span><span class="n">feature_map</span><span class="p">,</span> <span class="n">params</span><span class="p">)</span>
<a id="__codelineno-1-50" name="__codelineno-1-50" href="#__codelineno-1-50"></a>
<a id="__codelineno-1-51" name="__codelineno-1-51" href="#__codelineno-1-51"></a><span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;特征图:</span><span class="si">{</span><span class="n">feature_h</span><span class="si">}</span><span class="s2">x</span><span class="si">{</span><span class="n">feature_w</span><span class="si">}</span><span class="s2">,步幅=</span><span class="si">{</span><span class="n">stride</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span>
<a id="__codelineno-1-52" name="__codelineno-1-52" href="#__codelineno-1-52"></a><span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;每个位置的锚框数:</span><span class="si">{</span><span class="n">n_anchors_per_pos</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span>
<a id="__codelineno-1-53" name="__codelineno-1-53" href="#__codelineno-1-53"></a><span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;锚框总数:</span><span class="si">{</span><span class="nb">len</span><span class="p">(</span><span class="n">anchors</span><span class="p">)</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span>
<a id="__codelineno-1-54" name="__codelineno-1-54" href="#__codelineno-1-54"></a><span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;物体性分数形状:</span><span class="si">{</span><span class="n">scores</span><span class="o">.</span><span class="n">shape</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span>
<a id="__codelineno-1-55" name="__codelineno-1-55" href="#__codelineno-1-55"></a><span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;边界框偏移量形状:</span><span class="si">{</span><span class="n">offsets</span><span class="o">.</span><span class="n">shape</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span>
<a id="__codelineno-1-56" name="__codelineno-1-56" href="#__codelineno-1-56"></a>
<a id="__codelineno-1-57" name="__codelineno-1-57" href="#__codelineno-1-57"></a><span class="c1"># 可视化一个位置的锚框</span>
<a id="__codelineno-1-58" name="__codelineno-1-58" href="#__codelineno-1-58"></a><span class="n">fig</span><span class="p">,</span> <span class="n">ax</span> <span class="o">=</span> <span class="n">plt</span><span class="o">.</span><span class="n">subplots</span><span class="p">(</span><span class="n">figsize</span><span class="o">=</span><span class="p">(</span><span class="mi">6</span><span class="p">,</span> <span class="mi">6</span><span class="p">))</span>
<a id="__codelineno-1-59" name="__codelineno-1-59" href="#__codelineno-1-59"></a><span class="n">img_size</span> <span class="o">=</span> <span class="n">feature_h</span> <span class="o">*</span> <span class="n">stride</span>
<a id="__codelineno-1-60" name="__codelineno-1-60" href="#__codelineno-1-60"></a><span class="n">ax</span><span class="o">.</span><span class="n">set_xlim</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="n">img_size</span><span class="p">);</span> <span class="n">ax</span><span class="o">.</span><span class="n">set_ylim</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="n">img_size</span><span class="p">)</span>
<a id="__codelineno-1-61" name="__codelineno-1-61" href="#__codelineno-1-61"></a><span class="n">ax</span><span class="o">.</span><span class="n">invert_yaxis</span><span class="p">();</span> <span class="n">ax</span><span class="o">.</span><span class="n">set_aspect</span><span class="p">(</span><span class="s1">&#39;equal&#39;</span><span class="p">)</span>
<a id="__codelineno-1-62" name="__codelineno-1-62" href="#__codelineno-1-62"></a>
<a id="__codelineno-1-63" name="__codelineno-1-63" href="#__codelineno-1-63"></a><span class="n">pos_idx</span> <span class="o">=</span> <span class="n">feature_h</span> <span class="o">//</span> <span class="mi">2</span> <span class="o">*</span> <span class="n">feature_w</span> <span class="o">+</span> <span class="n">feature_w</span> <span class="o">//</span> <span class="mi">2</span> <span class="c1"># 中心位置</span>
<a id="__codelineno-1-64" name="__codelineno-1-64" href="#__codelineno-1-64"></a><span class="n">colors</span> <span class="o">=</span> <span class="p">[</span><span class="s1">&#39;#3498db&#39;</span><span class="p">,</span> <span class="s1">&#39;#e74c3c&#39;</span><span class="p">,</span> <span class="s1">&#39;#27ae60&#39;</span><span class="p">]</span>
<a id="__codelineno-1-65" name="__codelineno-1-65" href="#__codelineno-1-65"></a><span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="n">s</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">scales</span><span class="p">):</span>
<a id="__codelineno-1-66" name="__codelineno-1-66" href="#__codelineno-1-66"></a> <span class="k">for</span> <span class="n">j</span><span class="p">,</span> <span class="n">r</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">ratios</span><span class="p">):</span>
<a id="__codelineno-1-67" name="__codelineno-1-67" href="#__codelineno-1-67"></a> <span class="n">idx</span> <span class="o">=</span> <span class="n">pos_idx</span> <span class="o">*</span> <span class="n">n_anchors_per_pos</span> <span class="o">+</span> <span class="n">i</span> <span class="o">*</span> <span class="nb">len</span><span class="p">(</span><span class="n">ratios</span><span class="p">)</span> <span class="o">+</span> <span class="n">j</span>
<a id="__codelineno-1-68" name="__codelineno-1-68" href="#__codelineno-1-68"></a> <span class="n">a</span> <span class="o">=</span> <span class="n">anchors</span><span class="p">[</span><span class="n">idx</span><span class="p">]</span>
<a id="__codelineno-1-69" name="__codelineno-1-69" href="#__codelineno-1-69"></a> <span class="n">rect</span> <span class="o">=</span> <span class="n">patches</span><span class="o">.</span><span class="n">Rectangle</span><span class="p">((</span><span class="n">a</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="n">a</span><span class="p">[</span><span class="mi">1</span><span class="p">]),</span> <span class="n">a</span><span class="p">[</span><span class="mi">2</span><span class="p">]</span><span class="o">-</span><span class="n">a</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="n">a</span><span class="p">[</span><span class="mi">3</span><span class="p">]</span><span class="o">-</span><span class="n">a</span><span class="p">[</span><span class="mi">1</span><span class="p">],</span>
<a id="__codelineno-1-70" name="__codelineno-1-70" href="#__codelineno-1-70"></a> <span class="n">linewidth</span><span class="o">=</span><span class="mf">1.5</span><span class="p">,</span> <span class="n">edgecolor</span><span class="o">=</span><span class="n">colors</span><span class="p">[</span><span class="n">i</span><span class="p">],</span>
<a id="__codelineno-1-71" name="__codelineno-1-71" href="#__codelineno-1-71"></a> <span class="n">facecolor</span><span class="o">=</span><span class="s1">&#39;none&#39;</span><span class="p">,</span> <span class="n">linestyle</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;--&#39;</span><span class="p">,</span> <span class="s1">&#39;-&#39;</span><span class="p">,</span> <span class="s1">&#39;:&#39;</span><span class="p">][</span><span class="n">j</span><span class="p">])</span>
<a id="__codelineno-1-72" name="__codelineno-1-72" href="#__codelineno-1-72"></a> <span class="n">ax</span><span class="o">.</span><span class="n">add_patch</span><span class="p">(</span><span class="n">rect</span><span class="p">)</span>
<a id="__codelineno-1-73" name="__codelineno-1-73" href="#__codelineno-1-73"></a>
<a id="__codelineno-1-74" name="__codelineno-1-74" href="#__codelineno-1-74"></a><span class="n">ax</span><span class="o">.</span><span class="n">scatter</span><span class="p">([</span><span class="n">img_size</span><span class="o">/</span><span class="mi">2</span><span class="p">],</span> <span class="p">[</span><span class="n">img_size</span><span class="o">/</span><span class="mi">2</span><span class="p">],</span> <span class="n">c</span><span class="o">=</span><span class="s1">&#39;red&#39;</span><span class="p">,</span> <span class="n">s</span><span class="o">=</span><span class="mi">50</span><span class="p">,</span> <span class="n">zorder</span><span class="o">=</span><span class="mi">5</span><span class="p">)</span>
<a id="__codelineno-1-75" name="__codelineno-1-75" href="#__codelineno-1-75"></a><span class="n">ax</span><span class="o">.</span><span class="n">set_title</span><span class="p">(</span><span class="sa">f</span><span class="s1">&#39;中心位置的锚框</span><span class="se">\n</span><span class="s1">3个尺度 × 3个比例 = </span><span class="si">{</span><span class="n">n_anchors_per_pos</span><span class="si">}</span><span class="s1">&#39;</span><span class="p">)</span>
<a id="__codelineno-1-76" name="__codelineno-1-76" href="#__codelineno-1-76"></a><span class="n">ax</span><span class="o">.</span><span class="n">grid</span><span class="p">(</span><span class="kc">True</span><span class="p">,</span> <span class="n">alpha</span><span class="o">=</span><span class="mf">0.3</span><span class="p">)</span>
<a id="__codelineno-1-77" name="__codelineno-1-77" href="#__codelineno-1-77"></a><span class="n">plt</span><span class="o">.</span><span class="n">tight_layout</span><span class="p">();</span> <span class="n">plt</span><span class="o">.</span><span class="n">show</span><span class="p">()</span>
</code></pre></div></p>
</li>
<li>
<p>实现一个简化版的一维U-Net编码器-解码器,带有跳跃连接,用于一维分割(一维信号的二值标注)。
<div class="highlight"><pre><span></span><code><a id="__codelineno-2-1" name="__codelineno-2-1" href="#__codelineno-2-1"></a><span class="kn">import</span><span class="w"> </span><span class="nn">jax</span>
<a id="__codelineno-2-2" name="__codelineno-2-2" href="#__codelineno-2-2"></a><span class="kn">import</span><span class="w"> </span><span class="nn">jax.numpy</span><span class="w"> </span><span class="k">as</span><span class="w"> </span><span class="nn">jnp</span>
<a id="__codelineno-2-3" name="__codelineno-2-3" href="#__codelineno-2-3"></a><span class="kn">import</span><span class="w"> </span><span class="nn">matplotlib.pyplot</span><span class="w"> </span><span class="k">as</span><span class="w"> </span><span class="nn">plt</span>
<a id="__codelineno-2-4" name="__codelineno-2-4" href="#__codelineno-2-4"></a>
<a id="__codelineno-2-5" name="__codelineno-2-5" href="#__codelineno-2-5"></a><span class="k">def</span><span class="w"> </span><span class="nf">conv1d_same</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">kernel</span><span class="p">):</span>
<a id="__codelineno-2-6" name="__codelineno-2-6" href="#__codelineno-2-6"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;具有相同填充的一维卷积。&quot;&quot;&quot;</span>
<a id="__codelineno-2-7" name="__codelineno-2-7" href="#__codelineno-2-7"></a> <span class="n">k</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="n">kernel</span><span class="p">)</span>
<a id="__codelineno-2-8" name="__codelineno-2-8" href="#__codelineno-2-8"></a> <span class="n">pad</span> <span class="o">=</span> <span class="n">k</span> <span class="o">//</span> <span class="mi">2</span>
<a id="__codelineno-2-9" name="__codelineno-2-9" href="#__codelineno-2-9"></a> <span class="n">x_pad</span> <span class="o">=</span> <span class="n">jnp</span><span class="o">.</span><span class="n">pad</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">pad</span><span class="p">,</span> <span class="n">mode</span><span class="o">=</span><span class="s1">&#39;edge&#39;</span><span class="p">)</span>
<a id="__codelineno-2-10" name="__codelineno-2-10" href="#__codelineno-2-10"></a> <span class="n">n</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="n">x</span><span class="p">)</span>
<a id="__codelineno-2-11" name="__codelineno-2-11" href="#__codelineno-2-11"></a> <span class="n">out</span> <span class="o">=</span> <span class="n">jnp</span><span class="o">.</span><span class="n">zeros</span><span class="p">(</span><span class="n">n</span><span class="p">)</span>
<a id="__codelineno-2-12" name="__codelineno-2-12" href="#__codelineno-2-12"></a> <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">n</span><span class="p">):</span>
<a id="__codelineno-2-13" name="__codelineno-2-13" href="#__codelineno-2-13"></a> <span class="n">out</span> <span class="o">=</span> <span class="n">out</span><span class="o">.</span><span class="n">at</span><span class="p">[</span><span class="n">i</span><span class="p">]</span><span class="o">.</span><span class="n">set</span><span class="p">(</span><span class="n">jnp</span><span class="o">.</span><span class="n">sum</span><span class="p">(</span><span class="n">x_pad</span><span class="p">[</span><span class="n">i</span><span class="p">:</span><span class="n">i</span><span class="o">+</span><span class="n">k</span><span class="p">]</span> <span class="o">*</span> <span class="n">kernel</span><span class="p">))</span>
<a id="__codelineno-2-14" name="__codelineno-2-14" href="#__codelineno-2-14"></a> <span class="k">return</span> <span class="n">out</span>
<a id="__codelineno-2-15" name="__codelineno-2-15" href="#__codelineno-2-15"></a>
<a id="__codelineno-2-16" name="__codelineno-2-16" href="#__codelineno-2-16"></a><span class="k">def</span><span class="w"> </span><span class="nf">downsample</span><span class="p">(</span><span class="n">x</span><span class="p">):</span>
<a id="__codelineno-2-17" name="__codelineno-2-17" href="#__codelineno-2-17"></a> <span class="k">return</span> <span class="n">x</span><span class="p">[::</span><span class="mi">2</span><span class="p">]</span>
<a id="__codelineno-2-18" name="__codelineno-2-18" href="#__codelineno-2-18"></a>
<a id="__codelineno-2-19" name="__codelineno-2-19" href="#__codelineno-2-19"></a><span class="k">def</span><span class="w"> </span><span class="nf">upsample</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">target_len</span><span class="p">):</span>
<a id="__codelineno-2-20" name="__codelineno-2-20" href="#__codelineno-2-20"></a> <span class="k">return</span> <span class="n">jnp</span><span class="o">.</span><span class="n">interp</span><span class="p">(</span><span class="n">jnp</span><span class="o">.</span><span class="n">linspace</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="n">target_len</span><span class="p">),</span> <span class="n">jnp</span><span class="o">.</span><span class="n">linspace</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="nb">len</span><span class="p">(</span><span class="n">x</span><span class="p">)),</span> <span class="n">x</span><span class="p">)</span>
<a id="__codelineno-2-21" name="__codelineno-2-21" href="#__codelineno-2-21"></a>
<a id="__codelineno-2-22" name="__codelineno-2-22" href="#__codelineno-2-22"></a><span class="k">def</span><span class="w"> </span><span class="nf">unet_1d</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">params</span><span class="p">):</span>
<a id="__codelineno-2-23" name="__codelineno-2-23" href="#__codelineno-2-23"></a><span class="w"> </span><span class="sd">&quot;&quot;&quot;简化版一维U-Net,包含2个编码器/解码器层级。&quot;&quot;&quot;</span>
<a id="__codelineno-2-24" name="__codelineno-2-24" href="#__codelineno-2-24"></a> <span class="c1"># 编码器</span>
<a id="__codelineno-2-25" name="__codelineno-2-25" href="#__codelineno-2-25"></a> <span class="n">e1</span> <span class="o">=</span> <span class="n">jnp</span><span class="o">.</span><span class="n">maximum</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="n">conv1d_same</span><span class="p">(</span><span class="n">x</span><span class="p">,</span> <span class="n">params</span><span class="p">[</span><span class="s1">&#39;enc1&#39;</span><span class="p">]))</span>
<a id="__codelineno-2-26" name="__codelineno-2-26" href="#__codelineno-2-26"></a> <span class="n">e1_down</span> <span class="o">=</span> <span class="n">downsample</span><span class="p">(</span><span class="n">e1</span><span class="p">)</span>
<a id="__codelineno-2-27" name="__codelineno-2-27" href="#__codelineno-2-27"></a>
<a id="__codelineno-2-28" name="__codelineno-2-28" href="#__codelineno-2-28"></a> <span class="n">e2</span> <span class="o">=</span> <span class="n">jnp</span><span class="o">.</span><span class="n">maximum</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="n">conv1d_same</span><span class="p">(</span><span class="n">e1_down</span><span class="p">,</span> <span class="n">params</span><span class="p">[</span><span class="s1">&#39;enc2&#39;</span><span class="p">]))</span>
<a id="__codelineno-2-29" name="__codelineno-2-29" href="#__codelineno-2-29"></a> <span class="n">e2_down</span> <span class="o">=</span> <span class="n">downsample</span><span class="p">(</span><span class="n">e2</span><span class="p">)</span>
<a id="__codelineno-2-30" name="__codelineno-2-30" href="#__codelineno-2-30"></a>
<a id="__codelineno-2-31" name="__codelineno-2-31" href="#__codelineno-2-31"></a> <span class="c1"># 瓶颈层</span>
<a id="__codelineno-2-32" name="__codelineno-2-32" href="#__codelineno-2-32"></a> <span class="n">bottleneck</span> <span class="o">=</span> <span class="n">jnp</span><span class="o">.</span><span class="n">maximum</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="n">conv1d_same</span><span class="p">(</span><span class="n">e2_down</span><span class="p">,</span> <span class="n">params</span><span class="p">[</span><span class="s1">&#39;bottleneck&#39;</span><span class="p">]))</span>
<a id="__codelineno-2-33" name="__codelineno-2-33" href="#__codelineno-2-33"></a>
<a id="__codelineno-2-34" name="__codelineno-2-34" href="#__codelineno-2-34"></a> <span class="c1"># 带跳跃连接的解码器</span>
<a id="__codelineno-2-35" name="__codelineno-2-35" href="#__codelineno-2-35"></a> <span class="n">d2_up</span> <span class="o">=</span> <span class="n">upsample</span><span class="p">(</span><span class="n">bottleneck</span><span class="p">,</span> <span class="nb">len</span><span class="p">(</span><span class="n">e2</span><span class="p">))</span>
<a id="__codelineno-2-36" name="__codelineno-2-36" href="#__codelineno-2-36"></a> <span class="n">d2</span> <span class="o">=</span> <span class="n">jnp</span><span class="o">.</span><span class="n">maximum</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="n">conv1d_same</span><span class="p">(</span><span class="n">d2_up</span> <span class="o">+</span> <span class="n">e2</span><span class="p">,</span> <span class="n">params</span><span class="p">[</span><span class="s1">&#39;dec2&#39;</span><span class="p">]))</span> <span class="c1"># 跳跃连接</span>
<a id="__codelineno-2-37" name="__codelineno-2-37" href="#__codelineno-2-37"></a>
<a id="__codelineno-2-38" name="__codelineno-2-38" href="#__codelineno-2-38"></a> <span class="n">d1_up</span> <span class="o">=</span> <span class="n">upsample</span><span class="p">(</span><span class="n">d2</span><span class="p">,</span> <span class="nb">len</span><span class="p">(</span><span class="n">e1</span><span class="p">))</span>
<a id="__codelineno-2-39" name="__codelineno-2-39" href="#__codelineno-2-39"></a> <span class="n">d1</span> <span class="o">=</span> <span class="n">conv1d_same</span><span class="p">(</span><span class="n">d1_up</span> <span class="o">+</span> <span class="n">e1</span><span class="p">,</span> <span class="n">params</span><span class="p">[</span><span class="s1">&#39;dec1&#39;</span><span class="p">])</span> <span class="c1"># 跳跃连接</span>
<a id="__codelineno-2-40" name="__codelineno-2-40" href="#__codelineno-2-40"></a>
<a id="__codelineno-2-41" name="__codelineno-2-41" href="#__codelineno-2-41"></a> <span class="k">return</span> <span class="n">jax</span><span class="o">.</span><span class="n">nn</span><span class="o">.</span><span class="n">sigmoid</span><span class="p">(</span><span class="n">d1</span><span class="p">)</span>
<a id="__codelineno-2-42" name="__codelineno-2-42" href="#__codelineno-2-42"></a>
<a id="__codelineno-2-43" name="__codelineno-2-43" href="#__codelineno-2-43"></a><span class="c1"># 创建带有标注区域的信号</span>
<a id="__codelineno-2-44" name="__codelineno-2-44" href="#__codelineno-2-44"></a><span class="n">n</span> <span class="o">=</span> <span class="mi">128</span>
<a id="__codelineno-2-45" name="__codelineno-2-45" href="#__codelineno-2-45"></a><span class="n">t</span> <span class="o">=</span> <span class="n">jnp</span><span class="o">.</span><span class="n">linspace</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">4</span> <span class="o">*</span> <span class="n">jnp</span><span class="o">.</span><span class="n">pi</span><span class="p">,</span> <span class="n">n</span><span class="p">)</span>
<a id="__codelineno-2-46" name="__codelineno-2-46" href="#__codelineno-2-46"></a><span class="n">signal</span> <span class="o">=</span> <span class="n">jnp</span><span class="o">.</span><span class="n">sin</span><span class="p">(</span><span class="n">t</span><span class="p">)</span> <span class="o">+</span> <span class="mf">0.5</span> <span class="o">*</span> <span class="n">jnp</span><span class="o">.</span><span class="n">sin</span><span class="p">(</span><span class="mi">3</span> <span class="o">*</span> <span class="n">t</span><span class="p">)</span>
<a id="__codelineno-2-47" name="__codelineno-2-47" href="#__codelineno-2-47"></a><span class="n">labels</span> <span class="o">=</span> <span class="p">(</span><span class="n">signal</span> <span class="o">&gt;</span> <span class="mf">0.5</span><span class="p">)</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="n">jnp</span><span class="o">.</span><span class="n">float32</span><span class="p">)</span> <span class="c1"># 二值分割目标</span>
<a id="__codelineno-2-48" name="__codelineno-2-48" href="#__codelineno-2-48"></a>
<a id="__codelineno-2-49" name="__codelineno-2-49" href="#__codelineno-2-49"></a><span class="n">key</span> <span class="o">=</span> <span class="n">jax</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">PRNGKey</span><span class="p">(</span><span class="mi">42</span><span class="p">)</span>
<a id="__codelineno-2-50" name="__codelineno-2-50" href="#__codelineno-2-50"></a><span class="n">keys</span> <span class="o">=</span> <span class="n">jax</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="n">key</span><span class="p">,</span> <span class="mi">5</span><span class="p">)</span>
<a id="__codelineno-2-51" name="__codelineno-2-51" href="#__codelineno-2-51"></a><span class="n">params</span> <span class="o">=</span> <span class="p">{</span>
<a id="__codelineno-2-52" name="__codelineno-2-52" href="#__codelineno-2-52"></a> <span class="s1">&#39;enc1&#39;</span><span class="p">:</span> <span class="n">jax</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">normal</span><span class="p">(</span><span class="n">keys</span><span class="p">[</span><span class="mi">0</span><span class="p">],</span> <span class="p">(</span><span class="mi">5</span><span class="p">,))</span> <span class="o">*</span> <span class="mf">0.3</span><span class="p">,</span>
<a id="__codelineno-2-53" name="__codelineno-2-53" href="#__codelineno-2-53"></a> <span class="s1">&#39;enc2&#39;</span><span class="p">:</span> <span class="n">jax</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">normal</span><span class="p">(</span><span class="n">keys</span><span class="p">[</span><span class="mi">1</span><span class="p">],</span> <span class="p">(</span><span class="mi">5</span><span class="p">,))</span> <span class="o">*</span> <span class="mf">0.3</span><span class="p">,</span>
<a id="__codelineno-2-54" name="__codelineno-2-54" href="#__codelineno-2-54"></a> <span class="s1">&#39;bottleneck&#39;</span><span class="p">:</span> <span class="n">jax</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">normal</span><span class="p">(</span><span class="n">keys</span><span class="p">[</span><span class="mi">2</span><span class="p">],</span> <span class="p">(</span><span class="mi">3</span><span class="p">,))</span> <span class="o">*</span> <span class="mf">0.3</span><span class="p">,</span>
<a id="__codelineno-2-55" name="__codelineno-2-55" href="#__codelineno-2-55"></a> <span class="s1">&#39;dec2&#39;</span><span class="p">:</span> <span class="n">jax</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">normal</span><span class="p">(</span><span class="n">keys</span><span class="p">[</span><span class="mi">3</span><span class="p">],</span> <span class="p">(</span><span class="mi">5</span><span class="p">,))</span> <span class="o">*</span> <span class="mf">0.3</span><span class="p">,</span>
<a id="__codelineno-2-56" name="__codelineno-2-56" href="#__codelineno-2-56"></a> <span class="s1">&#39;dec1&#39;</span><span class="p">:</span> <span class="n">jax</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">normal</span><span class="p">(</span><span class="n">keys</span><span class="p">[</span><span class="mi">4</span><span class="p">],</span> <span class="p">(</span><span class="mi">5</span><span class="p">,))</span> <span class="o">*</span> <span class="mf">0.3</span><span class="p">,</span>
<a id="__codelineno-2-57" name="__codelineno-2-57" href="#__codelineno-2-57"></a><span class="p">}</span>
<a id="__codelineno-2-58" name="__codelineno-2-58" href="#__codelineno-2-58"></a>
<a id="__codelineno-2-59" name="__codelineno-2-59" href="#__codelineno-2-59"></a><span class="k">def</span><span class="w"> </span><span class="nf">loss_fn</span><span class="p">(</span><span class="n">params</span><span class="p">,</span> <span class="n">signal</span><span class="p">,</span> <span class="n">labels</span><span class="p">):</span>
<a id="__codelineno-2-60" name="__codelineno-2-60" href="#__codelineno-2-60"></a> <span class="n">pred</span> <span class="o">=</span> <span class="n">unet_1d</span><span class="p">(</span><span class="n">signal</span><span class="p">,</span> <span class="n">params</span><span class="p">)</span>
<a id="__codelineno-2-61" name="__codelineno-2-61" href="#__codelineno-2-61"></a> <span class="k">return</span> <span class="o">-</span><span class="n">jnp</span><span class="o">.</span><span class="n">mean</span><span class="p">(</span><span class="n">labels</span> <span class="o">*</span> <span class="n">jnp</span><span class="o">.</span><span class="n">log</span><span class="p">(</span><span class="n">pred</span> <span class="o">+</span> <span class="mf">1e-7</span><span class="p">)</span> <span class="o">+</span> <span class="p">(</span><span class="mi">1</span> <span class="o">-</span> <span class="n">labels</span><span class="p">)</span> <span class="o">*</span> <span class="n">jnp</span><span class="o">.</span><span class="n">log</span><span class="p">(</span><span class="mi">1</span> <span class="o">-</span> <span class="n">pred</span> <span class="o">+</span> <span class="mf">1e-7</span><span class="p">))</span>
<a id="__codelineno-2-62" name="__codelineno-2-62" href="#__codelineno-2-62"></a>
<a id="__codelineno-2-63" name="__codelineno-2-63" href="#__codelineno-2-63"></a><span class="n">grad_fn</span> <span class="o">=</span> <span class="n">jax</span><span class="o">.</span><span class="n">jit</span><span class="p">(</span><span class="n">jax</span><span class="o">.</span><span class="n">grad</span><span class="p">(</span><span class="n">loss_fn</span><span class="p">))</span>
<a id="__codelineno-2-64" name="__codelineno-2-64" href="#__codelineno-2-64"></a><span class="n">lr</span> <span class="o">=</span> <span class="mf">0.05</span>
<a id="__codelineno-2-65" name="__codelineno-2-65" href="#__codelineno-2-65"></a>
<a id="__codelineno-2-66" name="__codelineno-2-66" href="#__codelineno-2-66"></a><span class="k">for</span> <span class="n">step</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="mi">500</span><span class="p">):</span>
<a id="__codelineno-2-67" name="__codelineno-2-67" href="#__codelineno-2-67"></a> <span class="n">grads</span> <span class="o">=</span> <span class="n">grad_fn</span><span class="p">(</span><span class="n">params</span><span class="p">,</span> <span class="n">signal</span><span class="p">,</span> <span class="n">labels</span><span class="p">)</span>
<a id="__codelineno-2-68" name="__codelineno-2-68" href="#__codelineno-2-68"></a> <span class="n">params</span> <span class="o">=</span> <span class="p">{</span><span class="n">k</span><span class="p">:</span> <span class="n">params</span><span class="p">[</span><span class="n">k</span><span class="p">]</span> <span class="o">-</span> <span class="n">lr</span> <span class="o">*</span> <span class="n">grads</span><span class="p">[</span><span class="n">k</span><span class="p">]</span> <span class="k">for</span> <span class="n">k</span> <span class="ow">in</span> <span class="n">params</span><span class="p">}</span>
<a id="__codelineno-2-69" name="__codelineno-2-69" href="#__codelineno-2-69"></a>
<a id="__codelineno-2-70" name="__codelineno-2-70" href="#__codelineno-2-70"></a><span class="n">pred</span> <span class="o">=</span> <span class="n">unet_1d</span><span class="p">(</span><span class="n">signal</span><span class="p">,</span> <span class="n">params</span><span class="p">)</span>
<a id="__codelineno-2-71" name="__codelineno-2-71" href="#__codelineno-2-71"></a>
<a id="__codelineno-2-72" name="__codelineno-2-72" href="#__codelineno-2-72"></a><span class="n">fig</span><span class="p">,</span> <span class="n">axes</span> <span class="o">=</span> <span class="n">plt</span><span class="o">.</span><span class="n">subplots</span><span class="p">(</span><span class="mi">3</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="n">figsize</span><span class="o">=</span><span class="p">(</span><span class="mi">12</span><span class="p">,</span> <span class="mi">7</span><span class="p">),</span> <span class="n">sharex</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<a id="__codelineno-2-73" name="__codelineno-2-73" href="#__codelineno-2-73"></a><span class="n">axes</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">plot</span><span class="p">(</span><span class="n">t</span><span class="p">,</span> <span class="n">signal</span><span class="p">,</span> <span class="n">color</span><span class="o">=</span><span class="s1">&#39;#3498db&#39;</span><span class="p">,</span> <span class="n">linewidth</span><span class="o">=</span><span class="mf">1.5</span><span class="p">)</span>
<a id="__codelineno-2-74" name="__codelineno-2-74" href="#__codelineno-2-74"></a><span class="n">axes</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">set_title</span><span class="p">(</span><span class="s1">&#39;输入信号&#39;</span><span class="p">);</span> <span class="n">axes</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">set_ylabel</span><span class="p">(</span><span class="s1">&#39;&#39;</span><span class="p">)</span>
<a id="__codelineno-2-75" name="__codelineno-2-75" href="#__codelineno-2-75"></a>
<a id="__codelineno-2-76" name="__codelineno-2-76" href="#__codelineno-2-76"></a><span class="n">axes</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span><span class="o">.</span><span class="n">fill_between</span><span class="p">(</span><span class="n">t</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="n">labels</span><span class="p">,</span> <span class="n">alpha</span><span class="o">=</span><span class="mf">0.3</span><span class="p">,</span> <span class="n">color</span><span class="o">=</span><span class="s1">&#39;#27ae60&#39;</span><span class="p">)</span>
<a id="__codelineno-2-77" name="__codelineno-2-77" href="#__codelineno-2-77"></a><span class="n">axes</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span><span class="o">.</span><span class="n">set_title</span><span class="p">(</span><span class="s1">&#39;真实标注&#39;</span><span class="p">);</span> <span class="n">axes</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span><span class="o">.</span><span class="n">set_ylabel</span><span class="p">(</span><span class="s1">&#39;标签&#39;</span><span class="p">)</span>
<a id="__codelineno-2-78" name="__codelineno-2-78" href="#__codelineno-2-78"></a>
<a id="__codelineno-2-79" name="__codelineno-2-79" href="#__codelineno-2-79"></a><span class="n">axes</span><span class="p">[</span><span class="mi">2</span><span class="p">]</span><span class="o">.</span><span class="n">plot</span><span class="p">(</span><span class="n">t</span><span class="p">,</span> <span class="n">pred</span><span class="p">,</span> <span class="n">color</span><span class="o">=</span><span class="s1">&#39;#e74c3c&#39;</span><span class="p">,</span> <span class="n">linewidth</span><span class="o">=</span><span class="mf">1.5</span><span class="p">)</span>
<a id="__codelineno-2-80" name="__codelineno-2-80" href="#__codelineno-2-80"></a><span class="n">axes</span><span class="p">[</span><span class="mi">2</span><span class="p">]</span><span class="o">.</span><span class="n">fill_between</span><span class="p">(</span><span class="n">t</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="p">(</span><span class="n">pred</span> <span class="o">&gt;</span> <span class="mf">0.5</span><span class="p">)</span><span class="o">.</span><span class="n">astype</span><span class="p">(</span><span class="nb">float</span><span class="p">),</span> <span class="n">alpha</span><span class="o">=</span><span class="mf">0.2</span><span class="p">,</span> <span class="n">color</span><span class="o">=</span><span class="s1">&#39;#e74c3c&#39;</span><span class="p">)</span>
<a id="__codelineno-2-81" name="__codelineno-2-81" href="#__codelineno-2-81"></a><span class="n">axes</span><span class="p">[</span><span class="mi">2</span><span class="p">]</span><span class="o">.</span><span class="n">set_title</span><span class="p">(</span><span class="s1">&#39;U-Net预测&#39;</span><span class="p">);</span> <span class="n">axes</span><span class="p">[</span><span class="mi">2</span><span class="p">]</span><span class="o">.</span><span class="n">set_ylabel</span><span class="p">(</span><span class="s1">&#39;概率&#39;</span><span class="p">)</span>
<a id="__codelineno-2-82" name="__codelineno-2-82" href="#__codelineno-2-82"></a><span class="n">axes</span><span class="p">[</span><span class="mi">2</span><span class="p">]</span><span class="o">.</span><span class="n">set_xlabel</span><span class="p">(</span><span class="s1">&#39;t&#39;</span><span class="p">)</span>
<a id="__codelineno-2-83" name="__codelineno-2-83" href="#__codelineno-2-83"></a>
<a id="__codelineno-2-84" name="__codelineno-2-84" href="#__codelineno-2-84"></a><span class="n">plt</span><span class="o">.</span><span class="n">tight_layout</span><span class="p">();</span> <span class="n">plt</span><span class="o">.</span><span class="n">show</span><span class="p">()</span>
<a id="__codelineno-2-85" name="__codelineno-2-85" href="#__codelineno-2-85"></a><span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;最终损失:</span><span class="si">{</span><span class="n">loss_fn</span><span class="p">(</span><span class="n">params</span><span class="p">,</span><span class="w"> </span><span class="n">signal</span><span class="p">,</span><span class="w"> </span><span class="n">labels</span><span class="p">)</span><span class="si">:</span><span class="s2">.4f</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span>
<a id="__codelineno-2-86" name="__codelineno-2-86" href="#__codelineno-2-86"></a><span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;像素准确率:</span><span class="si">{</span><span class="n">jnp</span><span class="o">.</span><span class="n">mean</span><span class="p">((</span><span class="n">pred</span><span class="w"> </span><span class="o">&gt;</span><span class="w"> </span><span class="mf">0.5</span><span class="p">)</span><span class="w"> </span><span class="o">==</span><span class="w"> </span><span class="n">labels</span><span class="p">)</span><span class="si">:</span><span class="s2">.2%</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span>
</code></pre></div></p>
</li>
</ol>
</article>
</div>
<script>var target=document.getElementById(location.hash.slice(1));target&&target.name&&(target.checked=target.name.startsWith("__tabbed_"))</script>
</div>
<button type="button" class="md-top md-icon" data-md-component="top" hidden>
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M13 20h-2V8l-5.5 5.5-1.42-1.42L12 4.16l7.92 7.92-1.42 1.42L13 8z"/></svg>
回到页面顶部
</button>
</main>
<footer class="md-footer">
<nav class="md-footer__inner md-grid" aria-label="页脚" >
<a href="../02.%20convolutional%20networks/" class="md-footer__link md-footer__link--prev" aria-label="上一页: 卷积网络">
<div class="md-footer__button md-icon">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M20 11v2H8l5.5 5.5-1.42 1.42L4.16 12l7.92-7.92L13.5 5.5 8 11z"/></svg>
</div>
<div class="md-footer__title">
<span class="md-footer__direction">
上一页
</span>
<div class="md-ellipsis">
卷积网络
</div>
</div>
</a>
<a href="../04.%20vision%20transformers%20and%20generation/" class="md-footer__link md-footer__link--next" aria-label="下一页: ViT 与生成模型">
<div class="md-footer__title">
<span class="md-footer__direction">
下一页
</span>
<div class="md-ellipsis">
ViT 与生成模型
</div>
</div>
<div class="md-footer__button md-icon">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M4 11v2h12l-5.5 5.5 1.42 1.42L19.84 12l-7.92-7.92L10.5 5.5 16 11z"/></svg>
</div>
</a>
</nav>
<div class="md-footer-meta md-typeset">
<div class="md-footer-meta__inner md-grid">
<div class="md-copyright">
Made with
<a href="https://squidfunk.github.io/mkdocs-material/" target="_blank" rel="noopener">
Material for MkDocs
</a>
</div>
<div class="md-social">
<a href="https://github.com/flykhan/maths-cs-ai-compendium-zh" target="_blank" rel="noopener" title="github.com" class="md-social__link">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512"><!--! Font Awesome Free 7.1.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2025 Fonticons, Inc.--><path d="M173.9 397.4c0 2-2.3 3.6-5.2 3.6-3.3.3-5.6-1.3-5.6-3.6 0-2 2.3-3.6 5.2-3.6 3-.3 5.6 1.3 5.6 3.6m-31.1-4.5c-.7 2 1.3 4.3 4.3 4.9 2.6 1 5.6 0 6.2-2s-1.3-4.3-4.3-5.2c-2.6-.7-5.5.3-6.2 2.3m44.2-1.7c-2.9.7-4.9 2.6-4.6 4.9.3 2 2.9 3.3 5.9 2.6 2.9-.7 4.9-2.6 4.6-4.6-.3-1.9-3-3.2-5.9-2.9M252.8 8C114.1 8 8 113.3 8 252c0 110.9 69.8 205.8 169.5 239.2 12.8 2.3 17.3-5.6 17.3-12.1 0-6.2-.3-40.4-.3-61.4 0 0-70 15-84.7-29.8 0 0-11.4-29.1-27.8-36.6 0 0-22.9-15.7 1.6-15.4 0 0 24.9 2 38.6 25.8 21.9 38.6 58.6 27.5 72.9 20.9 2.3-16 8.8-27.1 16-33.7-55.9-6.2-112.3-14.3-112.3-110.5 0-27.5 7.6-41.3 23.6-58.9-2.6-6.5-11.1-33.3 2.6-67.9 20.9-6.5 69 27 69 27 20-5.6 41.5-8.5 62.8-8.5s42.8 2.9 62.8 8.5c0 0 48.1-33.6 69-27 13.7 34.7 5.2 61.4 2.6 67.9 16 17.7 25.8 31.5 25.8 58.9 0 96.5-58.9 104.2-114.8 110.5 9.2 7.9 17 22.9 17 46.4 0 33.7-.3 75.4-.3 83.6 0 6.5 4.6 14.4 17.3 12.1C436.2 457.8 504 362.9 504 252 504 113.3 391.5 8 252.8 8M105.2 352.9c-1.3 1-1 3.3.7 5.2 1.6 1.6 3.9 2.3 5.2 1 1.3-1 1-3.3-.7-5.2-1.6-1.6-3.9-2.3-5.2-1m-10.8-8.1c-.7 1.3.3 2.9 2.3 3.9 1.6 1 3.6.7 4.3-.7.7-1.3-.3-2.9-2.3-3.9-2-.6-3.6-.3-4.3.7m32.4 35.6c-1.6 1.3-1 4.3 1.3 6.2 2.3 2.3 5.2 2.6 6.5 1 1.3-1.3.7-4.3-1.3-6.2-2.2-2.3-5.2-2.6-6.5-1m-11.4-14.7c-1.6 1-1.6 3.6 0 5.9s4.3 3.3 5.6 2.3c1.6-1.3 1.6-3.9 0-6.2-1.4-2.3-4-3.3-5.6-2"/></svg>
</a>
</div>
</div>
</div>
</footer>
</div>
<div class="md-dialog" data-md-component="dialog">
<div class="md-dialog__inner md-typeset"></div>
</div>
<script id="__config" type="application/json">{"annotate": null, "base": "../..", "features": ["navigation.tabs", "navigation.sections", "navigation.expand", "navigation.top", "navigation.footer", "search.suggest", "search.highlight", "content.code.copy", "toc.follow"], "search": "../../assets/javascripts/workers/search.2c215733.min.js", "tags": null, "translations": {"clipboard.copied": "\u5df2\u590d\u5236", "clipboard.copy": "\u590d\u5236", "search.result.more.one": "\u5728\u8be5\u9875\u4e0a\u8fd8\u6709 1 \u4e2a\u7b26\u5408\u6761\u4ef6\u7684\u7ed3\u679c", "search.result.more.other": "\u5728\u8be5\u9875\u4e0a\u8fd8\u6709 # \u4e2a\u7b26\u5408\u6761\u4ef6\u7684\u7ed3\u679c", "search.result.none": "\u6ca1\u6709\u627e\u5230\u7b26\u5408\u6761\u4ef6\u7684\u7ed3\u679c", "search.result.one": "\u627e\u5230 1 \u4e2a\u7b26\u5408\u6761\u4ef6\u7684\u7ed3\u679c", "search.result.other": "# \u4e2a\u7b26\u5408\u6761\u4ef6\u7684\u7ed3\u679c", "search.result.placeholder": "\u952e\u5165\u4ee5\u5f00\u59cb\u641c\u7d22", "search.result.term.missing": "\u7f3a\u5c11", "select.version": "\u9009\u62e9\u5f53\u524d\u7248\u672c"}, "version": null}</script>
<script src="../../assets/javascripts/bundle.79ae519e.min.js"></script>
<script src="../../javascripts/mathjax.js"></script>
<script src="https://unpkg.com/mathjax@3/es5/tex-mml-chtml.js"></script>
</body>
</html>