Musings from the mountains himwant.org

Add Depth Anything v3 Burn WASM

+268 -2
+57 -2
.tangled/workflows/deploy.yaml
# ··· earlier lines of .tangled/workflows/deploy.yaml are outside this excerpt.
# NOTE(review): `nixpkgs:` is presumably nested under a `dependencies:` key
# that is not visible here — confirm the indentation against the full file.
  nixpkgs:
    - nodejs
    - bash
    - git
    - cacert
    - cargo
    - rustc
    - wasm-pack
    - clang # Compiler
    - llvm # Provides llvm-ar and other tools often needed by burn

steps:
  - name: install dependencies
    command: npm install

  # Clones each "<git url>|<output name>" entry of MODEL_REPOS, builds its
  # `web` branch with wasm-pack and places the result in public/wasm/<name>,
  # which the site build then ships as a static asset.
  - name: build external wasm models
    environment:
      # Whitespace-separated list of "<git url>|<output directory name>".
      MODEL_REPOS: >-
        https://tangled.org/akshitgaur2005.tngl.sh/depth_anything_v3_burn|depth
    command: |
      # Stop at the first failing command or unset variable. Without this a
      # failed clone let the following `cd`/`git checkout` run inside the site
      # checkout and the pipeline went on to deploy without the WASM bundle.
      set -eu

      # 1. Clear the Nix wrapper flags.
      # These variables are what force the compiler to look at Linux headers.
      unset NIX_CFLAGS_COMPILE NIX_LDFLAGS C_INCLUDE_PATH CPATH LIBRARY_PATH

      # 2. Use Clang as the C compiler for the wasm32-unknown-unknown target.
      export CC_wasm32_unknown_unknown=clang

      # 3. Force "no standard includes" (-nostdinc) so Clang does not look at
      # /nix/store/.../glibc/include, then add back Clang's own internal
      # headers (stddef.h, limits.h, etc.).
      CLANG_RES_DIR="$(clang -print-resource-dir)/include"
      export CFLAGS_wasm32_unknown_unknown="-nostdinc -I${CLANG_RES_DIR} -Wno-macro-redefined"

      # Absolute paths keep every step independent of the current directory.
      site_root="$PWD"
      out_root="$site_root/public/wasm"
      build_dir="$site_root/temp_build"
      mkdir -p "$out_root" "$build_dir"

      # Remove the scratch checkout whether the build succeeds or fails.
      trap 'rm -rf "$build_dir"' EXIT

      for entry in $MODEL_REPOS; do
        url="${entry%%|*}"
        name="${entry##*|}"

        echo "--- Processing: $name from $url ---"
        echo "Cloning 'web' branch..."
        git clone --branch web "$url" "$build_dir/$name"

        echo "Building WASM..."
        # Subshell: the directory change cannot leak into later iterations.
        (
          cd "$build_dir/$name"
          wasm-pack build --target web --out-dir "$out_root/$name" --no-typescript --release
        )

        # Drop the .gitignore from the output directory (if one was generated).
        rm -f "$out_root/$name/.gitignore"
      done

  - name: build site
    command: npm run build

  - name: deploy
    command: |
      npx --yes wrangler pages deploy dist --project-name himwant --branch main
+211
src/pages/projects/depth.astro
---
import Layout from '~/layouts/Layout.astro'
import BlockHeader from '~/components/BlockHeader.astro'

const title = "Depth Estimator | Himwant"
const description = "Run the Depth Anything model entirely in your browser using WebAssembly and Burn."
---

<Layout title={title} description={description}>
  <section class="depth-tool">
    <BlockHeader>Depth Estimator</BlockHeader>

    <p class="mb-6 opacity-75">
      This runs the <strong>Depth Anything</strong> machine learning model locally in your browser.
      Your images are never uploaded to a server.
    </p>

    <!-- Controls Area -->
    <div class="controls border border-skin-line rounded p-4 mb-6 bg-skin-card-muted">
      <div class="flex flex-col gap-4 sm:flex-row sm:items-end">

        <!-- Backend Selection -->
        <div class="flex flex-col gap-1">
          <label for="backend" class="text-sm font-bold">Backend</label>
          <select id="backend" class="p-2 rounded bg-skin-fill border border-skin-line text-skin-base">
            <option value="ndarray">CPU (NdArray)</option>
            <option value="wgpu_f32">GPU (WebGPU f32)</option>
            <option value="wgpu_f16">GPU (WebGPU f16)</option>
          </select>
        </div>

        <!-- File Upload -->
        <div class="flex flex-col gap-1 flex-grow">
          <label for="fileInput" class="text-sm font-bold">Upload Image</label>
          <input type="file" id="fileInput" accept="image/*" class="text-sm file:mr-4 file:py-2 file:px-4 file:rounded-full file:border-0 file:text-sm file:font-semibold file:bg-skin-accent file:text-skin-inverted hover:file:opacity-75"/>
        </div>
      </div>

      <!-- Status Bar -->
      <div class="mt-4 flex justify-between items-center text-sm font-mono">
        <span id="status" class="text-skin-accent">Initializing WASM...</span>
        <span id="time" class="opacity-70"></span>
      </div>
    </div>

    <!-- Canvas Area -->
    <div class="grid grid-cols-1 md:grid-cols-2 gap-4">
      <div class="canvas-wrapper">
        <h3 class="font-bold mb-2">Input (Resized to 504x280)</h3>
        <canvas id="inputCanvas" width="504" height="280" class="w-full border border-skin-line rounded"></canvas>
      </div>
      <div class="canvas-wrapper">
        <h3 class="font-bold mb-2">Depth Map</h3>
        <canvas id="outputCanvas" width="504" height="280" class="w-full border border-skin-line rounded"></canvas>
      </div>
    </div>

  </section>
</Layout>

<script>
  // Import from the path defined in deploy.yaml (public/wasm/depth).
  import init, { DepthEstimator } from '/wasm/depth/depth_anything.js';

  // Size the image is resized to before inference; matches the canvas
  // width/height attributes in the markup above.
  const MODEL_WIDTH = 504;
  const MODEL_HEIGHT = 280;
  const PIXEL_COUNT = MODEL_WIDTH * MODEL_HEIGHT;

  // DOM elements
  const statusDiv = document.getElementById("status") as HTMLElement;
  const timeDiv = document.getElementById("time") as HTMLElement;
  const backendSelect = document.getElementById("backend") as HTMLSelectElement;
  const fileInput = document.getElementById("fileInput") as HTMLInputElement;
  const inputCanvas = document.getElementById("inputCanvas") as HTMLCanvasElement;
  const outputCanvas = document.getElementById("outputCanvas") as HTMLCanvasElement;

  // getContext() may return null, so both are checked before use.
  const inputCtx = inputCanvas.getContext("2d", { willReadFrequently: true });
  const outputCtx = outputCanvas.getContext("2d");

  // Maps each <option> value of the backend select to the estimator method
  // that activates it.
  const BACKEND_SETTERS = {
    ndarray: (target) => target.set_backend_ndarray(),
    wgpu_f32: (target) => target.set_backend_wgpu_f32(),
    wgpu_f16: (target) => target.set_backend_wgpu_f16(),
  };

  // Set once loadWasm() succeeds; every handler is a no-op until then.
  let estimator;
  // True while a backend switch or an inference is in flight.
  let isProcessing = false;

  /**
   * Marks the page busy/idle and locks the controls while busy, so a second
   * upload or backend switch cannot start in the middle of the current one.
   */
  function setBusy(busy: boolean) {
    isProcessing = busy;
    backendSelect.disabled = busy;
    fileInput.disabled = busy;
  }

  /**
   * Loads `file` into an <img> element.
   *
   * The temporary object URL is revoked on success and on failure alike.
   * Rejects when the browser cannot load the file as an image.
   */
  function loadImage(file: File): Promise<HTMLImageElement> {
    return new Promise((resolve, reject) => {
      const url = URL.createObjectURL(file);
      const img = new Image();
      img.onload = () => {
        URL.revokeObjectURL(url);
        resolve(img);
      };
      img.onerror = () => {
        URL.revokeObjectURL(url);
        reject(new Error(`Could not load "${file.name}" as an image`));
      };
      img.src = url;
    });
  }

  /**
   * Draws `img` stretched to MODEL_WIDTH x MODEL_HEIGHT on the input canvas
   * and returns its pixels as a flat Float32Array [R, G, B, R, G, B, ...]
   * holding the raw 0-255 channel values (alpha is dropped).
   */
  function imageToModelInput(img: HTMLImageElement, ctx: CanvasRenderingContext2D) {
    ctx.clearRect(0, 0, MODEL_WIDTH, MODEL_HEIGHT);
    ctx.drawImage(img, 0, 0, MODEL_WIDTH, MODEL_HEIGHT);

    const { data } = ctx.getImageData(0, 0, MODEL_WIDTH, MODEL_HEIGHT); // RGBA Uint8
    const floats = new Float32Array(PIXEL_COUNT * 3);
    for (let src = 0, dst = 0; src < data.length; src += 4, dst += 3) {
      floats[dst] = data[src];         // R
      floats[dst + 1] = data[src + 1]; // G
      floats[dst + 2] = data[src + 2]; // B
    }
    return floats;
  }

  /**
   * Paints the model output on the output canvas as an opaque grayscale
   * image, one output value per pixel.
   */
  function drawDepthMap(outputBytes, ctx: CanvasRenderingContext2D) {
    if (outputBytes.length !== PIXEL_COUNT) {
      console.warn(`Expected ${PIXEL_COUNT} depth values, got ${outputBytes.length}.`);
    }

    const frame = new ImageData(MODEL_WIDTH, MODEL_HEIGHT);
    const pixels = frame.data;
    // Never write more pixels than the canvas holds.
    const count = Math.min(outputBytes.length, PIXEL_COUNT);
    for (let i = 0; i < count; i++) {
      const gray = outputBytes[i];
      const offset = i * 4;
      pixels[offset] = gray;     // R
      pixels[offset + 1] = gray; // G
      pixels[offset + 2] = gray; // B
      pixels[offset + 3] = 255;  // Alpha (opaque)
    }
    ctx.putImageData(frame, 0, 0);
  }

  /**
   * Runs the depth model on `file` and shows the result.
   *
   * The returned promise settles only after the image has been loaded, the
   * inference has finished and the depth map has been drawn (or an error has
   * been reported in the status bar). Does nothing when the model is not
   * loaded yet or another operation is still running.
   */
  async function processImage(file) {
    if (!estimator || isProcessing) return;
    if (!inputCtx || !outputCtx) {
      statusDiv.innerText = "Error: 2D canvas is not available.";
      return;
    }

    setBusy(true);
    statusDiv.innerText = "Processing...";
    timeDiv.innerText = "";

    try {
      let img;
      try {
        img = await loadImage(file);
      } catch (err) {
        console.error(err);
        statusDiv.innerText = "Error loading image.";
        return;
      }

      const inputFloats = imageToModelInput(img, inputCtx);

      // Only the WASM call itself is timed.
      const start = performance.now();
      const outputBytes = await estimator.inference(inputFloats);
      const end = performance.now();

      drawDepthMap(outputBytes, outputCtx);

      timeDiv.innerText = `Inference: ${(end - start).toFixed(2)}ms`;
      statusDiv.innerText = "Done.";
    } catch (err) {
      console.error(err);
      statusDiv.innerText = "Error during inference.";
    } finally {
      setBusy(false);
    }
  }

  /**
   * Initializes the WASM module, creates the estimator and disables the
   * WebGPU backends when the browser does not expose `navigator.gpu`.
   */
  async function loadWasm() {
    try {
      await init(); // Initialize WASM
      estimator = new DepthEstimator();

      statusDiv.innerText = "Ready. Select an image.";

      // Check WebGPU support
      if (!navigator.gpu) {
        for (const option of Array.from(backendSelect.options)) {
          if (option.value.startsWith("wgpu")) option.disabled = true;
        }
        statusDiv.innerText += " (WebGPU not detected)";
      }
    } catch (e) {
      console.error(e);
      statusDiv.innerText = "Error loading WASM model.";
      return;
    }

    // A file picked while the module was still loading was ignored by the
    // change handler; process it now.
    if (fileInput.files && fileInput.files.length > 0) {
      await processImage(fileInput.files[0]);
    }
  }

  // --- Logic: Backend Switching ---
  backendSelect.addEventListener("change", async () => {
    if (!estimator || isProcessing) return;

    const backend = backendSelect.value;
    const applyBackend = BACKEND_SETTERS[backend];
    if (!applyBackend) return;

    setBusy(true);
    statusDiv.innerText = "Switching backend...";

    // Small delay to allow UI to render text
    await new Promise((resolve) => setTimeout(resolve, 10));

    let switched = false;
    try {
      await applyBackend(estimator);
      switched = true;
      statusDiv.innerText = `Backend switched to ${backend}. Ready.`;
    } catch (err) {
      console.error(err);
      statusDiv.innerText = "Error switching backend.";
    } finally {
      setBusy(false);
    }

    // Re-run on the current image. processImage() takes the busy flag itself
    // and is awaited, so the flag stays set until the inference has finished.
    if (switched && fileInput.files && fileInput.files.length > 0) {
      await processImage(fileInput.files[0]);
    }
  });

  // --- Logic: File Upload ---
  fileInput.addEventListener("change", () => {
    if (fileInput.files && fileInput.files.length > 0) {
      // processImage() reports its own errors, so the promise never rejects.
      void processImage(fileInput.files[0]);
    }
  });

  // Initialize on load
  void loadWasm();
</script>

<style>
  /* Local styling overrides if needed, mostly handled by Tailwind classes above */
  .canvas-wrapper canvas {
    image-rendering: pixelated; /* Crisp edges for the model output */
  }
</style>