We implement an advanced, end-to-end Kornia tutorial and demonstrate how modern, differentiable computer vision can be built entirely in PyTorch. We start by constructing GPU-accelerated, synchronized augmentation pipelines for images, masks, and keypoints, then move into differentiable geometry by optimizing a homography directly through gradient descent. We also show how learned feature matching with LoFTR integrates with Kornia's RANSAC to estimate robust homographies and produce a simple stitched output, even under constrained or offline-safe conditions. Finally, we ground these ideas in practice by training a lightweight CNN on CIFAR-10 using Kornia's GPU augmentations, highlighting how research-grade vision pipelines translate naturally into learning systems. Check out the FULL CODES here.

import os, math, time, random, urllib.request
from dataclasses import dataclass
from typing import Tuple
import sys, subprocess

def pip_install(pkgs):
    subprocess.check_call([sys.executable, "-m", "pip", "install", "-q"] + pkgs)

pip_install([
    "kornia==0.8.2", "torch", "torchvision",
    "matplotlib", "numpy", "opencv-python-headless",
])

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms.functional as TF
import matplotlib.pyplot as plt
import cv2

import kornia
import kornia.augmentation as K
import kornia.geometry.transform as KG
from kornia.geometry.ransac import RANSAC
from kornia.feature import LoFTR

# Seed every RNG we touch and pick the compute device once, up front.
torch.manual_seed(0)
np.random.seed(0)
random.seed(0)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

print("Torch:", torch.__version__)
print("Kornia:", kornia.__version__)
print("Device:", device)

We begin by setting up a fully reproducible environment, installing Kornia and its core dependencies so that GPU-accelerated, differentiable computer vision runs smoothly in Google Colab. We then import and organize PyTorch, Kornia, and supporting libraries, establishing a clean foundation for geometry, augmentation, and feature-matching workflows. We seed the random number generators and select the available compute device so that all subsequent experiments remain deterministic, debuggable, and performance-aware. Check out the FULL CODES here.
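Before moving on, it helps to confirm that Kornia's augmentations really are differentiable on the selected device. The snippet below is a minimal sanity check of our own, not part of the tutorial's pipeline; it assumes the setup cell above has run and simply pushes gradients through a Kornia augmentation module.

# Minimal differentiability check (illustrative only; assumes the setup cell above has run).
x = torch.rand(2, 3, 64, 64, device=device, requires_grad=True)
aug_check = K.RandomAffine(degrees=10.0, p=1.0).to(device)
y = aug_check(x)            # Kornia augmentations are nn.Modules built from differentiable warps
loss = y.mean()
loss.backward()             # gradients flow back through the augmentation to the input
print("input grad shape:", tuple(x.grad.shape))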
def to_tensor_img_uint8(img_bgr_uint8: np.ndarray) -> torch.Tensor:
    img_rgb = cv2.cvtColor(img_bgr_uint8, cv2.COLOR_BGR2RGB)
    t = torch.from_numpy(img_rgb).permute(2, 0, 1).float() / 255.0
    return t.unsqueeze(0)

def show(img_t: torch.Tensor, title: str = "", max_size: int = 900):
    x = img_t.detach().float().cpu().clamp(0, 1)
    if x.shape[1] == 1:
        x = x.repeat(1, 3, 1, 1)
    x = x[0].permute(1, 2, 0).numpy()
    h, w = x.shape[:2]
    scale = min(1.0, max_size / max(h, w))
    if scale < 1.0:
        x = cv2.resize(x, (int(w * scale), int(h * scale)), interpolation=cv2.INTER_AREA)
    plt.figure(figsize=(7, 5))
    plt.imshow(x)
    plt.axis("off")
    plt.title(title)
    plt.show()

def show_mask(mask_t: torch.Tensor, title: str = ""):
    x = mask_t.detach().float().cpu().clamp(0, 1)[0, 0].numpy()
    plt.figure(figsize=(6, 4))
    plt.imshow(x)
    plt.axis("off")
    plt.title(title)
    plt.show()

def download(url: str, path: str):
    os.makedirs(os.path.dirname(path), exist_ok=True)
    if not os.path.exists(path):
        urllib.request.urlretrieve(url, path)

def safe_download(url: str, path: str) -> bool:
    try:
        os.makedirs(os.path.dirname(path), exist_ok=True)
        if not os.path.exists(path):
            urllib.request.urlretrieve(url, path)
        return True
    except Exception as e:
        print("Download failed:", e)
        return False

def make_grid_mask(h: int, w: int, cell: int = 32) -> torch.Tensor:
    yy, xx = torch.meshgrid(torch.arange(h), torch.arange(w), indexing="ij")
    m = (((yy // cell) % 2) ^ ((xx // cell) % 2)).float()
    return m.unsqueeze(0).unsqueeze(0)

def draw_matches(img0_rgb: np.ndarray, img1_rgb: np.ndarray,
                 pts0: np.ndarray, pts1: np.ndarray, max_draw: int = 200) -> np.ndarray:
    h0, w0 = img0_rgb.shape[:2]
    h1, w1 = img1_rgb.shape[:2]
    out = np.zeros((max(h0, h1), w0 + w1, 3), dtype=np.uint8)
    out[:h0, :w0] = img0_rgb
    out[:h1, w0:w0 + w1] = img1_rgb
    n = min(len(pts0), len(pts1), max_draw)
    if n == 0:
        return out
    idx = np.random.choice(len(pts0), size=n, replace=False) if len(pts0) > n else np.arange(n)
    for i in idx:
        x0, y0 = pts0[i]
        x1, y1 = pts1[i]
        x1_shift = x1 + w0
        p0 = (int(round(x0)), int(round(y0)))
        p1 = (int(round(x1_shift)), int(round(y1)))
        cv2.circle(out, p0, 2, (255, 255, 255), -1, lineType=cv2.LINE_AA)
        cv2.circle(out, p1, 2, (255, 255, 255), -1, lineType=cv2.LINE_AA)
        cv2.line(out, p0, p1, (255, 255, 255), 1, lineType=cv2.LINE_AA)
    return out

def normalize_img_for_loftr(img_rgb01: torch.Tensor) -> torch.Tensor:
    if img_rgb01.shape[1] == 3:
        return kornia.color.rgb_to_grayscale(img_rgb01)
    return img_rgb01

We define a set of reusable helper utilities for image conversion, visualization, safe data downloading, and synthetic mask generation, keeping the vision pipeline clean and modular. We also implement robust visualization and matching helpers that allow us to inspect augmented images, masks, and LoFTR correspondences directly during experimentation. We normalize image inputs to the exact tensor formats expected by Kornia and LoFTR, ensuring that all downstream geometry and feature-matching components operate consistently and correctly. Check out the FULL CODES here.
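Because these helpers carry the rest of the tutorial, a quick usage sketch makes their expected shapes and dtypes concrete. Everything below is illustrative: the array is random, and the URL and file path are placeholders rather than files referenced by the tutorial.

# Illustrative usage of the helpers above; the data, URL, and path are placeholders.
bgr = (np.random.rand(240, 320, 3) * 255).astype(np.uint8)   # stand-in for a cv2.imread result (BGR, uint8)
img_t = to_tensor_img_uint8(bgr).to(device)                  # -> (1, 3, H, W) float tensor in [0, 1]
show(img_t, "BGR uint8 array converted to a Kornia-ready tensor")
show_mask(make_grid_mask(240, 320, cell=32).to(device), "Checkerboard mask")

# safe_download returns False instead of raising, so offline runs degrade gracefully.
ok = safe_download("https://example.com/sample.jpg", "data/sample.jpg")
print("download succeeded:", ok)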
print("\n[1] Differentiable augmentations: image + mask + keypoints")

B, C, H, W = 1, 3, 256, 384
img = torch.rand(B, C, H, W, device=device)
mask = make_grid_mask(H, W, cell=24).to(device)
kps = torch.tensor([[
    [40.0, 40.0],
    [W - 50.0, 50.0],
    [W * 0.6, H * 0.8],
    [W * 0.25, H * 0.65],
]], device=device)

aug = K.AugmentationSequential(
    K.RandomResizedCrop((224, 224), scale=(0.6, 1.0), ratio=(0.8, 1.25), p=1.0),
    K.RandomHorizontalFlip(p=0.5),
    K.RandomRotation(degrees=18.0, p=0.7),
    K.ColorJiggle(0.2, 0.2, 0.2, 0.1, p=0.8),
    data_keys=["input", "mask", "keypoints"],
    same_on_batch=True,
).to(device)

img_aug, mask_aug, kps_aug = aug(img, mask, kps)

print("image:", tuple(img.shape), "->", tuple(img_aug.shape))
print("mask :", tuple(mask.shape), "->", tuple(mask_aug.shape))
print("kps :", tuple(kps.shape), "->", tuple(kps_aug.shape))
print("Example keypoints (before -> after):")
print(torch.cat([kps[0], kps_aug[0]], dim=1))

show(img, "Original (synthetic)")
show_mask(mask, "Original mask (synthetic)")
show(img_aug, "Augmented (synced)")
show_mask(mask_aug, "Augmented mask (synced)")

We construct a synchronized, fully differentiable augmentation pipeline that applies the same geometric transformations to images, masks, and keypoints on the GPU, while the photometric jitter touches the image alone. We generate synthetic data to clearly demonstrate how spatial consistency is preserved across modalities while still introducing realistic variability through cropping, rotation, flipping, and color jitter. We visualize the before-and-after results to verify that the augmented images, segmentation masks, and keypoints remain aligned after transformation. Check out the FULL CODES here.

print("\n[2] Differentiable homography alignment by optimization")

base = torch.rand(1, 1, 240, 320, device=device)
show(base, "Base image (grayscale)")

true_H_px = torch.eye(3, device=device).unsqueeze(0)
true_H_px[:, 0, 2] = 18.0
true_H_px[:, 1, 2] = -12.0
true_H_px[:, 0, 1] = 0.03
true_H_px[:, 1, 0] = -0.02
true_H_px[:, 2, 0] = 1e-4
true_H_px[:, 2, 1] = -8e-5

target = KG.warp_perspective(
    base, true_H_px,
    dsize=(base.shape[-2], base.shape[-1]),
    align_corners=True,
)
show(target, "Target (base warped by true homography)")

p = torch.zeros(1, 8, device=device, requires_grad=True)

def params_to_H(p8: torch.Tensor) -> torch.Tensor:
    Bp = p8.shape[0]
    Hm = torch.eye(3, device=p8.device).unsqueeze(0).repeat(Bp, 1, 1)
    Hm[:, 0, 0] = 1.0 + p8[:, 0]
    Hm[:, 0, 1] = p8[:, 1]
    Hm[:, 0, 2] = p8[:, 2]
    Hm[:, 1, 0] = p8[:, 3]
    Hm[:, 1, 1] = 1.0 + p8[:, 4]
    Hm[:, 1, 2] = p8[:, 5]
    Hm[:, 2, 0] = p8[:, 6]
    Hm[:, 2, 1] = p8[:, 7]
    return Hm

opt = torch.optim.Adam([p], lr=0.08)
losses = []
for step in
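The optimization loop set up above would, in a typical formulation, repeatedly warp the base image with the current homography estimate from params_to_H and minimize a photometric loss against the target. The sketch below illustrates that pattern; the step count, the L1 loss, and the logging interval are our own assumptions rather than the tutorial's exact loop.

# Hedged sketch of a typical photometric-alignment loop (assumed values, not the original code).
for step in range(300):
    opt.zero_grad()
    H_cur = params_to_H(p)
    warped = KG.warp_perspective(
        base, H_cur,
        dsize=(base.shape[-2], base.shape[-1]),
        align_corners=True,
    )
    loss = F.l1_loss(warped, target)   # photometric error between warped base and target
    loss.backward()
    opt.step()
    losses.append(loss.item())
    if step % 50 == 0:
        print(f"step {step:03d} | loss {loss.item():.5f}")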