Commit 838bc50

Refactored code from lightning model to torch model

Signed-off-by: Devansh Agarwal <[email protected]>

1 parent 1beedf5 commit 838bc50

2 files changed (+148, -107 lines)

src/anomalib/models/image/glass/lightning_model.py (9 additions, 91 deletions)

@@ -18,13 +18,11 @@
 # Copyright (C) 2025 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
-import math
 from typing import Any
 
 import torch
 from lightning.pytorch.utilities.types import STEP_OUTPUT
-from torch import nn, optim
-from torch.nn import functional as f
+from torch import optim
 from torchvision.transforms.v2 import CenterCrop, Compose, Normalize, Resize
 
 from anomalib import LearningType
@@ -36,7 +34,6 @@
 from anomalib.pre_processing import PreProcessor
 from anomalib.visualization import Visualizer
 
-from .loss import FocalLoss
 from .torch_model import GlassModel
 
 
@@ -150,6 +147,7 @@ def __init__(
 
         self.model = GlassModel(
             input_shape=input_shape,
+            anomaly_source_path=anomaly_source_path,
             pretrain_embed_dim=pretrain_embed_dim,
             target_embed_dim=target_embed_dim,
             backbone=backbone,
@@ -161,22 +159,19 @@ def __init__(
             dsc_layers=dsc_layers,
             dsc_hidden=dsc_hidden,
             dsc_margin=dsc_margin,
+            step=step,
+            svd=svd,
+            mining=mining,
+            noise=noise,
+            radius=radius,
+            p=p,
         )
 
         self.c = torch.tensor([1])
-        self.p = p
-        self.radius = radius
-        self.mining = mining
-        self.noise = noise
-        self.distribution = 0
         self.lr = lr
-        self.step = step
-        self.svd = svd
 
         self.dev = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
-        self.focal_loss = FocalLoss()
-
         if pre_proj > 0:
             self.proj_opt = optim.AdamW(
                 self.model.pre_projection.parameters(),
@@ -280,84 +275,7 @@ def training_step(
         self.backbone_opt.zero_grad()
 
         img = batch.image
-        aug, mask_s = self.augmentor(img)
-        if img is not None:
-            batch_size = img.shape[0]
-
-        true_feats, fake_feats = self.model(img, aug)
-
-        h_ratio = mask_s.shape[2] // int(math.sqrt(fake_feats.shape[0] // batch_size))
-        w_ratio = mask_s.shape[3] // int(math.sqrt(fake_feats.shape[0] // batch_size))
-
-        mask_s_resized = f.interpolate(
-            mask_s.float(),
-            size=(mask_s.shape[2] // h_ratio, mask_s.shape[3] // w_ratio),
-            mode="nearest",
-        )
-        mask_s_gt = mask_s_resized.reshape(-1, 1)
-
-        noise = torch.normal(0, self.noise, true_feats.shape).to(self.dev)
-        gaus_feats = true_feats + noise
-
-        center = self.c.repeat(img.shape[0], 1, 1)
-        center = center.reshape(-1, center.shape[-1])
-        true_points = torch.concat(
-            [fake_feats[mask_s_gt[:, 0] == 0], true_feats],
-            dim=0,
-        )
-        c_t_points = torch.concat([center[mask_s_gt[:, 0] == 0], center], dim=0)
-        dist_t = torch.norm(true_points - c_t_points, dim=1)
-        r_t = torch.tensor([torch.quantile(dist_t, q=self.radius)]).to(self.dev)
-
-        for step in range(self.step + 1):
-            scores = self.model.discriminator(torch.cat([true_feats, gaus_feats]))
-            true_scores = scores[: len(true_feats)]
-            gaus_scores = scores[len(true_feats) :]
-            true_loss = nn.BCELoss()(true_scores, torch.zeros_like(true_scores))
-            gaus_loss = nn.BCELoss()(gaus_scores, torch.ones_like(gaus_scores))
-            bce_loss = true_loss + gaus_loss
-
-            if step == self.step:
-                break
-
-            grad = torch.autograd.grad(gaus_loss, [gaus_feats])[0]
-            grad_norm = torch.norm(grad, dim=1)
-            grad_norm = grad_norm.view(-1, 1)
-            grad_normalized = grad / (grad_norm + 1e-10)
-
-            with torch.no_grad():
-                gaus_feats.add_(0.001 * grad_normalized)
-
-        fake_points = fake_feats[mask_s_gt[:, 0] == 1]
-        true_points = true_feats[mask_s_gt[:, 0] == 1]
-        c_f_points = center[mask_s_gt[:, 0] == 1]
-        dist_f = torch.norm(fake_points - c_f_points, dim=1)
-        proj_feats = c_f_points if self.svd == 1 else true_points
-        r = r_t if self.svd == 1 else 1
-
-        if self.svd == 1:
-            h = fake_points - proj_feats
-            h_norm = dist_f if self.svd == 1 else torch.norm(h, dim=1)
-            alpha = torch.clamp(h_norm, 2 * r, 4 * r)
-            proj = (alpha / (h_norm + 1e-10)).view(-1, 1)
-            h = proj * h
-            fake_points = proj_feats + h
-            fake_feats[mask_s_gt[:, 0] == 1] = fake_points
-
-        fake_scores = self.model.discriminator(fake_feats)
-
-        if self.p > 0:
-            fake_dist = (fake_scores - mask_s_gt) ** 2
-            d_hard = torch.quantile(fake_dist, q=self.p)
-            fake_scores_ = fake_scores[fake_dist >= d_hard].unsqueeze(1)
-            mask_ = mask_s_gt[fake_dist >= d_hard].unsqueeze(1)
-        else:
-            fake_scores_ = fake_scores
-            mask_ = mask_s_gt
-        output = torch.cat([1 - fake_scores_, fake_scores_], dim=1)
-        focal_loss = self.focal_loss(output, mask_)
-
-        loss = bce_loss + focal_loss
+        true_loss, gaus_loss, bce_loss, focal_loss, loss = self.model(img, self.c)
         loss.backward()
 
         if self.proj_opt is not None:
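
With this hunk, training_step no longer implements the GLASS loss computation itself: it hands the image and the center tensor to GlassModel and backpropagates the total loss returned. A minimal, self-contained sketch of that delegation pattern; ToyModel and every name below are illustrative stand-ins, not anomalib code:

import torch
from torch import nn, optim


class ToyModel(nn.Module):
    """Stand-in for GlassModel: forward returns each loss term plus the total."""

    def __init__(self) -> None:
        super().__init__()
        self.net = nn.Linear(8, 1)

    def forward(self, img: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
        scores = torch.sigmoid(self.net(img))
        term_a = scores.mean()           # e.g. a BCE-style term
        term_b = (scores**2).mean()      # e.g. a focal-style term
        return term_a, term_b, term_a + term_b


model = ToyModel()
opt = optim.AdamW(model.parameters(), lr=1e-4)

img = torch.randn(4, 8)                  # stand-in for batch.image
opt.zero_grad()
term_a, term_b, loss = model(img)        # all loss logic lives in the model
loss.backward()
opt.step()

Keeping every loss term inside the torch module lets the model be exercised and tested without the Lightning wrapper, which is one practical benefit of this refactor.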

src/anomalib/models/image/glass/torch_model.py (139 additions, 16 deletions)

@@ -24,9 +24,12 @@
 import torch.nn.functional as f
 from torch import nn
 
+from anomalib.data.utils.generators.perlin import PerlinAnomalyGenerator
 from anomalib.models.components import TimmFeatureExtractor
 from anomalib.models.components.feature_extractors import dryrun_find_featuremap_dims
 
+from .loss import FocalLoss
+
 
 def init_weight(m: nn.Module) -> None:
     """Initializes network weights using Xavier normal initialization.
@@ -313,6 +316,7 @@ class GlassModel(nn.Module):
     def __init__(
         self,
         input_shape: tuple[int, int],  # (H, W)
+        anomaly_source_path: str,
         pretrain_embed_dim: int = 1024,
         target_embed_dim: int = 1024,
         backbone: str = "resnet18",
@@ -324,6 +328,13 @@ def __init__(
         dsc_layers: int = 2,
         dsc_hidden: int = 1024,
         dsc_margin: float = 0.5,
+        mining: int = 1,
+        noise: float = 0.015,
+        radius: float = 0.75,
+        p: float = 0.5,
+        lr: float = 0.0001,
+        step: int = 20,
+        svd: int = 0,
     ) -> None:
         super().__init__()
 
@@ -335,6 +346,12 @@ def __init__(
         self.input_shape = input_shape
         self.pre_trained = pre_trained
 
+        self.augmentor = PerlinAnomalyGenerator(anomaly_source_path)
+
+        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+        self.focal_loss = FocalLoss()
+
         self.forward_modules = torch.nn.ModuleDict({})
         feature_aggregator = TimmFeatureExtractor(
             backbone=self.backbone,
@@ -367,6 +384,15 @@ def __init__(
             hidden=self.dsc_hidden,
         )
 
+        self.p = p
+        self.radius = radius
+        self.mining = mining
+        self.noise = noise
+        self.distribution = 0
+        self.lr = lr
+        self.step = step
+        self.svd = svd
+
         self.patch_maker = PatchMaker(patchsize, stride=patchstride)
 
     def calculate_mean(self, images: torch.Tensor) -> torch.Tensor:
@@ -400,6 +426,41 @@ def calculate_mean(self, images: torch.Tensor) -> torch.Tensor:
 
         return torch.mean(outputs, dim=0)
 
+    def calculate_features(self,
+        img: torch.Tensor,
+        aug: torch.Tensor,
+        evaluation: bool = False,
+    ) -> tuple[torch.Tensor, torch.Tensor]:
+        """Calculate and return feature embeddings for the input and augmented images.
+
+        If a pre-projection module is used, it is applied to the embeddings of both `img` and `aug`.
+
+        Args:
+            img (torch.Tensor): The original input image tensor.
+            aug (torch.Tensor): The augmented image tensor.
+            evaluation (bool, optional): Whether the model is in evaluation mode. Defaults to False.
+
+        Returns:
+            tuple[torch.Tensor, torch.Tensor]: A tuple containing the feature embeddings for the original
+                image (`true_feats`) and the augmented image (`fake_feats`).
+        """
+        if self.pre_proj > 0:
+            fake_feats = self.pre_projection(
+                self.generate_embeddings(aug, evaluation=evaluation)[0],
+            )
+            fake_feats = fake_feats[0] if len(fake_feats) == 2 else fake_feats
+            true_feats = self.pre_projection(
+                self.generate_embeddings(img, evaluation=evaluation)[0],
+            )
+            true_feats = true_feats[0] if len(true_feats) == 2 else true_feats
+        else:
+            fake_feats = self.generate_embeddings(aug, evaluation=evaluation)[0]
+            fake_feats.requires_grad = True
+            true_feats = self.generate_embeddings(img, evaluation=evaluation)[0]
+            true_feats.requires_grad = True
+
+        return true_feats, fake_feats
+
     def generate_embeddings(
         self,
         images: torch.Tensor,
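
In the else branch above, calculate_features turns on `requires_grad` for the raw embeddings because forward later differentiates the discriminator loss with respect to them via torch.autograd.grad. A minimal illustration with toy tensors; nothing below is taken from anomalib:

import torch

feats = torch.randn(8, 4)            # embeddings from a frozen extractor
feats.requires_grad = True           # make the features differentiable inputs

gaus_feats = feats + torch.normal(0.0, 0.015, feats.shape)  # noisy copy
loss = gaus_feats.pow(2).sum()       # stand-in for the discriminator loss
(grad,) = torch.autograd.grad(loss, [gaus_feats])

print(grad.shape)                    # torch.Size([8, 4]), same as the features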
@@ -488,28 +549,90 @@ def generate_embeddings
     def forward(
         self,
         img: torch.Tensor,
-        aug: torch.Tensor,
-        evaluation: bool = False,
+        c: torch.Tensor | None = None,
     ) -> tuple[torch.Tensor, torch.Tensor]:
         """Forward pass to compute patch-wise feature embeddings for original and augmented images.
 
         Depending on whether a pre-projection module is used, this method optionally applies it to the
         embeddings generated for both `img` and `aug`. If not, the embeddings are directly obtained and
         `requires_grad` is enabled for them, likely for gradient-based optimization or anomaly generation.
         """
-        if self.pre_proj > 0:
-            fake_feats = self.pre_projection(
-                self.generate_embeddings(aug, evaluation=evaluation)[0],
-            )
-            fake_feats = fake_feats[0] if len(fake_feats) == 2 else fake_feats
-            true_feats = self.pre_projection(
-                self.generate_embeddings(img, evaluation=evaluation)[0],
-            )
-            true_feats = true_feats[0] if len(true_feats) == 2 else true_feats
+        aug, mask_s = self.augmentor(img)
+        if img is not None:
+            batch_size = img.shape[0]
+
+        true_feats, fake_feats = self.calculate_features(img, aug)
+
+        h_ratio = mask_s.shape[2] // int(math.sqrt(fake_feats.shape[0] // batch_size))
+        w_ratio = mask_s.shape[3] // int(math.sqrt(fake_feats.shape[0] // batch_size))
+
+        mask_s_resized = f.interpolate(
+            mask_s.float(),
+            size=(mask_s.shape[2] // h_ratio, mask_s.shape[3] // w_ratio),
+            mode="nearest",
+        )
+        mask_s_gt = mask_s_resized.reshape(-1, 1)
+
+        noise = torch.normal(0, self.noise, true_feats.shape).to(self.device)
+        gaus_feats = true_feats + noise
+
+        center = c.repeat(img.shape[0], 1, 1)
+        center = center.reshape(-1, center.shape[-1])
+        true_points = torch.concat(
+            [fake_feats[mask_s_gt[:, 0] == 0], true_feats],
+            dim=0,
+        )
+        c_t_points = torch.concat([center[mask_s_gt[:, 0] == 0], center], dim=0)
+        dist_t = torch.norm(true_points - c_t_points, dim=1)
+        r_t = torch.tensor([torch.quantile(dist_t, q=self.radius)]).to(self.device)
+
+        for step in range(self.step + 1):
+            scores = self.discriminator(torch.cat([true_feats, gaus_feats]))
+            true_scores = scores[: len(true_feats)]
+            gaus_scores = scores[len(true_feats) :]
+            true_loss = nn.BCELoss()(true_scores, torch.zeros_like(true_scores))
+            gaus_loss = nn.BCELoss()(gaus_scores, torch.ones_like(gaus_scores))
+            bce_loss = true_loss + gaus_loss
+
+            if step == self.step:
+                break
+
+            grad = torch.autograd.grad(gaus_loss, [gaus_feats])[0]
+            grad_norm = torch.norm(grad, dim=1)
+            grad_norm = grad_norm.view(-1, 1)
+            grad_normalized = grad / (grad_norm + 1e-10)
+
+            with torch.no_grad():
+                gaus_feats.add_(0.001 * grad_normalized)
+
+        fake_points = fake_feats[mask_s_gt[:, 0] == 1]
+        true_points = true_feats[mask_s_gt[:, 0] == 1]
+        c_f_points = center[mask_s_gt[:, 0] == 1]
+        dist_f = torch.norm(fake_points - c_f_points, dim=1)
+        proj_feats = c_f_points if self.svd == 1 else true_points
+        r = r_t if self.svd == 1 else 1
+
+        if self.svd == 1:
+            h = fake_points - proj_feats
+            h_norm = dist_f if self.svd == 1 else torch.norm(h, dim=1)
+            alpha = torch.clamp(h_norm, 2 * r, 4 * r)
+            proj = (alpha / (h_norm + 1e-10)).view(-1, 1)
+            h = proj * h
+            fake_points = proj_feats + h
+            fake_feats[mask_s_gt[:, 0] == 1] = fake_points
+
+        fake_scores = self.discriminator(fake_feats)
+
+        if self.p > 0:
+            fake_dist = (fake_scores - mask_s_gt) ** 2
+            d_hard = torch.quantile(fake_dist, q=self.p)
+            fake_scores_ = fake_scores[fake_dist >= d_hard].unsqueeze(1)
+            mask_ = mask_s_gt[fake_dist >= d_hard].unsqueeze(1)
         else:
-            fake_feats = self.generate_embeddings(aug, evaluation=evaluation)[0]
-            fake_feats.requires_grad = True
-            true_feats = self.generate_embeddings(img, evaluation=evaluation)[0]
-            true_feats.requires_grad = True
+            fake_scores_ = fake_scores
+            mask_ = mask_s_gt
+        output = torch.cat([1 - fake_scores_, fake_scores_], dim=1)
+        focal_loss = self.focal_loss(output, mask_)
 
-        return true_feats, fake_feats
+        loss = bce_loss + focal_loss
+        return true_loss, gaus_loss, bce_loss, focal_loss, loss
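
The svd == 1 branch in the new forward is the truncated projection step: feature points of synthetic anomalies are radially rescaled so their distance from the hypersphere center lands in the band [2r, 4r]. A standalone sketch with toy shapes and an assumed radius; none of the values come from anomalib:

import torch

n, d = 16, 32
center = torch.zeros(n, d)                   # plays the role of c_f_points
fake_points = torch.randn(n, d) * 5.0        # features of augmented patches
r = torch.tensor(1.0)                        # plays the role of r_t

h = fake_points - center                     # offset from the center
h_norm = torch.norm(h, dim=1)                # current radial distance
alpha = torch.clamp(h_norm, 2 * r, 4 * r)    # target distance, truncated to [2r, 4r]
proj = (alpha / (h_norm + 1e-10)).view(-1, 1)
projected = center + proj * h                # points moved onto the band

new_dist = torch.norm(projected - center, dim=1)
assert bool(torch.all(new_dist >= 2 * r - 1e-4)) and bool(torch.all(new_dist <= 4 * r + 1e-4))

Since r_t in the diff is a quantile of the normal points' distances, this plausibly keeps the synthesized outliers hard for the discriminator without letting them collapse onto the normal manifold.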
