from dataclasses import dataclass
from typing import Union, Iterable
import numpy as np
import torch
from PIL import Image
from deepinv.transform.base import Transform, TransformParam
try:
from kornia.geometry.transform import warp_perspective
except ImportError:
def warp_perspective(*args, **kwargs):
raise ImportError("The kornia package is not installed.")
def rotation_matrix(tx: float, ty: float, tz: float) -> np.ndarray:
"""Numpy implementation of ``scipy`` rotation matrix from Euler angles.
Construct 3D extrinsic rotation matrix from x, y and z angles. This is equivalent of using the ``scipy`` function:
``scipy.spatial.transform.Rotation.from_euler("xyz", (tx, ty, tz), degrees=True).as_matrix()``
:param float tx: x rotation in degrees
:param float ty: y rotation in degrees
:param float tz: z rotation in degrees
:return np.ndarray: 3D rotation matrix.
"""
tx, ty, tz = np.radians((tx, ty, tz))
# fmt: off
Rx = np.array([
[1, 0, 0],
[0, np.cos(tx), -np.sin(tx)],
[0, np.sin(tx), np.cos(tx)]
])
Ry = np.array([
[np.cos(ty), 0, np.sin(ty)],
[0, 1, 0],
[-np.sin(ty), 0, np.cos(ty)]
])
Rz = np.array([
[np.cos(tz), -np.sin(tz), 0],
[np.sin(tz), np.cos(tz), 0],
[0, 0, 1]
])
# fmt: on
return Rz @ Ry @ Rx
def apply_homography(
im: Union[torch.Tensor, Image.Image],
theta_x: float = 0.0,
theta_y: float = 0.0,
theta_z: float = 0.0,
zoom_factor: float = 1.0,
skew: float = 0,
x_stretch_factor: float = 1.0,
y_stretch_factor: float = 1.0,
x_t: float = 0.0,
y_t: float = 0.0,
padding: str = "reflection",
interpolation: str = "bilinear",
verbose: bool = False,
device="cpu",
**kwargs,
) -> Union[torch.Tensor, Image.Image]:
r"""Perform homography (projective transformation).
Given physical parameters describing camera variation, this function performs the geometric transformation given by the change in parameters.
See :class:`deepinv.transform.Homography` for more details.
The input image can be a torch Tensor, in which case ``kornia`` is used to perform the transformation, or a PIL Image where PIL transform is used.
Following https://arxiv.org/abs/2403.09327, we assume principal point in centre, initial focal length 100, initial skew of 0, initial square pixels.
:param torch.Tensor | Image.Image im: Input if tensor, image of shape (B,C,H,W), otherwise a PIL image.
:param float theta_x: tilt angle in degrees, defaults to 0.
:param float theta_y: pan angle in degrees, defaults to 0.
:param float theta_z: 2D rotation angle in degrees, defaults to 0.
:param float zoom_factor: relative focal length zoom (lower zooms out), defaults to 1.
:param float skew: relative pixel skew, defaults to 0
:param float x_stretch_factor: relative pixel x length factor, defaults to 1.
:param float y_stretch_factor: relative pixel y length factor, defaults to 1.
:param float x_t: relative x pixel translation, defaults to 0.
:param float y_t: relative y pixel translation, defaults to 0.
:param str padding: kornia padding mode, defaults to "reflection"
:param str interpolation: kornia or PIL interpolation mode, choose from "bilinear", "nearest" or "bicubic". Defaults to "bilinear"
:param bool verbose: if True, print homography matrix, defaults to False
:param str device: torch device, defaults to "cpu"
:return torch.Tensor | Image.Image: transformed image.
"""
assert interpolation in ("bilinear", "bicubic", "nearest")
w, h = (im.shape[2], im.shape[3]) if isinstance(im, torch.Tensor) else im.size
u0, v0 = int(w / 2), int(h / 2)
f = 100
s = 0
m_x = m_y = 1
# fmt: off
K = np.array([
[f*m_x, s, u0],
[0, f*m_y, v0],
[0, 0, 1]
])
K_dash = np.array([
[f/zoom_factor*m_x/x_stretch_factor, s + skew, u0 + x_t],
[0, f/zoom_factor*m_y/y_stretch_factor, v0 + y_t],
[0, 0, 1]
])
# fmt: on
R_dash = rotation_matrix(theta_x, theta_y, theta_z)
if isinstance(im, torch.Tensor):
# note thetas defined in the opposite direction here, but it doesn't matter
# for random transformations which have symmetric ranges about 0.
H_inverse = K @ R_dash @ np.linalg.inv(K_dash)
if verbose:
with np.printoptions(precision=2, suppress=True):
print(H_inverse)
return warp_perspective(
im.double(),
torch.from_numpy(H_inverse)[None].to(device),
dsize=im.shape[2:],
mode=interpolation,
padding_mode=padding,
)
else:
if interpolation == "bilinear":
pil_interp = Image.Resampling.BILINEAR
elif interpolation == "bicubic":
pil_interp = Image.Resampling.BICUBIC
elif interpolation == "nearest":
pil_interp = Image.Resampling.NEAREST
H = K_dash @ R_dash @ np.linalg.inv(K)
return im.transform(
size=(im.size[0], im.size[1]),
method=Image.Transform.PERSPECTIVE,
data=H.flatten(),
resample=pil_interp,
)
[docs]
@dataclass
class Homography(Transform):
"""
Random projective transformations (homographies).
The homography is parameterised by
geometric parameters. By fixing these parameters, subgroup transformations are
retrieved, see Wang et al. "Perspective-Equivariant Imaging: an Unsupervised
Framework for Multispectral Pansharpening" https://arxiv.org/abs/2403.09327
For example, setting x_stretch_factor_min = y_stretch_factor_min = zoom_factor_min = 1,
theta_max = theta_z_max = skew_max = 0 gives a pure translation.
Subgroup transformations include :class:`deepinv.transform.projective.Affine`, :class:`deepinv.transform.projective.Similarity`,
:class:`deepinv.transform.projective.Euclidean` along with the basic :class:`deepinv.transform.Shift`,
:class:`deepinv.transform.Rotate` and semigroup :class:`deepinv.transform.Scale`.
Transformations with perspective effects (i.e. pan+tilt) are recovered by setting
theta_max > 0.
Generates ``n_trans`` random transformations concatenated along the batch dimension.
|sep|
:Example:
Apply a random projective transformation:
>>> from deepinv.transform.projective import Homography
>>> x = torch.randn(1, 3, 16, 16) # Random 16x16 image
>>> transform = Homography(n_trans = 1)
>>> x_T = transform(x)
:param float theta_max: Maximum pan+tilt angle in degrees, defaults to 180.
:param float theta_z_max: Maximum 2D z-rotation angle in degrees, defaults to 180.
:param float zoom_factor_min: Minimum zoom factor (up to 1), defaults to 0.5.
:param float shift_max: Maximum shift percentage, where 1 is full shift, defaults to 1.
:param float skew_max: Maximum skew parameter, defaults to 50.
:param float x_stretch_factor_min: Min stretch factor along the x-axis (up to 1), defaults to 0.5.
:param float y_stretch_factor_min: Min stretch factor along the y-axis (up to 1), defaults to 0.5.
:param str padding: kornia padding mode, defaults to "reflection"
:param str interpolation: kornia or PIL interpolation mode, defaults to "bilinear"
:param str device: torch device, defaults to "cpu".
:param int n_trans: number of transformed versions generated per input image, defaults to 1.
:param torch.Generator rng: random number generator, if None, use torch.Generator(), defaults to None
"""
n_trans: int = 1
theta_max: float = 180.0
theta_z_max: float = 180.0
zoom_factor_min: float = 0.5
shift_max: float = 1.0
skew_max: float = 50.0
x_stretch_factor_min: float = 0.5
y_stretch_factor_min: float = 0.5
padding: str = "reflection"
interpolation: str = "bilinear"
device: str = "cpu"
rng: torch.Generator = None
def __post_init__(self, *args, **kwargs):
super().__init__(*args, n_trans=self.n_trans, rng=self.rng, **kwargs)
def rand(self, maxi: float, mini: float = None) -> torch.Tensor:
if mini is None:
mini = -maxi
out = (mini - maxi) * torch.rand(
self.n_trans, generator=self.rng, device=self.rng.device
) + maxi
return out.cpu() # require cpu for numpy
def _get_params(self, x: torch.Tensor) -> dict:
H, W = x.shape[-2:]
Reciprocal = lambda p: TransformParam(p, neg=lambda x: 1 / x)
return {
"theta_x": self.rand(self.theta_max),
"theta_y": self.rand(self.theta_max),
"theta_z": self.rand(self.theta_z_max),
"zoom_f": Reciprocal(self.rand(1, self.zoom_factor_min)),
"shift_x": self.rand(W / 2 * self.shift_max),
"shift_y": self.rand(H / 2 * self.shift_max), ### note W and H swapped
"skew": self.rand(self.skew_max),
"stretch_x": Reciprocal(self.rand(1, self.x_stretch_factor_min)),
"stretch_y": Reciprocal(self.rand(1, self.y_stretch_factor_min)),
}
def _transform(
self,
x: torch.Tensor,
theta_x: Union[torch.Tensor, Iterable, TransformParam] = [],
theta_y: Union[torch.Tensor, Iterable, TransformParam] = [],
theta_z: Union[torch.Tensor, Iterable, TransformParam] = [],
zoom_f: Union[torch.Tensor, Iterable, TransformParam] = [],
shift_x: Union[torch.Tensor, Iterable, TransformParam] = [],
shift_y: Union[torch.Tensor, Iterable, TransformParam] = [],
skew: Union[torch.Tensor, Iterable, TransformParam] = [],
stretch_x: Union[torch.Tensor, Iterable, TransformParam] = [],
stretch_y: Union[torch.Tensor, Iterable, TransformParam] = [],
**params,
) -> torch.Tensor:
return torch.cat(
[
apply_homography(
x.double(),
theta_x=tx,
theta_y=ty,
theta_z=tz,
zoom_factor=zf,
x_t=xt,
y_t=yt,
skew=sk,
x_stretch_factor=xsf,
y_stretch_factor=ysf,
padding=self.padding,
interpolation=self.interpolation,
device=self.device,
)
for tx, ty, tz, zf, xt, yt, sk, xsf, ysf in zip(
theta_x,
theta_y,
theta_z,
zoom_f,
shift_x,
shift_y,
skew,
stretch_x,
stretch_y,
)
],
dim=0,
).float()
[docs]
class Affine(Homography):
"""Random affine image transformations using projective transformation framework.
Special case of homography which corresponds to the actions of the affine subgroup
Aff(3). Affine transformations include translations, rotations, reflections,
skews, and stretches. These transformations are parametrised using geometric parameters in the pinhole camera model.
See :class:`deepinv.transform.Homography` for more details.
Generates ``n_trans`` random transformations concatenated along the batch dimension.
|sep|
:Example:
Apply a random affine transformation:
>>> from deepinv.transform.projective import Affine
>>> x = torch.randn(1, 3, 16, 16) # Random 16x16 image
>>> transform = Affine(n_trans = 1)
>>> x_T = transform(x)
:param float theta_z_max: Maximum 2D z-rotation angle in degrees, defaults to 180.
:param float zoom_factor_min: Minimum zoom factor (up to 1), defaults to 0.5.
:param float shift_max: Maximum shift percentage, where 1 is full shift, defaults to 1.
:param float skew_max: Maximum skew parameter, defaults to 50.
:param float x_stretch_factor_min: Min stretch factor along the x-axis (up to 1), defaults to 0.5.
:param float y_stretch_factor_min: Min stretch factor along the y-axis (up to 1), defaults to 0.5.
:param str padding: kornia padding mode, defaults to "reflection"
:param str interpolation: kornia or PIL interpolation mode, defaults to "bilinear"
:param str device: torch device, defaults to "cpu".
:param n_trans: number of transformed versions generated per input image, defaults to 1.
:param torch.Generator rng: random number generator, if None, use torch.Generator(), defaults to None
"""
def _get_params(self, x: torch.Tensor) -> dict:
self.theta_max = 0
return super()._get_params(x)
[docs]
class Similarity(Homography):
"""Random 2D similarity image transformations using projective transformation framework.
Special case of homography which corresponds to the actions of the similarity subgroup
S(2). Similarity transformations include translations, rotations, reflections and
uniform scale. These transformations are parametrised using geometric parameters in the pinhole camera model. See :class:`deepinv.transform.Homography` for more details.
Generates ``n_trans`` random transformations concatenated along the batch dimension.
|sep|
:Example:
Apply a random similarity transformation:
>>> from deepinv.transform.projective import Similarity
>>> x = torch.randn(1, 3, 16, 16) # Random 16x16 image
>>> transform = Similarity(n_trans = 1)
>>> x_T = transform(x)
:param float theta_z_max: Maximum 2D z-rotation angle in degrees, defaults to 180.
:param float zoom_factor_min: Minimum zoom factor (up to 1), defaults to 0.5.
:param float shift_max: Maximum shift percentage, where 1 is full shift, defaults to 1.
:param str padding: kornia padding mode, defaults to "reflection"
:param str interpolation: kornia or PIL interpolation mode, defaults to "bilinear"
:param str device: torch device, defaults to "cpu".
:param n_trans: number of transformed versions generated per input image, defaults to 1.
:param torch.Generator rng: random number generator, if None, use torch.Generator(), defaults to None
"""
def _get_params(self, x: torch.Tensor) -> dict:
self.theta_max = self.skew_max = 0
self.x_stretch_factor_min = self.y_stretch_factor_min = 1
return super()._get_params(x)
[docs]
class Euclidean(Homography):
"""Random Euclidean image transformations using projective transformation framework.
Special case of homography which corresponds to the actions of the Euclidean subgroup
E(2). Euclidean transformations include translations, rotations and reflections. These transformations are parametrised using geometric parameters in the pinhole camera model.
See :class:`deepinv.transform.Homography` for more details.
Generates ``n_trans`` random transformations concatenated along the batch dimension.
|sep|
:Example:
Apply a random Euclidean transformation:
>>> from deepinv.transform.projective import Euclidean
>>> x = torch.randn(1, 3, 16, 16) # Random 16x16 image
>>> transform = Euclidean(n_trans = 1)
>>> x_T = transform(x)
:param float theta_z_max: Maximum 2D z-rotation angle in degrees, defaults to 180.
:param float shift_max: Maximum shift percentage, where 1 is full shift, defaults to 1.
:param str padding: kornia padding mode, defaults to "reflection"
:param str interpolation: kornia or PIL interpolation mode, defaults to "bilinear"
:param str device: torch device, defaults to "cpu".
:param n_trans: number of transformed versions generated per input image, defaults to 1.
:param torch.Generator rng: random number generator, if None, use torch.Generator(), defaults to None
"""
def _get_params(self, x: torch.Tensor) -> dict:
self.theta_max = self.skew_max = 0
self.zoom_factor_min = self.x_stretch_factor_min = self.y_stretch_factor_min = 1
return super()._get_params(x)
[docs]
class PanTiltRotate(Homography):
"""Random 3D camera rotation image transformations using projective transformation framework.
Special case of homography which corresponds to the actions of the 3D camera rotation,
or "pan+tilt+rotate" subgroup from Wang et al. "Perspective-Equivariant Imaging: an
Unsupervised Framework for Multispectral Pansharpening" https://arxiv.org/abs/2403.09327
The transformations simulate panning, tilting or rotating the camera, leading to a
"perspective" effect. The subgroup is isomorphic to SO(3).
See :class:`deepinv.transform.Homography` for more details.
Generates ``n_trans`` random transformations concatenated along the batch dimension.
|sep|
:Example:
Apply a random pan+tilt+rotate transformation:
>>> from deepinv.transform.projective import PanTiltRotate
>>> x = torch.randn(1, 3, 16, 16) # Random 16x16 image
>>> transform = PanTiltRotate(n_trans = 1)
>>> x_T = transform(x)
:param float theta_max: Maximum pan+tilt angle in degrees, defaults to 180.
:param float theta_z_max: Maximum 2D z-rotation angle in degrees, defaults to 180.
:param str padding: kornia padding mode, defaults to "reflection"
:param str interpolation: kornia or PIL interpolation mode, defaults to "bilinear"
:param str device: torch device, defaults to "cpu".
:param n_trans: number of transformed versions generated per input image, defaults to 1.
:param torch.Generator rng: random number generator, if None, use torch.Generator(), defaults to None
"""
def _get_params(self, x: torch.Tensor) -> dict:
self.shift_max = self.skew_max = 0
self.zoom_factor_min = self.x_stretch_factor_min = self.y_stretch_factor_min = 1
return super()._get_params(x)