""" Script to generate augmented images. Explanation of arguments: --num_augmentations: Number of augmented images to generate for each image. --image_dir: Directory containing the images to augment. --output_dir: Directory to save the augmented images to. Usage: python src/image/augment.py --num_augmentations 40 --image_dir ./train --output_dir ./augmented """ import argparse import random import shutil from pathlib import Path import albumentations as A from tqdm import tqdm from PIL import Image, ImageDraw import numpy as np from albumentations.core.transforms_interface import ImageOnlyTransform # set random seed random.seed(42) class RandomPolygonOcclusion(ImageOnlyTransform): def __init__(self, max_offset_ratio=0.15, fill_color=(0, 0, 0, 128), p=0.5): """ Args: max_offset_ratio (float): Maximum fraction of the image size to offset points when generating a random polygon. Default is 0.15. fill_color (tuple of int): Color to fill the polygon with. Default is (0, 0, 0, 128). p (float): Probability of applying this transform. Default is 0.5. """ super().__init__(p=p) self.max_offset_ratio = max_offset_ratio self.fill_color = fill_color def apply(self, image, **params): img = Image.fromarray(image) width, height = img.size points = [(random.randint(0, width), random.randint(0, height))] for _ in range(random.randint(4, 9)): x_off = random.randint(-int(width * self.max_offset_ratio), int(width * self.max_offset_ratio)) y_off = random.randint(-int(height * self.max_offset_ratio), int(height * self.max_offset_ratio)) last_x, last_y = points[-1] next_x = (last_x + x_off) % width next_y = (last_y + y_off) % height points.append((next_x, next_y)) mask = Image.new('RGBA', img.size, (0, 0, 0, 0)) draw = ImageDraw.Draw(mask) draw.polygon(points, fill=self.fill_color) img_rgba = img.convert("RGBA") result = Image.alpha_composite(img_rgba, mask).convert("RGB") return np.array(result) # Argument parsing parser = argparse.ArgumentParser(description="Albumentations dataset augmentation.") parser.add_argument("--num_augmentations", type=int, default=40) parser.add_argument("--image_dir", type=str, default="./train") parser.add_argument("--output_dir", type=str, default="./augmented") args = parser.parse_args() INPUT_DIR = Path(args.image_dir) OUTPUT_DIR = Path(args.output_dir) NUM_AUGS = args.num_augmentations # Clean output dir shutil.rmtree(OUTPUT_DIR, ignore_errors=True) OUTPUT_DIR.mkdir(parents=True, exist_ok=False) # Define augmentations AUGMENTATION_PIPELINE = A.Compose([ # Geometric transformations A.OneOf([ A.Affine(rotate=(-15, 15), translate_percent=(0.05, 0.05), scale=(0.95, 1.05), shear=(-5, 5), p=0.7), A.Perspective(scale=(0.05, 0.1), p=0.3), ], p=0.6), # Blur and noise A.OneOf([ A.GaussianBlur(blur_limit=(3, 7), p=0.4), A.MotionBlur(blur_limit=5, p=0.4), A.MedianBlur(blur_limit=5, p=0.4), ], p=0.4), # Noise A.OneOf([ A.GaussNoise(std_range=(0.05, 0.4), noise_scale_factor=0.9, p=0.6), A.ISONoise(p=0.4) ], p=0.3), # Occlusion techniques A.OneOf([ RandomPolygonOcclusion(p=0.7), A.CoarseDropout(num_holes_range=(1, 2), p=0.5) ], p=0.4), # Color and contrast A.OneOf([ A.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.05), A.RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.2) ], p=0.5), A.HorizontalFlip(p=0.5), # Final spatial tweak and crop A.RandomCrop(height=80, width=80, p=0.3), A.Resize(height=80, width=80) # Keep shape consistent ]) # Main loop for class_dir in tqdm(INPUT_DIR.iterdir(), desc="Augmenting classes"): if not class_dir.is_dir(): continue 
output_class_dir = OUTPUT_DIR / class_dir.name output_class_dir.mkdir(exist_ok=True) for img_path in class_dir.glob("*.png"): image = np.array(Image.open(img_path).convert("RGB")) base_name = img_path.stem # Save original Image.fromarray(image).save(output_class_dir / f"{base_name}.png") # Augmentations for i in range(NUM_AUGS): augmented = AUGMENTATION_PIPELINE(image=image)["image"] aug_name = f"{base_name}-aug-{i+1}.png" Image.fromarray(augmented).save(output_class_dir / aug_name) print("Augmentation done.")
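

# A minimal spot-check sketch (assumption: `preview_augmentation` is a hypothetical
# helper, not part of the original workflow). It applies AUGMENTATION_PIPELINE once
# to a single image and saves the result, which can be useful for tuning transform
# probabilities before running a full augmentation pass. Defined only; never called here.
def preview_augmentation(img_path, out_path="preview.png"):
    """Apply AUGMENTATION_PIPELINE once to the image at `img_path` and save the result."""
    sample = np.array(Image.open(img_path).convert("RGB"))
    preview = AUGMENTATION_PIPELINE(image=sample)["image"]
    Image.fromarray(preview).save(out_path)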