#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Script to create ground truth labels from directory structure
"""

import argparse
import glob
import os
import sys
from typing import Dict, List, Optional, Sequence, Tuple

# Extensions scanned for each --file_type choice. The order matters only for
# the scan sequence inside one class directory (WAV first, then PNG).
_EXTENSIONS_BY_TYPE = {
    "all": ("wav", "png"),
    "wav": ("wav",),
    "png": ("png",),
}


def _parse_args(argv: Optional[Sequence[str]]) -> argparse.Namespace:
    """Parse command-line options; ``argv=None`` falls back to ``sys.argv[1:]``."""
    parser = argparse.ArgumentParser(
        description="Generate ground truth labels from directory structure"
    )
    parser.add_argument("--data_dir", type=str, default="SUR_projekt2024-2025/dev",
                        help="Path to data directory")
    parser.add_argument("--output_file", type=str, default="dev_labels_all.txt",
                        help="Path to output labels file")
    parser.add_argument("--file_type", type=str, choices=["all", "wav", "png"],
                        default="all",
                        help="Type of files to include in the ground truth")
    return parser.parse_args(argv)


def _collect_labels(data_dir: str, extensions: Sequence[str]) -> List[Tuple[str, int]]:
    """Return ``(basename, class_id)`` pairs sorted by basename, one per basename.

    Only immediate subdirectories of ``data_dir`` whose name parses as an
    integer are treated as class directories; everything else is skipped.
    When the same basename occurs more than once (e.g. ``x.wav`` and
    ``x.png``), the first occurrence in scan order wins. If a later occurrence
    carries a different class ID, a warning is written to stderr.
    """
    labels: Dict[str, int] = {}

    # glob.escape keeps literal "[", "]", "*" and "?" in the path from being
    # interpreted as wildcard syntax.
    class_pattern = os.path.join(glob.escape(data_dir), "*")
    for class_dir in sorted(glob.glob(class_pattern)):
        if not os.path.isdir(class_dir):
            continue

        # The directory name is the class ID; ignore non-numeric directories.
        try:
            class_id = int(os.path.basename(class_dir))
        except ValueError:
            continue

        for extension in extensions:
            file_pattern = os.path.join(glob.escape(class_dir), f"*.{extension}")
            for file_path in glob.glob(file_pattern):
                basename = os.path.splitext(os.path.basename(file_path))[0]
                kept_class = labels.setdefault(basename, class_id)
                if kept_class != class_id:
                    print(
                        f"Warning: '{basename}' found in class {class_id} but "
                        f"already labelled as class {kept_class}; "
                        f"keeping {kept_class}.",
                        file=sys.stderr,
                    )

    # Basenames are unique dict keys, so this orders purely by basename.
    return sorted(labels.items())


def main(argv: Optional[Sequence[str]] = None) -> int:
    """Generate ground truth labels from directory structure

    Scans the class subdirectories of ``--data_dir`` for files of the
    requested type and writes one ``"<basename> <class_id>"`` line per unique
    basename to ``--output_file``, sorted by basename.

    Args:
        argv: Command-line arguments without the program name. ``None``
            (the default) makes argparse read ``sys.argv[1:]``.

    Returns:
        ``0`` on success, ``1`` when the data directory is missing or is not
        a directory.
    """
    args = _parse_args(argv)

    if not os.path.exists(args.data_dir):
        print(f"Error: Data directory '{args.data_dir}' does not exist.",
              file=sys.stderr)
        return 1
    if not os.path.isdir(args.data_dir):
        # Without this check a regular file would yield an empty labels file.
        print(f"Error: '{args.data_dir}' is not a directory.", file=sys.stderr)
        return 1

    unique_labels = _collect_labels(args.data_dir,
                                    _EXTENSIONS_BY_TYPE[args.file_type])

    # Save labels to file
    with open(args.output_file, "w", encoding="utf-8") as f:
        f.writelines(f"{filename} {class_id}\n"
                     for filename, class_id in unique_labels)

    print(f"Generated {len(unique_labels)} ground truth labels in '{args.output_file}'")
    return 0


if __name__ == "__main__":
    sys.exit(main())