#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Audio preprocessing script for SUR project
Removes the first second from each audio recording to eliminate initial spikes
"""

import os
import glob
import argparse
import numpy as np
import librosa
import soundfile as sf
from tqdm import tqdm


def trim_audio_file(input_file, output_file, trim_seconds=1.0, sample_rate=None):
    """
    Load audio file, trim the first N seconds, and save the result.

    The audio is loaded with ``librosa.load`` using its default options apart
    from ``sr`` (NOTE(review): librosa's default is to downmix to mono --
    confirm this is intended for multi-channel recordings).

    Args:
        input_file: Path to input audio file
        output_file: Path to output audio file
        trim_seconds: Number of seconds to trim from the beginning
            (must be non-negative)
        sample_rate: Sample rate to resample to (or None to keep original)

    Returns:
        True if successful, False otherwise (invalid trim length, recording
        too short, or any error raised while loading/writing the file).
    """
    # A negative value would turn into a negative slice index below and
    # silently keep only the *tail* of the recording, so reject it up front.
    if trim_seconds < 0:
        print(f"Error processing {input_file}: trim_seconds must be non-negative, got {trim_seconds}")
        return False

    try:
        # Load audio file (preserving original sample rate if sample_rate is None)
        y, sr = librosa.load(input_file, sr=sample_rate)

        # Calculate number of samples to trim
        trim_samples = int(trim_seconds * sr)

        # Ensure the audio is long enough to trim
        if len(y) <= trim_samples:
            print(f"Warning: {input_file} is too short to trim {trim_seconds} seconds. Skipping.")
            return False

        # Trim the first N seconds
        y_trimmed = y[trim_samples:]

        # Ensure output directory exists. dirname() is "" for a bare file
        # name, and os.makedirs("") raises FileNotFoundError, so only create
        # the directory when there is one to create.
        output_parent = os.path.dirname(output_file)
        if output_parent:
            os.makedirs(output_parent, exist_ok=True)

        # Save the trimmed audio
        sf.write(output_file, y_trimmed, sr)

        return True
    except Exception as e:
        # Per-file boundary: report the problem and let the caller carry on
        # with the remaining files.
        print(f"Error processing {input_file}: {e}")
        return False


def process_directory(input_dir, output_dir, trim_seconds=1.0, sample_rate=None, recursive=True):
    """
    Process all WAV files in a directory and its subdirectories.

    Args:
        input_dir: Input directory containing WAV files
        output_dir: Output directory for processed files
        trim_seconds: Number of seconds to trim from the beginning
        sample_rate: Sample rate to resample to (or None to keep original)
        recursive: Whether to recursively process subdirectories

    Returns:
        Tuple of (num_processed, num_total)
    """
    # Find all WAV files. The directory part is escaped so that characters
    # such as "[" or "*" in input_dir are matched literally instead of being
    # interpreted as glob wildcards.
    pattern = os.path.join(glob.escape(input_dir), "**" if recursive else "", "*.wav")
    # Sorted so that the processing order is deterministic across runs.
    wav_files = sorted(glob.glob(pattern, recursive=recursive))
    print(f"Found {len(wav_files)} WAV file(s) in {input_dir}")

    # Process each file
    num_processed = 0
    for wav_file in tqdm(wav_files, desc="Processing files"):
        # Construct output path (preserving directory structure)
        rel_path = os.path.relpath(wav_file, input_dir)
        output_file = os.path.join(output_dir, rel_path)

        # Process the file
        if trim_audio_file(wav_file, output_file, trim_seconds, sample_rate):
            num_processed += 1

    return num_processed, len(wav_files)


def process_dataset(data_dir, output_dir, trim_seconds=1.0, sample_rate=None):
    """
    Process an entire SUR dataset (train and dev subsets).

    Each subset that exists under ``data_dir`` is processed into the
    directory of the same name under ``output_dir``; a missing subset is
    reported with a warning and skipped.

    Args:
        data_dir: Base directory of the dataset
        output_dir: Output directory for processed dataset
        trim_seconds: Number of seconds to trim from the beginning
        sample_rate: Sample rate to resample to (or None to keep original)
    """
    # (subdirectory name, human-readable label used in the progress messages)
    subsets = (("train", "training"), ("dev", "development"))

    for subset_name, label in subsets:
        subset_dir = os.path.join(data_dir, subset_name)
        output_subset_dir = os.path.join(output_dir, subset_name)

        if not os.path.exists(subset_dir):
            # Make a wrong/missing input path visible instead of silently
            # doing nothing.
            print(f"Warning: {subset_dir} does not exist. Skipping {label} data.")
            continue

        print(f"Processing {label} data...")
        processed, total = process_directory(
            subset_dir, output_subset_dir, trim_seconds, sample_rate
        )
        print(f"Processed {processed}/{total} {label} files")


def _default_single_output(input_file):
    """Return ``<name>_processed<ext>`` next to ``input_file``."""
    # splitext only touches the final extension, so ".wav" occurring elsewhere
    # in the path is left alone and a non-.wav input never maps onto itself.
    root, ext = os.path.splitext(input_file)
    return f"{root}_processed{ext}"


def main():
    """Main function: parse command-line arguments and run the preprocessing."""
    parser = argparse.ArgumentParser(description="Preprocess audio files by removing initial spikes")
    parser.add_argument("--input_dir", type=str, default="SUR_projekt2024-2025",
                        help="Input directory containing the dataset")
    parser.add_argument("--output_dir", type=str, default="SUR_projekt2024-2025_processed",
                        help="Output directory for processed dataset")
    parser.add_argument("--trim_seconds", type=float, default=1.0,
                        help="Number of seconds to trim from the beginning of each file")
    parser.add_argument("--sample_rate", type=int, default=None,
                        help="Sample rate to resample to (or None to keep original)")
    parser.add_argument("--single_file", type=str, default=None,
                        help="Process a single file instead of a directory")
    parser.add_argument("--single_output", type=str, default=None,
                        help="Output path for the single file")

    args = parser.parse_args()

    if args.trim_seconds < 0:
        parser.error("--trim_seconds must be non-negative")

    if args.single_file:
        # Process a single file
        if not args.single_output:
            args.single_output = _default_single_output(args.single_file)

        print(f"Processing single file: {args.single_file}")
        success = trim_audio_file(args.single_file, args.single_output, args.trim_seconds, args.sample_rate)
        print(f"Processing {'successful' if success else 'failed'}")
    else:
        # Process the entire dataset
        process_dataset(args.input_dir, args.output_dir, args.trim_seconds, args.sample_rate)
        print(f"Preprocessing complete. Processed dataset saved to {args.output_dir}")


if __name__ == "__main__":
    main()