# image_classifier_cli_refactored.py

##
# @file image_classifier_cli_refactored.py
# @brief Command-line tool for image-based person identification using HOG features and SVM.
#
# This script provides modes to:
#   1. Optimize HOG feature parameters (resize, pixels per cell, cells per block, orientations)
#      by evaluating performance on a development set using a fixed LinearSVC.
#   2. Train an SVM model (LinearSVC) with specified HOG parameters on a training set,
#      save the model, and evaluate it on the development set.
#   3. Evaluate a pre-trained model file on the development set.
#   4. Predict identities for images in an input directory using a pre-trained model
#      and output results in the specified project format.
#
# It uses scikit-image for HOG extraction and scikit-learn for SVM classification and evaluation.
# Assumes data is organized in class-labeled subdirectories for train/dev sets.
#

import os
import glob
import numpy as np
import itertools
import time
import pickle
import argparse
import re
import logging
from typing import List, Tuple, Optional, Dict, Any, Union

# Third-party imports
try:
    from tabulate import tabulate
    HAS_TABULATE = True
except ImportError:
    HAS_TABULATE = False
    logging.info("Optional dependency 'tabulate' not found. Using basic table format.")

from skimage import io, color
from skimage.transform import resize
from skimage.feature import hog
from skimage.util import img_as_float
from sklearn.svm import LinearSVC, SVC
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.decomposition import PCA # Keep import for potential future use

# --- Logging Setup ---
# Configure logging to show informational messages
logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s')

# --- Constants and Default Configuration ---

## @brief Default path to the training data directory.
DEFAULT_TRAIN_DIR: str = '../train'
## @brief Default path to the development (validation) data directory.
DEFAULT_DEV_DIR: str = '../dev'
## @brief Default filename for saving/loading trained models.
DEFAULT_MODEL_FILENAME: str = 'trained_model_image.pkl'
## @brief Default filename for saving prediction results.
DEFAULT_PREDICTIONS_FILENAME: str = 'image_predictions.txt'

# Default HOG Parameters
## @brief Default image resize dimensions before HOG extraction.
DEFAULT_RESIZE_DIM: Tuple[int, int] = (48, 48)
## @brief Default number of orientation bins for HOG.
DEFAULT_ORIENTATIONS: int = 12
## @brief Default size (in pixels) of a HOG cell.
DEFAULT_PPC: Tuple[int, int] = (4, 4)
## @brief Default size (in cells) of a HOG block.
DEFAULT_CPB: Tuple[int, int] = (2, 2)

# Default Classifier Parameters
## @brief Default C parameter (regularization strength) for LinearSVC.
DEFAULT_CLASSIFIER_C: float = 0.1
## @brief Fixed classifier instance used during the optimization mode for fair comparison.
# Uses LinearSVC for speed. dual=True is often recommended when n_features > n_samples.
OPTIMIZATION_CLASSIFIER = LinearSVC(C=DEFAULT_CLASSIFIER_C, random_state=42, dual=True, max_iter=5000)

# --- Parameter Lists for Optimization Mode ---
## @brief List of resize dimensions (height, width) to test during optimization.
OPTIMIZE_RESIZE_DIMS: List[Tuple[int, int]] = [(32, 32), (48, 48), (64, 64)]
## @brief List of HOG 'pixels per cell' values (height, width) to test during optimization.
OPTIMIZE_PIXELS_PER_CELL: List[Tuple[int, int]] = [(4, 4), (6, 6), (8, 8)]
## @brief List of HOG 'cells per block' values (height, width) to test during optimization.
OPTIMIZE_CELLS_PER_BLOCK: List[Tuple[int, int]] = [(2, 2), (3, 3)]
## @brief List of HOG orientation bins to test during optimization.
OPTIMIZE_ORIENTATIONS: List[int] = [8, 9, 12]

# Other Constants
## @brief Number of CPU jobs to use for multiprocessing tasks (currently only affects GridSearchCV if used).
N_JOBS: int = -1
## @brief Total number of classes (persons) expected in the dataset.
NUM_CLASSES: int = 31


# --- Helper Functions ---

def parse_tuple_arg(arg_string: List[str], expected_len: int = 2, type_caster=int) -> Tuple:
    """!
    @brief Parses a list of strings into a tuple of a specified type and length.
    Used for command-line arguments representing tuples like dimensions.

    @param arg_string List of strings from argparse (e.g., ['64', '64']).
    @param expected_len Expected number of elements in the tuple.
    @param type_caster Function to cast string elements (e.g., int, float).
    @return Tuple of converted values.
    @throws argparse.ArgumentTypeError If parsing fails or length is incorrect.
    """
    try:
        parts = [type_caster(p) for p in arg_string]
        if len(parts) != expected_len:
            raise argparse.ArgumentTypeError(f"Expected {expected_len} values, got {len(parts)}")
        return tuple(parts)
    except Exception as e:
        raise argparse.ArgumentTypeError(f"Invalid tuple format: '{' '.join(arg_string)}'. Error: {e}")

def parse_filename_for_params(filename: str) -> Tuple[Tuple[int, int], Tuple[int, int], Tuple[int, int], int, float]:
    """!
    @brief Extracts HOG parameters and optionally the SVM C value from a structured model filename.
    Assumes a naming convention like '..._R{resize}_P{ppc}CB{cpb}O{orientations}_C{svm_c}.pkl'.

    @param filename The path to the model file.
    @return Tuple containing (resize_dim, ppc, cpb, orientations, svm_c). Defaults are used if parsing fails.
    """
    logging.debug(f"Attempting to parse filename: {filename}")
    match = re.search(r'_R(\d+)_P(\d+)CB(\d+)O(\d+)(?:_C([\d\.]+))?', filename, re.IGNORECASE)
    resize_dim = DEFAULT_RESIZE_DIM
    ppc = DEFAULT_PPC
    cpb = DEFAULT_CPB
    orientations = DEFAULT_ORIENTATIONS
    svm_c = DEFAULT_CLASSIFIER_C

    if match:
        try:
            r = int(match.group(1))
            p = int(match.group(2))
            cb = int(match.group(3))
            o = int(match.group(4))
            resize_dim = (r, r)
            ppc = (p, p)
            cpb = (cb, cb)
            orientations = o
            if match.group(5): 
                svm_c = float(match.group(5))
            logging.info(f"Parsed params from filename: Resize={resize_dim}, PPC={ppc}, CPB={cpb}, Orien={orientations}, C={svm_c}")
        except Exception as e:
            logging.warning(f"Parsing HOG params partially failed for '{filename}': {e}. Using some defaults.")
    else:
        logging.warning(f"Could not parse HOG params from filename '{filename}'. Using all defaults.")
    return resize_dim, ppc, cpb, orientations, svm_c

def load_data(data_dir: str, expect_labels: bool = True) -> Tuple[Optional[List[str]], Optional[np.ndarray], Optional[List[str]]]:
    """!
    @brief Loads image paths, segment names, and optionally labels from structured directories.
    Expects subdirectories named with integer class labels (e.g., '1', '2', ...) if labels=True.
    Falls back to loading images directly from data_dir if no subdirectories are found.

    @param data_dir Path to the root data directory.
    @param expect_labels If True, attempts to parse labels from subdirectory names.
    @return Tuple of (image_paths, labels, segment_names). Returns None for labels if expect_labels is False
            or if labels cannot be determined. Returns None for all if data loading fails.
    """
    image_paths: List[str] = []
    labels: List[Optional[int]] = []
    segment_names: List[str] = []
    logging.info(f"Loading data from: {data_dir} {'(expecting labels)' if expect_labels else '(paths only)'}")
    if not os.path.isdir(data_dir):
        logging.error(f"Data directory not found: {data_dir}")
        return None, None, None

    class_dirs = sorted([d for d in os.listdir(data_dir) if os.path.isdir(os.path.join(data_dir, d)) and d.isdigit()])

    if class_dirs:
        logging.info(f"Found class subdirectories: {len(class_dirs)}")
        for class_label_str in class_dirs:
            try:
                class_label = int(class_label_str) if expect_labels else None
                current_dir = os.path.join(data_dir, class_label_str)
                for img_path in glob.glob(os.path.join(current_dir, '*.[pP][nN][gG]')): # Case-insensitive glob
                    if os.path.isfile(img_path): # Ensure it's a file
                        image_paths.append(img_path)
                        segment_names.append(os.path.splitext(os.path.basename(img_path))[0])
                        if expect_labels: 
                             labels.append(class_label)
            except ValueError: 
                logging.warning(f"Skipping non-integer subdir '{class_label_str}'")
            except Exception as e: 
                logging.error(f"Error loading {current_dir}: {e}")
    else:
        logging.info("No class subdirectories found, loading images directly.")
        found_files = glob.glob(os.path.join(data_dir, '*.[pP][nN][gG]'))
        if not found_files: logging.warning(f"No PNG images found directly in {data_dir}")
        if expect_labels and found_files: logging.warning("Cannot extract labels when loading directly.")
        for img_path in found_files:
            if os.path.isfile(img_path):
                image_paths.append(img_path)
                segment_names.append(os.path.splitext(os.path.basename(img_path))[0])
                if expect_labels: 
                    labels.append(None)

    if not image_paths: 
        logging.error(f"No PNG images found in {data_dir}") 
        return None, None, None
    logging.info(f"Found {len(image_paths)} image paths.")

    final_labels_array = None
    if expect_labels:
        if any(l is not None for l in labels):
            valid_mask = [(l is not None) for l in labels]
            if not all(valid_mask):
                logging.warning("Filtering out images loaded without labels.")
                image_paths = [p for i, p in enumerate(image_paths) if valid_mask[i]]
                segment_names = [s for i, s in enumerate(segment_names) if valid_mask[i]]
                final_labels_array = np.array([l for l in labels if l is not None], dtype=int)
            else:
                 final_labels_array = np.array(labels, dtype=int)
        # else final_labels_array remains None if no labels were found

    return image_paths, final_labels_array, segment_names

def extract_hog_features(image_path: str, resize_dim: Tuple[int, int], orientations: int,
                         pixels_per_cell: Tuple[int, int], cells_per_block: Tuple[int, int]) -> Optional[np.ndarray]:
    """!
    @brief Extracts Histogram of Oriented Gradients (HOG) features for a single image.
           Handles grayscale conversion and resizing.

    @param image_path Path to the input image file.
    @param resize_dim Target dimensions (height, width) to resize the image to before HOG.
    @param orientations Number of orientation bins for HOG.
    @param pixels_per_cell Size (height, width) of a HOG cell.
    @param cells_per_block Size (height, width) in cells of a HOG block for normalization.
    @return Numpy array of HOG features, or None if an error occurs.
    """
    try:
        img = io.imread(image_path)
        # Convert to grayscale if necessary
        if img.ndim == 3 and img.shape[2] >= 3: # Check for color channels
            img_gray = color.rgb2gray(img)
        elif img.ndim == 2:
            img_gray = img # Already grayscale
        else:
            # Handle alpha channel or unexpected formats if necessary
            logging.warning(f"Unexpected image format for HOG: {img.shape}, path: {image_path}. Trying to convert.")
            # Attempt conversion assuming first channel is intensity or take mean etc.
            if img.ndim == 3 and img.shape[2] == 1: 
                img_gray = img[:,:,0]
            elif img.ndim == 3 and img.shape[2] == 4: 
                img_gray = color.rgb2gray(color.rgba2rgb(img)) # Handle RGBA
            else: 
                raise ValueError(f"Unsupported image dimension/channels: {img.shape}")

        img_float = img_as_float(img_gray) # Ensure float type
        img_resized = resize(img_float, resize_dim, anti_aliasing=True)
        features = hog(img_resized, orientations=orientations, pixels_per_cell=pixels_per_cell,
                       cells_per_block=cells_per_block, visualize=False, feature_vector=True,
                       channel_axis=None) # channel_axis=None for 2D input
        return features
    except FileNotFoundError:
        logging.error(f"Image file not found: {image_path}")
        return None
    except Exception as e:
        logging.debug(f"Error processing HOG for {os.path.basename(image_path)}: {e}")
        return None

def preprocess_data_hog(image_paths: List[str], config_name: str, resize_dim: Tuple[int, int],
                        orientations: int, ppc: Tuple[int, int], cpb: Tuple[int, int]) -> Tuple[Optional[np.ndarray], Optional[List[int]]]:
    """!
    @brief Extracts HOG features for a list of image paths.

    @param image_paths List of paths to image files.
    @param config_name A string identifier for logging purposes (e.g., "Train", "Dev Eval").
    @param resize_dim Target dimensions for resizing.
    @param orientations Number of HOG orientation bins.
    @param ppc HOG Pixels Per Cell tuple.
    @param cpb HOG Cells Per Block tuple.
    @return Tuple containing (Numpy array of extracted features (N_success, feature_dim),
                             List of original indices corresponding to the successful extractions).
            Returns (None, None) if extraction fails completely.
    """
    features_list: List[np.ndarray] = []
    processed_indices: List[int] = [] # Original indices from image_paths
    error_count = 0
    logging.info(f"Extracting HOG for {len(image_paths)} images ({config_name})...")
    start_time = time.time()
    for i, path in enumerate(image_paths):
        features = extract_hog_features(path, resize_dim, orientations, ppc, cpb)
        if features is not None and features.size > 0:
            features_list.append(features)
            processed_indices.append(i)
        else:
            error_count += 1
        if (i + 1) % 100 == 0 or (i + 1) == len(image_paths):
             logging.debug(f"  Processed {i+1}/{len(image_paths)} images...")

    elapsed_time = time.time() - start_time
    logging.info(f"HOG extraction done in {elapsed_time:.2f}s.")

    if error_count > 0: 
        logging.warning(f"Failed to extract HOG for {error_count} images.")
    if not features_list: 
        logging.error(f"No HOG features could be extracted for config {config_name}.")
        return None, None

    X_hog = np.array(features_list)
    logging.info(f"  Successfully extracted HOG for {len(processed_indices)} images. Feature dimension: {X_hog.shape[1]}")
    return X_hog, processed_indices


def save_pipeline(pipeline: Pipeline, filename: str):
    """! @brief Saves the trained pipeline object using pickle. """
    logging.info(f"Saving pipeline to {filename}")
    try:
        with open(filename, 'wb') as f: pickle.dump(pipeline, f)
    except Exception as e: 
        logging.error(f"Error saving pipeline to {filename}: {e}")

def load_pipeline(filename: str) -> Optional[Pipeline]:
    """!
    @brief Loads a trained pipeline object from a pickle file.
    @param filename Path to the pickle file.
    @return The loaded scikit-learn Pipeline object, or None if loading fails.
    """
    logging.info(f"Loading pipeline from {filename}")
    if not os.path.exists(filename):
        logging.error(f"Pipeline file not found: {filename}")
        return None
    try:
        with open(filename, 'rb') as f: pipeline = pickle.load(f)
        if not isinstance(pipeline, Pipeline):
             logging.error(f"Loaded object from {filename} is not a scikit-learn Pipeline.")
             return None
        return pipeline
    except Exception as e: 
        logging.error(f"Error loading pipeline from {filename}: {e}")
        return None

def predict_single_image(image_path: str, pipeline: Pipeline, resize_dim: Tuple[int, int],
                         orientations: int, ppc: Tuple[int, int], cpb: Tuple[int, int]) -> Tuple[Any, np.ndarray]:
    """!
    @brief Predicts class and log probabilities for a single image using a pre-trained pipeline.
           Handles HOG extraction internally based on provided parameters.

    @param image_path Path to the input image.
    @param pipeline The loaded (trained) scikit-learn Pipeline object.
    @param resize_dim HOG parameter: Target dimensions for resizing.
    @param orientations HOG parameter: Number of orientation bins.
    @param ppc HOG parameter: Pixels Per Cell tuple.
    @param cpb HOG parameter: Cells Per Block tuple.
    @return Tuple of (predicted_label, log_probabilities_array).
            Log probabilities will be NaN if the classifier doesn't support probability estimation.
            Predicted label can be NaN if prediction fails.
    """
    features = extract_hog_features(image_path, resize_dim, orientations, ppc, cpb)

    if features is None:
        logging.warning(f"Could not extract HOG for {image_path}. Returning NaN.")
        return np.nan, np.full(NUM_CLASSES, np.nan)

    features_2d = features.reshape(1, -1)
    log_probs_ordered = np.full(NUM_CLASSES, np.nan) # Initialize full NaN array

    # Predict hard decision
    try:
        hard_decision = pipeline.predict(features_2d)[0]
    except Exception as e:
        logging.error(f"Error predicting {image_path}: {e}")
        return np.nan, log_probs_ordered # Return NaNs

    # Try to get probability scores
    final_estimator = pipeline.steps[-1][1] # Get the actual classifier from the pipeline
    log_probs = None
    try:
        if hasattr(final_estimator, 'predict_log_proba'):
            log_probs = pipeline.predict_log_proba(features_2d)[0]
        elif hasattr(final_estimator, 'predict_proba'):
            epsilon = 1e-9
            probs = pipeline.predict_proba(features_2d)[0]
            # Prevent log(0) for zero probabilities
            log_probs = np.log(np.maximum(probs, epsilon))

        # Map scores to the correct class indices (1 to NUM_CLASSES)
        if log_probs is not None:
             model_classes = getattr(pipeline, 'classes_', []) # Get classes learned by the model
             for i, cls_label in enumerate(model_classes):
                  if 1 <= cls_label <= NUM_CLASSES:
                       # Use try-except for index safety, though lengths should match
                       try:
                            log_probs_ordered[cls_label - 1] = log_probs[i]
                       except IndexError:
                             logging.warning(f"Index mismatch mapping probabilities for class {cls_label}")
                  else:
                       logging.warning(f"Model predicted for class {cls_label}, outside range 1-{NUM_CLASSES}")

    except Exception as e:
        logging.warning(f"Could not get probabilities for {image_path}: {e}")
        # log_probs_ordered remains NaN

    return hard_decision, log_probs_ordered


# --- Mode Functions ---

def run_optimization(args: argparse.Namespace):
    """!
    @brief Runs the HOG parameter optimization loop.
           Tests combinations of HOG parameters, trains a fixed classifier,
           evaluates on the dev set, reports results, and saves the best model.

    @param args Parsed command-line arguments.
    """
    try:
        train_paths, y_train_all, _ = load_data(args.train_dir, expect_labels=True)
        dev_paths, y_dev_all, _ = load_data(args.dev_dir, expect_labels=True)
        if train_paths is None or dev_paths is None or y_train_all is None or y_dev_all is None:
            logging.error("Cannot run optimization: data loading failed.")
            return
    except FileNotFoundError as e: 
        logging.error(e)
        return

    param_combinations = list(itertools.product(
        args.resize_dims, args.pixels_per_cell, args.cells_per_block, args.orientations
    ))

    results = []
    total_start_time = time.time()
    fixed_svm_c = args.svm_c_optimize
    logging.info(f"Starting HOG optimization ({len(param_combinations)} combos) with fixed SVM C={fixed_svm_c}...")

    # Pre-cache features
    train_features_cache: Dict[tuple, Tuple[Optional[np.ndarray], Optional[List[int]]]] = {}
    dev_features_cache: Dict[tuple, Tuple[Optional[np.ndarray], Optional[List[int]]]] = {}
    logging.info("Pre-extracting HOG features...")
    for params in param_combinations:
        resize_dim, ppc, cpb, orientations = params
        param_key = (resize_dim, ppc, cpb, orientations)
        if param_key not in train_features_cache:
            logging.info(f" Extracting for: R:{resize_dim}, PPC:{ppc}, CPB:{cpb}, O:{orientations}")
            X_train_hog, train_idx = preprocess_data_hog(train_paths, "Train", resize_dim, orientations, ppc, cpb)
            train_features_cache[param_key] = (X_train_hog, train_idx)
            X_dev_hog, dev_idx = preprocess_data_hog(dev_paths, "Dev", resize_dim, orientations, ppc, cpb)
            dev_features_cache[param_key] = (X_dev_hog, dev_idx)
    logging.info("Feature extraction complete.")

    # Run evaluation loop
    for i, params in enumerate(param_combinations):
        resize_dim, ppc, cpb, orientations = params
        param_key = (resize_dim, ppc, cpb, orientations)
        config_name = f"R:{resize_dim}, PPC:{ppc}, CPB:{cpb}, Orien:{orientations}"
        iteration_start_time = time.time()
        logging.info(f"--- Testing Combo {i+1}/{len(param_combinations)}: {config_name} ---")

        X_train_hog, train_processed_indices = train_features_cache[param_key]
        X_dev_hog, dev_processed_indices = dev_features_cache[param_key]

        # Validation checks
        if X_train_hog is None or X_dev_hog is None or train_processed_indices is None or dev_processed_indices is None:
            logging.warning(" Skipping: Feature extraction previously failed.")
            results.append({'params': params, 'accuracy': -1.0, 'error': 'Feature extraction failed'})
            continue
        try:
            y_train = y_train_all[train_processed_indices]
            y_dev = y_dev_all[dev_processed_indices]
        except IndexError: 
            logging.warning(" Skipping: Label index error.")
            results.append({'params': params, 'accuracy': -1.0, 'error': 'Label index error'})
            continue
        if len(X_train_hog) < 5 or len(X_dev_hog) == 0 or len(y_dev) != len(X_dev_hog):
             logging.warning(" Skipping: Insufficient valid data.")
             results.append({'params': params, 'accuracy': -1.0, 'error': 'Insufficient data'})
             continue
        logging.info(f"  Train shape: {X_train_hog.shape}, Dev shape: {X_dev_hog.shape}")

        # Define fixed pipeline for comparison
        pipeline = Pipeline([('scaler', StandardScaler()), ('classifier', OPTIMIZATION_CLASSIFIER)])
        try:
            logging.info("  Training pipeline...")
            t0=time.time()
            pipeline.fit(X_train_hog, y_train)
            logging.info(f"  Training done in {time.time()-t0:.2f}s.")
            logging.info("  Evaluating...")
            t0=time.time()
            y_pred_dev = pipeline.predict(X_dev_hog)
            accuracy = accuracy_score(y_dev, y_pred_dev)
            logging.info(f"  Evaluation done in {time.time()-t0:.2f}s -> Accuracy: {accuracy:.4f}")
            results.append({'params': params, 'accuracy': accuracy, 'error': None, 'feat_dim': X_train_hog.shape[1]})
        except Exception as e: 
            logging.error(f"  Error fit/predict: {e}")
            results.append({'params': params, 'accuracy': -1.0, 'error': str(e)})
        logging.info(f"  Iteration finished in {time.time() - iteration_start_time:.2f}s")

    total_time = time.time() - total_start_time
    logging.info(f"--- Optimization Finished in {total_time:.2f} seconds ---")

    # --- Report Results ---
    best_result = None
    if results:
        results.sort(key=lambda x: x['accuracy'], reverse=True)
        best_result = results[0]
        if best_result['accuracy'] >= 0:
            logging.info("\nBest Configuration Found:")
            logging.info(f"  Parameters (Resize, PPC, CPB, Orien): {best_result['params']}")
            logging.info(f"  HOG Feature Dimension: {best_result.get('feat_dim', 'N/A')}")
            logging.info(f"  Dev Set Accuracy: {best_result['accuracy']:.4f}")
        else: 
            logging.warning("No successful configurations found.")
    # Print summary table to console
    print("\n--- Optimization Summary Table ---")
    headers = ["Resize", "PPC", "CPB", "Orien", "Feat Dim", "Accuracy", "Error"]
    table_data = [[str(p[0]), str(p[1]), str(p[2]), p[3], res.get('feat_dim', '-'), f"{res['accuracy']:.4f}" if res['accuracy'] >= 0 else "FAIL", res['error'] or ""] for res in results for p in [res['params']]]
    table_data.sort(key=lambda row: float(row[5]) if row[5] != "FAIL" else -1.0, reverse=True)
    if HAS_TABULATE: 
        print(tabulate(table_data, headers=headers, tablefmt="grid"))
    else: 
        print(" | ".join(headers))
        print("-" * 80)
        [print(f" {row[0]:<10} | {row[1]:<8} | {row[2]:<8} | {row[3]:<5} | {row[4]:<8} | {row[5]:<8} | {row[6]}") for row in table_data]

    # Retrain and save best model
    if best_result and best_result['accuracy'] >= 0:
        logging.info("Retraining best model configuration and saving...")
        best_params = best_result['params']
        resize_dim, ppc, cpb, orientations = best_params
        param_key = (resize_dim, ppc, cpb, orientations)
        X_train_hog, train_processed_indices = train_features_cache.get(param_key, (None, None))
        if X_train_hog is None: 
            logging.warning("Retraining required re-extraction.")
            X_train_hog, train_processed_indices = preprocess_data_hog(train_paths, "Best Train Final", resize_dim, orientations, ppc, cpb)
        if X_train_hog is not None and train_processed_indices is not None:
             try:
                y_train = y_train_all[train_processed_indices]
                best_pipeline = Pipeline([ ('scaler', StandardScaler()), ('classifier', OPTIMIZATION_CLASSIFIER) ])
                best_pipeline.fit(X_train_hog, y_train)
                r, p, cb, o = resize_dim[0], ppc[0], cpb[0], orientations
                c = args.svm_c_optimize
                save_path = f'image_pipeline_svm_hog_BEST_R{r}_P{p}CB{cb}O{o}_C{c}.pkl'
                save_pipeline(best_pipeline, save_path)
             except Exception as e: 
                logging.error(f"Error retraining/saving best pipeline: {e}")
        else: logging.error("Could not retrain best model: data issues.")


def run_train(args: argparse.Namespace):
    """!
    @brief Trains an SVM model with specified HOG parameters, saves the pipeline,
           and evaluates the saved model on the development set.

    @param args Parsed command-line arguments containing training parameters
                (resize, ppc, cpb, orientations, svm_c, output_model, train_dir, dev_dir).
    """
    logging.info("--- Training & Evaluating Mode ---")
    resize_dim = args.resize
    ppc = args.ppc
    cpb = args.cpb
    orientations = args.orientations
    svm_c = args.svm_c
    config_name = f"R:{resize_dim}, PPC:{ppc}, CPB:{cpb}, Orien:{orientations}, C:{svm_c}"
    logging.info(f"Using parameters: {config_name}")
    logging.info(f"Output pipeline path: {args.output_model}")

    # --- Training ---
    try: 
        train_paths, y_train_all, _ = load_data(args.train_dir, True)
        assert train_paths and y_train_all is not None
    except Exception as e: 
        logging.error(f"Failed loading train data: {e}")
        return
    X_train_hog, train_processed_indices = preprocess_data_hog(train_paths, "Train", resize_dim, orientations, ppc, cpb)
    if X_train_hog is None: 
        logging.error("Training failed: HOG error.")
        return
    try: 
        y_train = y_train_all[train_processed_indices]
        assert len(y_train) > 0
    except (IndexError, AssertionError): 
        logging.error("Training failed: Label filtering error or no valid data.")
        return
    logging.info(f"Training data shape: {X_train_hog.shape}")

    # Define classifier - Use LinearSVC for consistency with optimize mode default
    # Consider changing to SVC(probability=True) if probabilities are essential for final output
    train_classifier = LinearSVC(C=svm_c, random_state=42, dual=True, max_iter=5000)
    pipeline = Pipeline([('scaler', StandardScaler()), ('classifier', train_classifier)])
    try:
        logging.info("Training pipeline...")
        t0 = time.time()
        pipeline.fit(X_train_hog, y_train)
        logging.info(f"Training done in {time.time()-t0:.2f}s.")
        save_pipeline(pipeline, args.output_model) # Save the trained pipeline
    except Exception as e: 
        logging.error(f"Error during training: {e}")
        return

    print("-" * 30)

    # --- Evaluation ---
    logging.info("--- Evaluating Trained Model on Dev Set ---")
    # Create temporary args for run_evaluate, pointing to the model just saved
    eval_args = argparse.Namespace(input_model=args.output_model, dev_dir=args.dev_dir)
    run_evaluate(eval_args)
    print("-" * 30)
    logging.info("Training and Evaluation complete.")


def run_predict(args: argparse.Namespace):
    """!
    @brief Loads a trained pipeline and predicts identities for images in a directory.
           Outputs predictions in the specified format.

    @param args Parsed command-line arguments containing prediction parameters
                (input_model, input_data, output_predictions).
    """
    logging.info("--- Prediction Mode ---")
    logging.info(f"Loading model: {args.input_model}")
    logging.info(f"Input data dir: {args.input_data}")
    logging.info(f"Output predictions file: {args.output_predictions}")

    pipeline = load_pipeline(args.input_model)
    if pipeline is None: return

    # Parse HOG params from filename for consistent feature extraction
    resize_dim, ppc, cpb, orientations, _ = parse_filename_for_params(args.input_model)
    logging.info(f"Using HOG parameters from model: R:{resize_dim}, PPC:{ppc}, CPB:{cpb}, Orien:{orientations}")

    image_paths, _, segment_names = load_data(args.input_data, expect_labels=False)
    if image_paths is None or segment_names is None:
        logging.error("Prediction failed: Cannot load input data.")
        return

    results_lines = []
    logging.info(f"Predicting for {len(image_paths)} images...")
    predict_start_time = time.time()
    success_count = 0
    for i, (img_path, seg_name) in enumerate(zip(image_paths, segment_names)):
        if (i + 1) % 100 == 0 or i == len(image_paths)-1:
            logging.info(f"  Processed {i+1}/{len(image_paths)} images...")

        hard_decision, log_probs = predict_single_image(
            img_path, pipeline, resize_dim, orientations, ppc, cpb
        )

        # Format output line according to project spec
        log_prob_str = " ".join([f"{lp:.6f}" if not np.isnan(lp) else "nan" for lp in log_probs])
        hard_decision_str = str(int(hard_decision)) if not np.isnan(hard_decision) else "nan"
        results_lines.append(f"{seg_name} {hard_decision_str} {log_prob_str}")
        if not np.isnan(hard_decision): 
            success_count += 1

    predict_time = time.time() - predict_start_time
    logging.info(f"Prediction finished in {predict_time:.2f}s. Processed {success_count}/{len(image_paths)} images.")

    try:
        with open(args.output_predictions, 'w') as f:
            for line in results_lines: 
                f.write(line + '\n')
        logging.info(f"Predictions saved to {args.output_predictions}")
    except Exception as e: logging.error(f"Error saving predictions: {e}")


def run_evaluate(args: argparse.Namespace):
    """!
    @brief Loads a trained pipeline and evaluates its performance on the development set.

    @param args Parsed command-line arguments containing evaluation parameters
                (input_model, dev_dir).
    """
    logging.info("--- Evaluate Mode ---")
    logging.info(f"Loading model: {args.input_model}")
    logging.info(f"Evaluating on data from: {args.dev_dir}")

    pipeline = load_pipeline(args.input_model)
    if pipeline is None: 
        return

    # Parse HOG params from model filename for consistent feature extraction
    resize_dim, ppc, cpb, orientations, _ = parse_filename_for_params(args.input_model)
    logging.info(f"Using HOG parameters from model: R:{resize_dim}, PPC:{ppc}, CPB:{cpb}, Orien:{orientations}")

    # Load Dev Data with labels
    dev_paths, y_dev_all, _ = load_data(args.dev_dir, expect_labels=True)
    if dev_paths is None or y_dev_all is None:
        logging.error("Evaluation failed: Cannot load dev data.") 
        return

    # Extract HOG features for Dev Data
    X_dev_hog, dev_processed_indices = preprocess_data_hog(
        dev_paths, "Dev Eval", resize_dim, orientations, ppc, cpb
    )
    if X_dev_hog is None: 
        logging.error("Evaluation failed: HOG error on dev.")
        return

    # Filter dev labels based on successful HOG extraction
    if dev_processed_indices is None or len(dev_processed_indices) == 0:
        logging.error("Evaluation failed: No valid dev images after HOG.") 
        return
    try: 
        y_dev_true = y_dev_all[dev_processed_indices]
    except IndexError: 
        logging.error("Evaluation failed: Index error filtering dev labels.") 
        return
    if len(y_dev_true) != len(X_dev_hog): 
        logging.error(f"Eval failed: Label/feature count mismatch.") 
        return

    logging.info(f"Dev data shape for evaluation: {X_dev_hog.shape}")

    # Predict on Dev Set
    try:
        logging.info("Predicting on dev set...") 
        t0 = time.time()
        y_pred_dev = pipeline.predict(X_dev_hog)
        logging.info(f"Prediction done in {time.time()-t0:.2f}s.")
    except Exception as e: 
        logging.error(f"Error during dev set prediction: {e}")
        return

    # Calculate and Print Metrics
    accuracy = accuracy_score(y_dev_true, y_pred_dev)
    logging.info(f"Accuracy on Dev Set: {accuracy:.4f}")
    # Print classification report directly to stdout for visibility
    print("\n--- Classification Report (Dev Set) ---")
    all_possible_labels = np.arange(1, NUM_CLASSES + 1)
    present_labels = sorted(list(np.unique(y_dev_true)))
    report_labels = sorted(list(set(present_labels) | set(all_possible_labels)))
    print(classification_report(y_dev_true, y_pred_dev, labels=report_labels, zero_division=0))


# --- Argument Parser Setup ---
def create_parser() -> argparse.ArgumentParser:
    """! @brief Creates the argument parser for the command-line interface. """
    parser = argparse.ArgumentParser(
        description="Train, optimize, evaluate, or predict using HOG features and SVM.",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter # Show default values in help
    )
    parser.add_argument('--train-dir', type=str, default=DEFAULT_TRAIN_DIR, help="Path to the training data directory.")
    parser.add_argument('--dev-dir', type=str, default=DEFAULT_DEV_DIR, help="Path to the development (validation) data directory.")

    # --- Mode Selection ---
    mode_group = parser.add_mutually_exclusive_group(required=True)
    mode_group.add_argument('--optimize', action='store_true', help="Run HOG parameter optimization loop.")
    mode_group.add_argument('--train', action='store_true', help="Train a single model with specified HOG parameters.")
    mode_group.add_argument('--evaluate', action='store_true', help="Evaluate a trained model on the dev set.")
    mode_group.add_argument('--predict', action='store_true', help="Predict using a pre-trained model on new data.")

    # --- Mode-Specific Arguments ---
    optimize_group = parser.add_argument_group('Optimization Options (for --optimize)')
    optimize_group.add_argument('--svm-c-optimize', type=float, default=DEFAULT_CLASSIFIER_C, help="Fixed SVM C value for optimization comparison.")

    train_group = parser.add_argument_group('Training Options (for --train)')
    train_group.add_argument('--resize', type=int, nargs=2, metavar=('H', 'W'), default=list(DEFAULT_RESIZE_DIM), help="Resize dimension (H W).")
    train_group.add_argument('--ppc', type=int, nargs=2, metavar=('Y', 'X'), default=list(DEFAULT_PPC), help="Pixels Per Cell (Y X).")
    train_group.add_argument('--cpb', type=int, nargs=2, metavar=('Y', 'X'), default=list(DEFAULT_CPB), help="Cells Per Block (Y X).")
    train_group.add_argument('--orientations', type=int, default=DEFAULT_ORIENTATIONS, help="Number of HOG orientations.")
    train_group.add_argument('--svm-c', type=float, default=DEFAULT_CLASSIFIER_C, help="SVM C parameter.")
    train_group.add_argument('--output-model', type=str, default=DEFAULT_MODEL_FILENAME, help="Path to save the trained pipeline model.")

    evaluate_group = parser.add_argument_group('Evaluation Options (for --evaluate)')
    # Changed metavar for clarity
    evaluate_group.add_argument('--input-model-eval', type=str, metavar='MODEL_PATH', default=DEFAULT_MODEL_FILENAME, help="Path to the trained pipeline model file (.pkl) to evaluate.")

    predict_group = parser.add_argument_group('Prediction Options (for --predict)')
    predict_group.add_argument('--input-model-pred', type=str, metavar='MODEL_PATH', default=DEFAULT_MODEL_FILENAME,  help="Path to the trained pipeline model file (.pkl) for prediction.")
    predict_group.add_argument('--input-data', type=str, default=DEFAULT_DEV_DIR, help="Path to the directory containing images to predict.")
    predict_group.add_argument('--output-predictions', type=str, default=DEFAULT_PREDICTIONS_FILENAME, help="Path to save the output prediction file.")

    return parser

# --- Main Execution ---
def main():
    """!
    @brief Main function to parse arguments and dispatch execution to the appropriate mode function.
    """
    parser = create_parser()
    args = parser.parse_args()

    # Argument validation and internal variable assignment for evaluate/predict modes
    if args.train:
        args.resize = parse_tuple_arg(args.resize)
        args.ppc = parse_tuple_arg(args.ppc)
        args.cpb = parse_tuple_arg(args.cpb)
        if not args.output_model: 
            parser.error("--output-model required for --train")
    elif args.evaluate:
        if not args.input_model_eval: 
            parser.error("--input-model-eval required for --evaluate")
        args.input_model = args.input_model_eval # Use common internal name
    elif args.predict:
        if not args.input_model_pred: 
            parser.error("--input-model-pred required for --predict")
        if not args.input_data: 
            parser.error("--input-data required for --predict")
        args.input_model = args.input_model_pred # Use common internal name

    # Dispatch based on the selected mode
    if args.optimize:
        # Assign HOG param lists for optimization mode from global constants
        args.resize_dims = OPTIMIZE_RESIZE_DIMS
        args.pixels_per_cell = OPTIMIZE_PIXELS_PER_CELL
        args.cells_per_block = OPTIMIZE_CELLS_PER_BLOCK
        args.orientations = OPTIMIZE_ORIENTATIONS
        # The fixed C value is accessed via args.svm_c_optimize inside run_optimize
        run_optimization(args)
    elif args.train:
        run_train(args)
    elif args.evaluate:
        run_evaluate(args)
    elif args.predict:
        run_predict(args)
    else:
         # This state should not be reachable due to the required mode group
         logging.error("No execution mode selected. Use --help for options.")
         parser.print_help()


if __name__ == "__main__":
    main()