# Importy

In [None]:
import os
import glob
import numpy as np
import matplotlib as plt
from scipy.io import wavfile
from numpy.random import randint
import ikrlib
import os
import glob
import pickle
import numpy as np
import ikrlib 
from sklearn.mixture import GaussianMixture
from sklearn.exceptions import ConvergenceWarning
import joblib
import os



# definicie

In [22]:
# Constants ---
# Directory where the trained models are stored (created below if missing).
MODEL_DIR = 'modelsAudio'
# Number of mixture components used for every per-class GMM.
AUDIO_NUM_GAUSSIANS = 8  
# Upper bound on EM iterations when fitting each GMM.
AUDIO_EM_ITERATIONS = 20 
# False selects full covariance matrices, True selects diagonal ones.
USE_DIAGONAL_COV = False 
os.makedirs(MODEL_DIR, exist_ok=True)
np.random.seed(29)  # fixed seed for NumPy's global RNG (randint) so runs stay reproducible

# ulozenie a nacitanie modelov

In [23]:


def save_model(model, filename):
    """Serialize ``model`` to ``filename`` using joblib.

    Any failure is reported on stdout instead of being raised, so the
    caller never sees an exception from this function.
    """
    try:
        joblib.dump(model, filename)
    except Exception as err:
        print(f"Error saving model: {err}")
    else:
        print(f"Saved {filename}")

def load_model(filename):
    """Load a joblib-serialized model from ``filename``.

    Returns the loaded object, or ``None`` when the file does not exist
    or cannot be deserialized (the reason is printed to stdout).
    """
    if os.path.exists(filename):
        try:
            loaded = joblib.load(filename)
        except Exception as err:
            print(f"Error loading model: {err}")
            return None
        print(f"Model loaded {filename}")
        return loaded

    print(f"{filename} not found.")
    return None

# pre-processing

In [24]:
def filter_silence(mfcc_features, energy_threshold=0.10):
    """Remove low-energy frames from an MFCC matrix.

    Column 0 of ``mfcc_features`` is treated as the per-frame energy. It is
    min-max normalized to roughly [0, 1] and only frames whose normalized
    energy exceeds ``energy_threshold`` are kept.

    If no frame passes the threshold, the 20 % of frames with the highest
    energy (at least one frame) are returned instead, so the result is
    never empty for non-empty input.

    ``None`` or an array with zero rows is returned unchanged.
    """
    if mfcc_features is None or mfcc_features.shape[0] == 0:
        return mfcc_features

    frame_energy = mfcc_features[:, 0]
    lowest = np.min(frame_energy)
    # The small epsilon guards against division by zero for constant energy.
    scaled = (frame_energy - lowest) / (np.max(frame_energy) - lowest + 1e-10)

    keep_mask = scaled > energy_threshold
    if not np.any(keep_mask):
        # Nothing exceeded the threshold: fall back to the loudest frames.
        n_keep = max(1, int(0.2 * mfcc_features.shape[0]))
        loudest = np.argsort(scaled)[-n_keep:]
        return mfcc_features[loudest, :]

    return mfcc_features[keep_mask, :]


def classify_audio(features, gmm_models):
    """Return the class whose GMM scores ``features`` highest.

    ``gmm_models`` maps class ids to fitted models exposing ``score``;
    entries that are ``None`` are skipped, as are models whose ``score``
    call raises (e.g. on a feature-dimension mismatch). Ties keep the
    class encountered first.

    Returns -1 when ``features`` is ``None``/empty or when no model
    produced a usable score.
    """
    if features is None or features.shape[0] == 0:
        return -1

    winner = None
    top_score = -np.inf
    for label, model in gmm_models.items():
        if model is None:
            continue
        try:
            # Average per-frame log-likelihood of the sample under this model.
            candidate = model.score(features)
            if candidate > top_score:
                winner, top_score = label, candidate
        except Exception:
            # Deliberate best-effort: an unusable model must not abort
            # classification against the remaining classes.
            continue

    return -1 if winner is None else winner

def extract_mfcc_features_for_classes(base_dir, class_ids=range(1, 32)):
    """Collect silence-filtered MFCC matrices for every class directory.

    For each id in ``class_ids`` the directory ``base_dir/<id>`` is scanned
    for ``*.wav`` files. MFCCs come from ``ikrlib.wav16khz2mfcc`` and each
    non-empty matrix is passed through ``filter_silence``.

    Returns a dict mapping every class id to a list of feature matrices;
    the list is empty when the directory is missing or holds no WAV files.
    """
    features_per_class = {cid: [] for cid in class_ids}

    for class_id in class_ids:
        folder = os.path.join(base_dir, str(class_id))
        wav_paths = glob.glob(os.path.join(folder, '*.wav')) if os.path.isdir(folder) else []
        if not wav_paths:
            continue

        # MFCCs for the whole directory, looked up by file path below.
        mfcc_by_path = ikrlib.wav16khz2mfcc(folder)

        kept = []
        for path in wav_paths:
            frames = mfcc_by_path.get(path)
            if frames is not None and frames.shape[0] > 0:
                kept.append(filter_silence(frames))

        features_per_class[class_id] = kept

    return features_per_class



# trening gaussovky

In [25]:
def train_single_gmm_model_sklearn(all_features_for_class, num_gaussians, num_iterations, use_diag_cov):
    """Fit one scikit-learn ``GaussianMixture`` on the frames of a single class.

    ``all_features_for_class`` is the stacked feature matrix, ``num_gaussians``
    the number of mixture components, ``num_iterations`` the EM iteration cap
    and ``use_diag_cov`` selects diagonal instead of full covariances.

    Returns the fitted model, or ``None`` when there are fewer samples than
    components or when fitting raises (the error is printed).
    """
    sample_count = 0 if all_features_for_class is None else all_features_for_class.shape[0]
    if all_features_for_class is None or sample_count < num_gaussians:
        print(f"Varovanie: Nedostatok dát pre trénovanie GMM ({sample_count} vzoriek, potrebných aspoň {num_gaussians}). Preskakujem.")
        return None

    gmm_settings = {
        'n_components': num_gaussians,
        # Covariance structure expected by scikit-learn.
        'covariance_type': 'diag' if use_diag_cov else 'full',
        'max_iter': num_iterations,
        'n_init': 1,
        'random_state': 29,
        # Small regularization to avoid singular covariance matrices.
        'reg_covar': 1e-6,
        'verbose': 0,
        'verbose_interval': 10,
    }

    try:
        model = GaussianMixture(**gmm_settings)
        model.fit(all_features_for_class)
        # lower_bound_ approximates the log-likelihood of the fitted model.
        print(f"GMM model natrénovaný. Log-likelihood: {model.lower_bound_:.4f}")
        return model
    except Exception as err:
        print(err)
        return None
    

def train_all_gmm_models_sklearn(mfcc_data_dict, num_gauss, num_iter, use_diag):
    """Train one GMM per class (ids 1..31) from per-class MFCC lists.

    ``mfcc_data_dict`` maps class ids to lists of feature matrices. The
    non-empty matrices of a class are stacked row-wise and handed to
    ``train_single_gmm_model_sklearn``.

    Returns a dict with an entry for every class id; the value is ``None``
    when the class has no usable data or training failed.
    """
    print("train_all_gmm_models_sklearn debug start\n")
    models = {}

    for class_id in range(1, 32):
        usable = [
            frames
            for frames in mfcc_data_dict.get(class_id, [])
            if frames is not None and frames.shape[0] > 0
        ]

        if usable:
            stacked = np.vstack(usable)
            print(f"trenovanie - trieda{class_id}, ma {stacked.shape[0]} features...")
            models[class_id] = train_single_gmm_model_sklearn(stacked, num_gauss, num_iter, use_diag)
        else:
            models[class_id] = None

    return models




# testovacie funkcie, evaluacia

In [26]:
def test_gmm_model():
    """Evaluate the saved per-class GMMs on the ``test`` directory.

    Loads the model file written by the training pipeline, classifies every
    sample found under ``test/<class_id>`` and prints the overall accuracy
    followed by the accuracy of each class that has at least one sample.
    Returns ``None``; returns early when the model cannot be loaded.
    """
    # The file name must match the one used when the model is saved by the
    # training pipeline ("audio_gmm_sklearn_<N>g_<I>i<diag|full>.pkl");
    # otherwise the model is never found.
    cov_tag = "diag" if USE_DIAGONAL_COV else "full"
    model_filename = f'audio_gmm_sklearn_{AUDIO_NUM_GAUSSIANS}g_{AUDIO_EM_ITERATIONS}i{cov_tag}.pkl'
    AUDIO_GMM_MODEL_FILE = os.path.join(MODEL_DIR, model_filename)
    trained_audio_gmms = load_model(AUDIO_GMM_MODEL_FILE)

    if not trained_audio_gmms:
        return

    # Extract silence-filtered MFCC features for every test class.
    test_mfcc_data = extract_mfcc_features_for_classes(base_dir="test")

    total_correct = 0
    total_samples = 0
    class_stats = {i: {'correct': 0, 'total': 0} for i in range(1, 32)}

    print("\nTesting GMM models...")

    for true_class, features_list in test_mfcc_data.items():
        for features in features_list:
            if features is None or features.shape[0] == 0:
                continue

            pred_class = classify_audio(features, trained_audio_gmms)

            class_stats[true_class]['total'] += 1
            total_samples += 1
            if pred_class == true_class:
                total_correct += 1
                class_stats[true_class]['correct'] += 1

    # Overall accuracy; 0 when nothing was evaluated.
    accuracy = total_correct / total_samples if total_samples > 0 else 0

    print(f"\nvysledky modelu")
    print(f"presnost {accuracy * 100:.2f}%")

    # Per-class accuracy (correct / total) for classes that had samples.
    print("\npresnost class:")
    for class_id in sorted(class_stats):
        total = class_stats[class_id]['total']
        correct = class_stats[class_id]['correct']
        if total > 0:
            class_accuracy = correct / total
            print(f"class {class_id}: {class_accuracy * 100:.2f}%")

In [None]:
def process_test_wav_files_sklearn(test_dir, gmm_models, output_file, class_ids=range(1, 32)):
    """Classify labelled WAV files, write the results and report accuracy.

    Every directory ``test_dir/<class_id>`` is processed once with
    ``ikrlib.wav16khz2mfcc``; each file's MFCCs are silence-filtered,
    classified with ``classify_audio`` and scored against every class model.

    Args:
        test_dir: Root directory containing one sub-directory per class id.
        gmm_models: Mapping class id -> fitted model (or ``None``).
        output_file: Path of the text file to write; one line per WAV file
            in the form ``<name> <predicted_class> <score_1> ... <score_n>``.
        class_ids: Class ids to evaluate and to emit scores for.

    Returns:
        List of ``(file_name, predicted_class, scores)`` tuples. Files with
        empty features get class ``-1`` and ``-inf`` scores.
    """
    class_ids = list(class_ids)
    results = []
    total_correct = 0
    total_samples = 0

    print(f"\nSpracovanie testovacích súborov v adresári: {test_dir}")

    for class_id in class_ids:
        class_dir = os.path.join(test_dir, str(class_id))
        if not os.path.isdir(class_dir):
            continue

        # Compute MFCCs once per class and take the file name from the same
        # dict entry as the features. Matching names to features by list
        # index (as done previously) mislabels rows whenever a file is skipped.
        mfcc_dict = ikrlib.wav16khz2mfcc(class_dir)

        for wav_path, raw_mfcc in mfcc_dict.items():
            file_name = os.path.splitext(os.path.basename(wav_path))[0]
            features = filter_silence(raw_mfcc)
            total_samples += 1

            if features is None or features.shape[0] == 0:
                print(f"Varovanie: Prázdne príznaky pre súbor {file_name}")
                results.append((file_name, -1, [float('-inf')] * len(class_ids)))
                continue

            predicted_class = classify_audio(features, gmm_models)

            # Score against every class; missing or failing models get -inf
            # so each output row has the same number of columns.
            scores = []
            for i in class_ids:
                model = gmm_models.get(i)
                if model is None:
                    scores.append(float('-inf'))
                    continue
                try:
                    scores.append(model.score(features))
                except Exception:
                    scores.append(float('-inf'))

            results.append((file_name, predicted_class, scores))

            if predicted_class == class_id:
                total_correct += 1

    print(f"\nZapisujem výsledky do súboru: {output_file}")
    with open(output_file, 'w') as f:
        for file_name, pred_class, score_values in results:
            score_str = ' '.join(f"{score:.6f}" for score in score_values)
            f.write(f"{file_name} {pred_class} {score_str}\n")

    if total_samples > 0:
        accuracy = total_correct / total_samples
        print(f"\nvysledky:")
        print(f"presnost total {accuracy * 100:.2f}% ({total_correct}/{total_samples})")
    else:
        print("\n0")

    return results

# TRENOVANIE MAIN

In [29]:

print("Pipeline pre trénovanie audio modelu (scikit-learn GMM)...")

# Extract MFCC features for every class under "train"; the extraction
# function already applies filter_silence to each file.
train_mfcc_data = extract_mfcc_features_for_classes(base_dir="train")

# File name of the model stored in MODEL_DIR; it encodes the number of
# Gaussians, the EM iteration cap and the covariance type.
model_filename = f'audio_gmm_sklearn_{AUDIO_NUM_GAUSSIANS}g_{AUDIO_EM_ITERATIONS}i{"diag" if USE_DIAGONAL_COV else "full"}.pkl'
AUDIO_GMM_MODEL_FILE = os.path.join(MODEL_DIR, model_filename)

# Train one GMM per class.
trained_audio_gmms_sklearn = train_all_gmm_models_sklearn(
    train_mfcc_data,
    AUDIO_NUM_GAUSSIANS,
    AUDIO_EM_ITERATIONS,
    USE_DIAGONAL_COV,
)

# Persist the dict of models for later evaluation.
save_model(trained_audio_gmms_sklearn, AUDIO_GMM_MODEL_FILE)


print("\nPipeline pre trénovanie audio modelu (scikit-learn GMM) dokončená.")




Pipeline pre trénovanie audio modelu (scikit-learn GMM)...
Processing file:  train\1\f401_01_f12_i0_0.wav
Processing file:  train\1\f401_01_p01_i0_0.wav
Processing file:  train\1\f401_02_f12_i0_0.wav
Processing file:  train\1\f401_02_p01_i0_0.wav
Processing file:  train\1\f401_03_f12_i0_0.wav
Processing file:  train\1\f401_03_p01_i0_0.wav
Processing file:  train\2\f402_01_f12_i0_0.wav
Processing file:  train\2\f402_01_p01_i0_0.wav
Processing file:  train\2\f402_02_f12_i0_0.wav
Processing file:  train\2\f402_02_p01_i0_0.wav
Processing file:  train\2\f402_03_f12_i0_0.wav
Processing file:  train\2\f402_03_p01_i0_0.wav
Processing file:  train\3\f403_01_f12_i0_0.wav
Processing file:  train\3\f403_01_p01_i0_0.wav
Processing file:  train\3\f403_02_f12_i0_0.wav
Processing file:  train\3\f403_02_p01_i0_0.wav
Processing file:  train\3\f403_03_f12_i0_0.wav
Processing file:  train\3\f403_03_p01_i0_0.wav
Processing file:  train\4\f404_01_f12_i0_0.wav
Processing file:  train\4\f404_01_p01_i0_0.wav
P



GMM model natrénovaný. Log-likelihood: -7.1557
trenovanie - trieda5, ma 7902 features...
GMM model natrénovaný. Log-likelihood: -8.3259
trenovanie - trieda6, ma 6614 features...
GMM model natrénovaný. Log-likelihood: -7.2480
trenovanie - trieda7, ma 5393 features...
GMM model natrénovaný. Log-likelihood: -6.8909
trenovanie - trieda8, ma 6862 features...




GMM model natrénovaný. Log-likelihood: -6.7735
trenovanie - trieda9, ma 6532 features...
GMM model natrénovaný. Log-likelihood: -8.3078
trenovanie - trieda10, ma 6267 features...
GMM model natrénovaný. Log-likelihood: -8.0559
trenovanie - trieda11, ma 5804 features...
GMM model natrénovaný. Log-likelihood: -6.8995
trenovanie - trieda12, ma 5899 features...
GMM model natrénovaný. Log-likelihood: -6.4288
trenovanie - trieda13, ma 9180 features...
GMM model natrénovaný. Log-likelihood: -7.0947
trenovanie - trieda14, ma 7281 features...




GMM model natrénovaný. Log-likelihood: -6.7068
trenovanie - trieda15, ma 6467 features...
GMM model natrénovaný. Log-likelihood: -6.1887
trenovanie - trieda16, ma 5552 features...




GMM model natrénovaný. Log-likelihood: -6.7782
trenovanie - trieda17, ma 8393 features...




GMM model natrénovaný. Log-likelihood: -8.6285
trenovanie - trieda18, ma 7337 features...
GMM model natrénovaný. Log-likelihood: -7.9073
trenovanie - trieda19, ma 6085 features...
GMM model natrénovaný. Log-likelihood: -7.8592
trenovanie - trieda20, ma 5998 features...
GMM model natrénovaný. Log-likelihood: -6.5899
trenovanie - trieda21, ma 8569 features...
GMM model natrénovaný. Log-likelihood: -8.2036
trenovanie - trieda22, ma 9542 features...
GMM model natrénovaný. Log-likelihood: -8.2734
trenovanie - trieda23, ma 6402 features...




GMM model natrénovaný. Log-likelihood: -6.6969
trenovanie - trieda24, ma 7568 features...




GMM model natrénovaný. Log-likelihood: -7.4995
trenovanie - trieda25, ma 6801 features...




GMM model natrénovaný. Log-likelihood: -6.4551
trenovanie - trieda26, ma 19545 features...
GMM model natrénovaný. Log-likelihood: -7.9879
trenovanie - trieda27, ma 6293 features...




GMM model natrénovaný. Log-likelihood: -8.4904
trenovanie - trieda28, ma 6672 features...
GMM model natrénovaný. Log-likelihood: -6.7943
trenovanie - trieda29, ma 9433 features...
GMM model natrénovaný. Log-likelihood: -7.0213
trenovanie - trieda30, ma 6020 features...




GMM model natrénovaný. Log-likelihood: -6.9094
trenovanie - trieda31, ma 6558 features...
GMM model natrénovaný. Log-likelihood: -6.5619
Saved modelsAudio\audio_gmm_sklearn_8g_20ifull.pkl

Pipeline pre trénovanie audio modelu (scikit-learn GMM) dokončená.


# TESTOVANIE MAIN

In [31]:

print("\nTESTOVANIE EVAL")

# Load the model written by the training cell.
model_path = AUDIO_GMM_MODEL_FILE
trained_gmms_loaded = load_model(model_path)
if trained_gmms_loaded is None:
    # load_model already printed the reason; fail with a clear message
    # instead of the TypeError that len(None) would raise below.
    raise RuntimeError(f"Cannot run evaluation: model {model_path} could not be loaded")

print(f"pocet tried debug {len(trained_gmms_loaded)}")

# Input directories and output files.
test_dir = "test"  # directory with the held-out test data
output_file = "audio_GMM_sklearntest.txt"  # results for the test data
train_dir = "train"  # directory with the training data
output_file_train = "audio_GMM_sklearntrain.txt"  # results for the training data

# Classify the test files and write the results.
process_test_wav_files_sklearn(test_dir, trained_gmms_loaded, output_file)
# Optional sanity check on the training data:
# process_test_wav_files_sklearn(train_dir, trained_gmms_loaded, output_file_train)
print(output_file)




TESTOVANIE EVAL
Model loaded modelsAudio\audio_gmm_sklearn_8g_20ifull.pkl
pocet tried debug 31
Processing file:  test\1\f401_04_f12_i0_0.wav
Processing file:  test\1\f401_04_p01_i0_0.wav
Processing file:  test\2\f402_04_f12_i0_0.wav
Processing file:  test\2\f402_04_p01_i0_0.wav
Processing file:  test\3\f403_04_f12_i0_0.wav
Processing file:  test\3\f403_04_p01_i0_0.wav
Processing file:  test\4\f404_04_f12_i0_0.wav
Processing file:  test\4\f404_04_p01_i0_0.wav
Processing file:  test\5\f405_04_f12_i0_0.wav
Processing file:  test\5\f405_04_p01_i0_0.wav
Processing file:  test\6\f406_04_f12_i0_0.wav
Processing file:  test\6\f406_04_p01_i0_0.wav
Processing file:  test\7\f407_04_f12_i0_0.wav
Processing file:  test\7\f407_04_p01_i0_0.wav
Processing file:  test\8\f408_04_f12_i0_0.wav
Processing file:  test\8\f408_04_p01_i0_0.wav
Processing file:  test\9\f409_04_f12_i0_0.wav
Processing file:  test\9\f409_04_p01_i0_0.wav
Processing file:  test\10\f410_04_f12_i0_0.wav
Processing file:  test\10\f41

# update pre data eval strukturu 

In [32]:
def process_eval_files_sklearn(eval_dir, gmm_models, output_file):
    """Classify the unlabelled WAV files in ``eval_dir`` and save the results.

    Unlike the test layout, the WAV files sit directly in ``eval_dir``
    (no per-class sub-directories), so no accuracy is computed.

    Args:
        eval_dir: Directory containing the ``*.wav`` files.
        gmm_models: Mapping class id -> fitted model (or ``None``).
        output_file: Path of the text file to write; one line per WAV file
            in the form ``<name> <predicted_class> <score_1> ... <score_31>``.

    Returns:
        List of ``(file_name, predicted_class, scores)`` tuples. Files with
        missing or empty features get class ``-1`` and ``-inf`` scores.
    """
    # All WAV files directly inside the eval directory.
    wav_files = glob.glob(os.path.join(eval_dir, '*.wav'))

    # MFCC features for the whole directory, looked up by file path.
    mfcc_dict = ikrlib.wav16khz2mfcc(eval_dir)

    results = []

    print(f"\nSpracovanie súborov v eval adresári: {eval_dir}")

    for wav_path in wav_files:
        file_name = os.path.splitext(os.path.basename(wav_path))[0]
        # .get avoids a KeyError if a path is absent from the MFCC dict.
        features = filter_silence(mfcc_dict.get(wav_path))

        if features is None or features.shape[0] == 0:
            # Still emit a row so every input file appears in the output.
            print(f"Varovanie: Prázdne príznaky pre súbor {file_name}")
            results.append((file_name, -1, [float('-inf')] * 31))
            continue

        predicted_class = classify_audio(features, gmm_models)

        # Scores for all 31 classes; a class without a trained model (None
        # or missing) or whose scoring fails gets -inf, matching
        # process_test_wav_files_sklearn.
        scores = []
        for i in range(1, 32):
            model = gmm_models.get(i)
            if model is None:
                scores.append(float('-inf'))
                continue
            try:
                scores.append(model.score(features))
            except Exception:
                scores.append(float('-inf'))

        results.append((file_name, predicted_class, scores))

    # Write the results.
    with open(output_file, 'w') as f:
        for file_name, pred_class, score_values in results:
            score_str = ' '.join(f"{score:.6f}" for score in score_values)
            f.write(f"{file_name} {pred_class} {score_str}\n")

    return results

In [33]:
# Eval pipeline: load the trained models and classify the eval directory.
model_path = AUDIO_GMM_MODEL_FILE
trained_gmms_loaded = load_model(model_path)
if trained_gmms_loaded is None:
    # load_model already printed the reason; fail with a clear message
    # instead of the TypeError that len(None) would raise below.
    raise RuntimeError(f"Cannot run evaluation: model {model_path} could not be loaded")

print(f"pocet tried debug {len(trained_gmms_loaded)}")

eval_dir = "eval"
output_file_eval = "audio_GMM_sklearn_eval.txt"

process_eval_files_sklearn(eval_dir, trained_gmms_loaded, output_file_eval)

print(f"Eval: {output_file_eval}")

Model loaded modelsAudio\audio_gmm_sklearn_8g_20ifull.pkl
pocet tried debug 31
Processing file:  eval\eval_00001.wav
Processing file:  eval\eval_00002.wav
Processing file:  eval\eval_00003.wav
Processing file:  eval\eval_00004.wav
Processing file:  eval\eval_00005.wav
Processing file:  eval\eval_00006.wav
Processing file:  eval\eval_00007.wav
Processing file:  eval\eval_00008.wav
Processing file:  eval\eval_00009.wav
Processing file:  eval\eval_00010.wav
Processing file:  eval\eval_00011.wav
Processing file:  eval\eval_00012.wav
Processing file:  eval\eval_00013.wav
Processing file:  eval\eval_00014.wav
Processing file:  eval\eval_00015.wav
Processing file:  eval\eval_00016.wav
Processing file:  eval\eval_00017.wav
Processing file:  eval\eval_00018.wav
Processing file:  eval\eval_00019.wav
Processing file:  eval\eval_00020.wav
Processing file:  eval\eval_00021.wav
Processing file:  eval\eval_00022.wav
Processing file:  eval\eval_00023.wav
Processing file:  eval\eval_00024.wav
Processin