speech-verification / preprocessing.py
MR-AI-007's picture
don't forget to preprocessing, it really important
91c6e26 verified
import os
import numpy as np
from pydub import AudioSegment
from scipy.io import wavfile
def trim_wav(input_file, output_file, target_duration_minutes=24, target_duration_seconds=10):
"""
Memotong file WAV sesuai durasi target
"""
try:
# Load file WAV
audio = AudioSegment.from_wav(input_file)
# Konversi target durasi ke milidetik
target_duration_ms = (target_duration_minutes * 60 + target_duration_seconds) * 1000
# Cek apakah durasi audio melebihi target
if len(audio) <= target_duration_ms:
print(f"File {input_file} memiliki durasi kurang dari target, diloncati")
return None
# Potong audio sesuai durasi yang diinginkan
trimmed_audio = audio[:target_duration_ms]
# Export file yang sudah dipotong
trimmed_audio.export(output_file, format="wav")
print(f"File berhasil dipotong menjadi {target_duration_minutes} menit {target_duration_seconds} detik")
return output_file
except Exception as e:
print(f"Terjadi kesalahan saat memotong file: {str(e)}")
return None
def segment_wav_file(input_file, output_dir, segment_duration=5, overlap_percentage=0.5):
"""
Memotong file WAV menjadi segmen-segmen dengan overlap
"""
# Create output directory if not exists
os.makedirs(output_dir, exist_ok=True)
# Read the input WAV file
sample_rate, audio_data = wavfile.read(input_file)
# Convert segment duration to samples
samples_per_segment = int(segment_duration * sample_rate)
samples_overlap = int(samples_per_segment * overlap_percentage)
# Calculate step size between segments
step_size = samples_per_segment - samples_overlap
# Segment the audio
segmented_files = []
for start in range(0, len(audio_data) - samples_per_segment + 1, step_size):
end = start + samples_per_segment
segment = audio_data[start:end]
# Generate output filename dengan format "sound_segment_X.wav"
segment_filename = f"ridho_segment_{start//step_size}.wav"
output_path = os.path.join(output_dir, segment_filename)
# Write segmented audio to file
wavfile.write(output_path, sample_rate, segment)
segmented_files.append(output_path)
print(f"Berhasil membuat {len(segmented_files)} segmen")
return segmented_files
def process_audio_pipeline(input_wav_file, output_dir):
"""
Fungsi utama untuk memproses single file WAV
"""
try:
# Buat direktori output jika belum ada
os.makedirs(output_dir, exist_ok=True)
# Step 1: Trim to 24:10
trimmed_wav = os.path.join(output_dir, "trimmed.wav")
trimmed_result = trim_wav(input_wav_file, trimmed_wav)
if not trimmed_result:
return
# Step 2: Create segments
segments_dir = os.path.join(output_dir, "segments")
segmented_files = segment_wav_file(
trimmed_wav,
segments_dir,
segment_duration=5,
overlap_percentage=0.5
)
print("Proses selesai!")
return segmented_files
except Exception as e:
print(f"Terjadi kesalahan dalam pipeline: {str(e)}")
return None
# Contoh penggunaan
if __name__ == "__main__":
# Sesuaikan path file input dan direktori output
input_wav = "full recording.wav"
output_dir = "/nama/path/to/target/dataset"
process_audio_pipeline(input_wav, output_dir)