speech-verification / preprocessing.py
MR-AI-007's picture
Don't forget the preprocessing step; it is really important.
91c6e26 verified
raw
history blame
3.63 kB
import os
import numpy as np
from pydub import AudioSegment
from scipy.io import wavfile
def trim_wav(input_file, output_file, target_duration_minutes=24, target_duration_seconds=10):
    """Trim a WAV file down to the target duration.

    The leading ``target_duration_minutes`` minutes plus
    ``target_duration_seconds`` seconds of ``input_file`` are exported to
    ``output_file`` in WAV format. A file that is not longer than the
    target is skipped and nothing is written.

    Args:
        input_file: Path of the WAV file to load.
        output_file: Path the trimmed WAV is exported to.
        target_duration_minutes: Minutes component of the target duration.
        target_duration_seconds: Seconds component of the target duration.

    Returns:
        ``output_file`` when the trimmed file was written; ``None`` when
        the file was skipped or any error occurred (errors are printed,
        not raised).
    """
    try:
        recording = AudioSegment.from_wav(input_file)

        # The target is converted to milliseconds before being compared
        # with the length of the loaded audio and used as the slice bound.
        total_seconds = target_duration_minutes * 60 + target_duration_seconds
        limit_ms = total_seconds * 1000

        if len(recording) > limit_ms:
            # Keep only the leading part and write it out.
            recording[:limit_ms].export(output_file, format="wav")
            print(f"File berhasil dipotong menjadi {target_duration_minutes} menit {target_duration_seconds} detik")
            return output_file

        # Audio is not longer than the target: report and skip it.
        print(f"File {input_file} memiliki durasi kurang dari target, diloncati")
        return None
    except Exception as e:
        print(f"Terjadi kesalahan saat memotong file: {str(e)}")
        return None
def segment_wav_file(input_file, output_dir, segment_duration=5, overlap_percentage=0.5,
                     segment_prefix="ridho_segment"):
    """Split a WAV file into fixed-length, overlapping segments.

    Each segment is written to ``output_dir`` as
    ``{segment_prefix}_{index}.wav`` with ``index`` counting from 0.
    Trailing audio that does not fill a complete segment is not written.

    Args:
        input_file: Path of the WAV file to read.
        output_dir: Directory that receives the segment files; it is
            created if it does not exist.
        segment_duration: Length of each segment in seconds.
        overlap_percentage: Fraction of a segment shared with the next
            one (0.5 means consecutive segments overlap by half). Must be
            below 1 so that the window actually advances.
        segment_prefix: Filename prefix of the written segments.

    Returns:
        List with the paths of the written segment files, in order.

    Raises:
        ValueError: If the segment length or the step between segments
            works out to less than one sample.
    """
    os.makedirs(output_dir, exist_ok=True)

    sample_rate, audio_data = wavfile.read(input_file)

    # Work in samples from here on.
    samples_per_segment = int(segment_duration * sample_rate)
    if samples_per_segment < 1:
        raise ValueError(
            f"segment_duration={segment_duration!r} is shorter than one sample "
            f"at {sample_rate} Hz"
        )

    samples_overlap = int(samples_per_segment * overlap_percentage)
    step_size = samples_per_segment - samples_overlap
    # A zero step makes range() fail with an unrelated-looking message and a
    # negative step silently yields no segments, so reject both up front.
    if step_size < 1:
        raise ValueError(
            f"overlap_percentage={overlap_percentage!r} leaves no step between "
            "segments; it must be less than 1"
        )

    # Last start position that still leaves room for a full segment.
    last_start = len(audio_data) - samples_per_segment

    segmented_files = []
    for index, start in enumerate(range(0, last_start + 1, step_size)):
        segment = audio_data[start:start + samples_per_segment]
        output_path = os.path.join(output_dir, f"{segment_prefix}_{index}.wav")
        wavfile.write(output_path, sample_rate, segment)
        segmented_files.append(output_path)

    print(f"Berhasil membuat {len(segmented_files)} segmen")
    return segmented_files
def process_audio_pipeline(input_wav_file, output_dir):
    """Run the full preprocessing pipeline on a single WAV file.

    The recording is first trimmed with ``trim_wav`` (using its default
    target duration) into ``<output_dir>/trimmed.wav``; the trimmed file
    is then cut by ``segment_wav_file`` into 5-second segments with 50%
    overlap under ``<output_dir>/segments``.

    Args:
        input_wav_file: Path of the WAV recording to process.
        output_dir: Directory for the trimmed file and the segments;
            created if it does not exist.

    Returns:
        The list returned by ``segment_wav_file``, or ``None`` when the
        trim step produced nothing or any error occurred (errors are
        printed, not raised).
    """
    try:
        os.makedirs(output_dir, exist_ok=True)

        # Step 1: trim the recording.
        trimmed_path = os.path.join(output_dir, "trimmed.wav")
        if trim_wav(input_wav_file, trimmed_path):
            # Step 2: cut the trimmed recording into overlapping segments.
            result = segment_wav_file(
                trimmed_path,
                os.path.join(output_dir, "segments"),
                segment_duration=5,
                overlap_percentage=0.5,
            )
            print("Proses selesai!")
            return result

        # Trimming was skipped or failed, so there is nothing to segment.
        return None
    except Exception as e:
        print(f"Terjadi kesalahan dalam pipeline: {str(e)}")
        return None
# Example usage
if __name__ == "__main__":
    # Adjust the input recording path and the output directory below
    # before running this script.
    process_audio_pipeline(
        input_wav_file="full recording.wav",
        output_dir="/nama/path/to/target/dataset",
    )