|
import os |
|
import numpy as np |
|
from pydub import AudioSegment |
|
from scipy.io import wavfile |
|
|
|
def trim_wav(input_file, output_file, target_duration_minutes=24, target_duration_seconds=10):
    """
    Trim a WAV file down to a target duration.

    Args:
        input_file: Path of the WAV file to load (via ``AudioSegment.from_wav``).
        output_file: Path the trimmed audio is exported to, in WAV format.
        target_duration_minutes: Minutes component of the target duration.
        target_duration_seconds: Seconds component of the target duration.

    Returns:
        ``output_file`` when the audio was exported. ``None`` when the input
        is shorter than the target duration, when the target duration is not
        positive, or when any other error occurred; errors are printed
        rather than raised.
    """
    try:
        # The slice bound below is expressed in milliseconds.
        target_duration_ms = (target_duration_minutes * 60 + target_duration_seconds) * 1000

        # A negative bound would make the slice cut from the END of the audio
        # instead of keeping its beginning, so reject it before loading.
        if target_duration_ms <= 0:
            raise ValueError(
                f"Durasi target harus lebih dari 0, diterima {target_duration_ms} ms"
            )

        audio = AudioSegment.from_wav(input_file)

        # Only skip files that are strictly shorter than the target; a file of
        # exactly the target length is valid and is exported with its full
        # length.
        if len(audio) < target_duration_ms:
            print(f"File {input_file} memiliki durasi kurang dari target, diloncati")
            return None

        trimmed_audio = audio[:target_duration_ms]
        trimmed_audio.export(output_file, format="wav")

        print(f"File berhasil dipotong menjadi {target_duration_minutes} menit {target_duration_seconds} detik")
        return output_file

    except Exception as e:
        # Best-effort contract: report the failure and signal it with None so
        # the caller can stop without handling exceptions itself.
        print(f"Terjadi kesalahan saat memotong file: {str(e)}")
        return None
|
|
|
def segment_wav_file(input_file, output_dir, segment_duration=5, overlap_percentage=0.5):
    """
    Split a WAV file into fixed-length segments that overlap each other.

    Each segment is written to ``output_dir`` as ``ridho_segment_<i>.wav``
    (``i`` counts from 0) with the sample rate of the input. Trailing samples
    that do not fill a complete segment are dropped.

    Args:
        input_file: Path of the WAV file to read with ``scipy.io.wavfile``.
        output_dir: Directory for the segment files; created if missing.
        segment_duration: Length of every segment in seconds.
        overlap_percentage: Fraction of a segment shared with the next one
            (0.5 means consecutive segments overlap by half); must be
            below 1 so that the window advances.

    Returns:
        List of the written segment paths, in order. Empty when the input is
        shorter than one segment.

    Raises:
        ValueError: If ``segment_duration`` is not positive, if
            ``overlap_percentage`` is 1 or more, or if a segment would
            contain less than one sample at the file's sample rate.
    """
    # Reject parameters that can never work before touching the filesystem.
    if segment_duration <= 0:
        raise ValueError(f"segment_duration must be positive, got {segment_duration}")
    if overlap_percentage >= 1:
        raise ValueError(
            f"overlap_percentage must be less than 1, got {overlap_percentage}"
        )

    os.makedirs(output_dir, exist_ok=True)

    sample_rate, audio_data = wavfile.read(input_file)

    samples_per_segment = int(segment_duration * sample_rate)
    if samples_per_segment < 1:
        raise ValueError(
            f"segment_duration {segment_duration} is shorter than one sample "
            f"at {sample_rate} Hz"
        )

    samples_overlap = int(samples_per_segment * overlap_percentage)

    # Distance between the starts of two consecutive segments.
    step_size = samples_per_segment - samples_overlap
    if step_size < 1:
        # A zero step makes range() fail and a negative one silently yields
        # no segments, so fail loudly instead.
        raise ValueError(
            f"overlap_percentage {overlap_percentage} leaves no room to advance"
        )

    # Last start index at which a full segment still fits in the data.
    last_start = len(audio_data) - samples_per_segment

    segmented_files = []
    for index, start in enumerate(range(0, last_start + 1, step_size)):
        # Slicing is along the first axis of the array returned by wavfile.read.
        segment = audio_data[start:start + samples_per_segment]

        output_path = os.path.join(output_dir, f"ridho_segment_{index}.wav")
        wavfile.write(output_path, sample_rate, segment)
        segmented_files.append(output_path)

    print(f"Berhasil membuat {len(segmented_files)} segmen")
    return segmented_files
|
|
|
def process_audio_pipeline(input_wav_file, output_dir, segment_duration=5, overlap_percentage=0.5):
    """
    Run the full pipeline on a single WAV file: trim, then segment.

    The input is first trimmed with ``trim_wav`` (using that function's
    default target duration) into ``<output_dir>/trimmed.wav``; the trimmed
    file is then cut into overlapping segments under
    ``<output_dir>/segments`` by ``segment_wav_file``.

    Args:
        input_wav_file: Path of the WAV file to process.
        output_dir: Directory receiving all outputs; created if missing.
        segment_duration: Segment length in seconds, forwarded to
            ``segment_wav_file``.
        overlap_percentage: Overlap fraction between consecutive segments,
            forwarded to ``segment_wav_file``.

    Returns:
        The list of segment paths returned by ``segment_wav_file``, or
        ``None`` when trimming produced no file or any step raised (the
        error is printed, not re-raised).
    """
    try:
        os.makedirs(output_dir, exist_ok=True)

        trimmed_wav = os.path.join(output_dir, "trimmed.wav")
        trimmed_result = trim_wav(input_wav_file, trimmed_wav)
        if not trimmed_result:
            # trim_wav already printed why no trimmed file exists.
            return None

        segments_dir = os.path.join(output_dir, "segments")
        segmented_files = segment_wav_file(
            trimmed_result,
            segments_dir,
            segment_duration=segment_duration,
            overlap_percentage=overlap_percentage,
        )

        print("Proses selesai!")
        return segmented_files

    except Exception as e:
        # Top-level boundary of the pipeline: report and signal failure.
        print(f"Terjadi kesalahan dalam pipeline: {str(e)}")
        return None
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: process one recording into the dataset directory.
    input_wav, output_dir = "full recording.wav", "/nama/path/to/target/dataset"

    process_audio_pipeline(input_wav_file=input_wav, output_dir=output_dir)