GPTfree api committed on
Commit 7038078 · verified · 1 Parent(s): a06c4fc

Update app.py

Files changed (1):
  app.py +79 -57
app.py CHANGED
@@ -1,60 +1,82 @@
 import gradio as gr
-from gradio_client import Client, handle_file
-
-# Client setup
-client = Client("Plachta/Seed-VC")
-
-def process_audio(
-    source,
-    target,
-    diffusion_steps=25,
-    length_adjust=1,
-    inference_cfg_rate=0.7,
-    f0_condition=False,
-    auto_f0_adjust=True,
-    pitch_shift=0
-):
-    # API call
-    result = client.predict(
-        source=handle_file(source.name),
-        target=handle_file(target.name),
-        diffusion_steps=diffusion_steps,
-        length_adjust=length_adjust,
-        inference_cfg_rate=inference_cfg_rate,
-        f0_condition=f0_condition,
-        auto_f0_adjust=auto_f0_adjust,
-        pitch_shift=pitch_shift,
-        api_name="/predict"
-    )
-    return result
-
-# Create the Gradio interface
-with gr.Blocks() as demo:
-    gr.Markdown("# Audio Transformation with Seed-VC")
-
-    with gr.Row():
-        source_audio = gr.Audio(label="Source Audio", type="file")
-        target_audio = gr.Audio(label="Reference Audio", type="file")
-
-    diffusion_steps = gr.Slider(1, 50, value=25, label="Diffusion Steps")
-    length_adjust = gr.Slider(0.5, 2, value=1, label="Length Adjust")
-    inference_cfg_rate = gr.Slider(0.1, 1.0, value=0.7, label="Inference CFG Rate")
-    f0_condition = gr.Checkbox(label="Use F0 conditioned model")
-    auto_f0_adjust = gr.Checkbox(label="Auto F0 adjust", value=True)
-    pitch_shift = gr.Slider(-12, 12, value=0, label="Pitch shift")
-
-    output_stream = gr.Audio(label="Stream Output Audio")
-    output_full = gr.Audio(label="Full Output Audio")
-
-    run_button = gr.Button("Transform Audio")
-
-    run_button.click(
-        process_audio,
-        inputs=[
-            source_audio, target_audio, diffusion_steps, length_adjust,
-            inference_cfg_rate, f0_condition, auto_f0_adjust, pitch_shift
-        ],
-        outputs=[output_stream, output_full]
     )
 
-demo.launch()
+import os
+import torch
+import requests
 import gradio as gr
+import torchaudio
+
+# Model download function
+def download_model(model_url, output_path):
+    if not os.path.exists(output_path):
+        print(f"Downloading model from {model_url} to {output_path}")
+        response = requests.get(model_url, stream=True)
+        if response.status_code == 200:
+            with open(output_path, 'wb') as f:
+                f.write(response.content)
+            print("Model downloaded successfully.")
+        else:
+            raise ValueError(f"Failed to download model: {response.status_code}")
+    else:
+        print(f"Model already exists at {output_path}")
+
+# Model loading function
+def load_model(model_path):
+    if not os.path.exists(model_path):
+        raise FileNotFoundError(f"Model file not found: {model_path}")
+    print(f"Loading model from {model_path}")
+    try:
+        model = torch.load(model_path, map_location=torch.device('cpu'))
+        model.eval()
+        return model
+    except Exception as e:
+        raise ValueError(f"Failed to load model. Please ensure it is a valid PyTorch model file: {e}")
+
+# Audio processing function
+def process_audio(audio_filepath, model_path):
+    try:
+        # Load the model
+        model = load_model(model_path)
+
+        # Load the input audio as a tensor
+        waveform, sample_rate = torchaudio.load(audio_filepath)
+        print(f"Loaded audio with shape {waveform.shape} and sample rate {sample_rate}")
+
+        # Run the audio through the model
+        with torch.no_grad():
+            processed_waveform = model(waveform)
+
+        # Check the result
+        if processed_waveform is None or processed_waveform.shape[1] == 0:
+            raise ValueError("Model returned empty waveform")
+
+        # Save the output
+        output_path = "processed_audio.wav"
+        torchaudio.save(output_path, processed_waveform, sample_rate)
+        print(f"Processed audio saved to {output_path}")
+
+        return output_path
+    except Exception as e:
+        print(f"Error: {str(e)}")
+        return f"Error: {str(e)}"
+
+# Gradio interface
+def create_interface():
+    model_url = "https://huggingface.co/spaces/adhisetiawan/anime-voice-generator/raw/main/pretrained_models/alice/alice.pth"
+    model_path = "alice.pth"  # Local filename for the downloaded model
+
+    # Download the model
+    download_model(model_url, model_path)
+
+    # Gradio interface
+    interface = gr.Interface(
+        fn=lambda audio_filepath: process_audio(audio_filepath, model_path),
+        inputs=gr.Audio(type="filepath", label="Source Audio"),  # Fix: type="filepath"
+        outputs=gr.Audio(type="filepath", label="Processed Audio"),  # Fix: type="filepath"
+        title="Anime Voice Filter",
+        description="Applies a filter to the audio using the specified model."
     )
+
+    return interface
 
+if __name__ == "__main__":
+    interface = create_interface()
+    interface.launch()