sedemkofi committed
Commit 346a962 · verified · 1 Parent(s): e2e2e8b

Update app.py

Files changed (1)
  1. app.py +58 -91
app.py CHANGED
@@ -7,100 +7,40 @@ import json
  from io import BytesIO
  import base64
 
- class TwiTranscriptionModel:
-     def __init__(self, encoder_model, decoder_model, char_tokenizer, max_length=50):
-         self.encoder_model = encoder_model
-         self.decoder_model = decoder_model
-         self.char_tokenizer = char_tokenizer
-         self.max_length = max_length
-         self.sos_token = '<sos>'
-         self.eos_token = '<eos>'
-         self.sos_index = char_tokenizer.word_index[self.sos_token]
-         self.eos_index = char_tokenizer.word_index[self.eos_token]
-
-     def predict(self, audio_features):
-         batch_size = audio_features.shape[0]
-         transcriptions = []
-
-         for i in range(batch_size):
-             states_value = self.encoder_model.predict(
-                 audio_features[i:i+1],
-                 verbose=0
-             )
-             target_seq = np.array([[self.sos_index]])
-             decoded_chars = []
-
-             for _ in range(self.max_length):
-                 output_tokens, h, c = self.decoder_model.predict(
-                     [target_seq] + states_value,
-                     verbose=0
-                 )
-
-                 sampled_token_index = np.argmax(output_tokens[0, -1, :])
-                 sampled_char = self.char_tokenizer.index_word.get(sampled_token_index, '')
-
-                 if sampled_char == self.eos_token or len(decoded_chars) > self.max_length:
-                     break
-
-                 decoded_chars.append(sampled_char)
-                 target_seq = np.array([[sampled_token_index]])
-                 states_value = [h, c]
-
-             transcriptions.append(''.join(decoded_chars))
-
-         return transcriptions
-
- @st.cache_resource
- def get_model():
-     try:
-         with open('twi_transcription_model.pkl', 'rb') as f:
-             model_data = pickle.load(f)
-         return TwiTranscriptionModel(
-             model_data['encoder_model'],
-             model_data['decoder_model'],
-             model_data['char_tokenizer'],
-             model_data['max_length']
-         )
-     except Exception as e:
-         st.error(f"Error loading model: {str(e)}")
-         return None
-
- def extract_mfcc(audio_data, sr=16000, n_mfcc=13):
-     if sr != 16000:
-         audio_data = librosa.resample(y=audio_data, orig_sr=sr, target_sr=16000)
-
-     mfcc = librosa.feature.mfcc(y=audio_data, sr=16000, n_mfcc=n_mfcc)
-
-     max_length = 1000  # Adjust based on your model's requirements
-     if mfcc.shape[1] > max_length:
-         mfcc = mfcc[:, :max_length]
-     else:
-         mfcc = np.pad(mfcc, ((0, 0), (0, max_length - mfcc.shape[1])), mode='constant')
-
-     return mfcc.T
-
- def calculate_error_rates(reference, hypothesis):
-     try:
-         error_wer = wer(reference, hypothesis)
-         error_cer = cer(reference, hypothesis)
-         return error_wer, error_cer
-     except Exception as e:
-         return None, None
-
- def process_audio_bytes(audio_bytes):
-     try:
-         audio_data, sr = librosa.load(BytesIO(audio_bytes), sr=None)
-         if len(audio_data.shape) > 1:
-             audio_data = np.mean(audio_data, axis=1)
-         return audio_data, sr
-     except Exception as e:
-         raise Exception(f"Error processing audio: {str(e)}")
-
- # Set page config
- st.set_page_config(page_title="Twi Speech API")
+ # ... (keep your existing imports and TwiTranscriptionModel class) ...
+
+ # Add this at the top of your file
+ class ChunkedUploader:
+     def __init__(self):
+         if 'chunks' not in st.session_state:
+             st.session_state.chunks = {}
+         if 'current_upload_id' not in st.session_state:
+             st.session_state.current_upload_id = None
+
+     def add_chunk(self, upload_id, chunk_num, total_chunks, chunk_data):
+         if upload_id not in st.session_state.chunks:
+             st.session_state.chunks[upload_id] = {'data': {}, 'total': total_chunks}
+         st.session_state.chunks[upload_id]['data'][chunk_num] = chunk_data
+
+     def is_upload_complete(self, upload_id):
+         if upload_id not in st.session_state.chunks:
+             return False
+         upload = st.session_state.chunks[upload_id]
+         return len(upload['data']) == upload['total']
+
+     def get_complete_data(self, upload_id):
+         if not self.is_upload_complete(upload_id):
+             return None
+         chunks = st.session_state.chunks[upload_id]['data']
+         sorted_chunks = [chunks[i] for i in range(len(chunks))]
+         complete_data = ''.join(sorted_chunks)
+         # Clean up after getting data
+         del st.session_state.chunks[upload_id]
+         return complete_data
 
  def main():
      model = get_model()
+     chunked_uploader = ChunkedUploader()
 
      if model is None:
          st.write(json.dumps({
@@ -123,7 +63,34 @@ def main():
          }))
          return
 
-     audio_base64 = data.get('audio')
+     # Handle chunked upload
+     if 'chunk_data' in data:
+         upload_id = data.get('upload_id')
+         chunk_num = data.get('chunk_num')
+         total_chunks = data.get('total_chunks')
+         chunk_data = data.get('chunk_data')
+
+         if not all([upload_id, chunk_num is not None, total_chunks, chunk_data]):
+             st.write(json.dumps({
+                 'error': 'Missing chunked upload parameters',
+                 'status': 'error'
+             }))
+             return
+
+         chunked_uploader.add_chunk(upload_id, chunk_num, total_chunks, chunk_data)
+
+         if not chunked_uploader.is_upload_complete(upload_id):
+             st.write(json.dumps({
+                 'status': 'pending',
+                 'message': f'Received chunk {chunk_num + 1} of {total_chunks}'
+             }))
+             return
+
+         # Get complete data if upload is finished
+         audio_base64 = chunked_uploader.get_complete_data(upload_id)
+     else:
+         audio_base64 = data.get('audio')
+
      reference_text = data.get('reference_text')
 
      if not audio_base64:
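
A possible way to drive the new chunked-upload path from a client is sketched below. This is a minimal sketch under assumptions: the commit only shows the server reading a `data` dict, so the HTTP POST transport, the `requests` dependency, and the `API_URL` and `CHUNK_SIZE` values are illustrative, not part of the commit. The field names (`upload_id`, `chunk_num`, `total_chunks`, `chunk_data`, `reference_text`) come from the diff itself; `chunk_num` is 0-based so that `get_complete_data` can reassemble chunks with `range(len(chunks))`.

import base64
import math
import uuid

import requests  # assumed HTTP client; the actual transport is not shown in the diff

API_URL = "https://example.com/twi-speech"  # hypothetical endpoint
CHUNK_SIZE = 200_000  # characters of base64 per request; arbitrary illustrative value


def send_audio_in_chunks(wav_path, reference_text=None):
    """Split a base64-encoded audio file into chunks matching the ChunkedUploader fields."""
    with open(wav_path, "rb") as f:
        audio_b64 = base64.b64encode(f.read()).decode("ascii")

    upload_id = str(uuid.uuid4())
    total_chunks = math.ceil(len(audio_b64) / CHUNK_SIZE)

    last_response = None
    for chunk_num in range(total_chunks):
        chunk = audio_b64[chunk_num * CHUNK_SIZE:(chunk_num + 1) * CHUNK_SIZE]
        payload = {
            "upload_id": upload_id,
            "chunk_num": chunk_num,        # 0-based, as get_complete_data expects
            "total_chunks": total_chunks,
            "chunk_data": chunk,
        }
        # reference_text is read from the same request after reassembly,
        # so include it (at least on the final chunk) if error rates are wanted.
        if reference_text is not None:
            payload["reference_text"] = reference_text
        last_response = requests.post(API_URL, json=payload).json()

    # Intermediate chunks should return {'status': 'pending', ...};
    # the final chunk should trigger reassembly and transcription.
    return last_response

The sketch sends every chunk under one `upload_id` generated by the client; the server keeps partial uploads in `st.session_state.chunks` and deletes the entry once the complete base64 payload has been joined and returned.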