Spaces:

clayton07
/

qwen2-colpali-ocr

Running

App Files Files Community

clayton07 committed on Sep 29, 2024

Commit

b5ba0b7

verified ·

1 Parent(s): ca3b38f

Update app.py

Browse files

Files changed (1) hide show

app.py +39 -35

app.py CHANGED Viewed

@@ -82,42 +82,46 @@ if uploaded_file is not None:
     # Text query input
     text_query = st.text_input("Enter your query about the image:")
     if text_query:
-        # Perform RAG search
-        results = RAG.search(text_query, k=2)
-        # Process with Qwen2VL model
-        messages = [
-            {
-                "role": "user",
-                "content": [
-                    {
-                        "type": "image",
-                        "image": image_path,
-                    },
-                    {"type": "text", "text": text_query},
-                ],
-            }
-        ]
-        text = processor.apply_chat_template(
-            messages, tokenize=False, add_generation_prompt=True
-        )
-        image_inputs, video_inputs = process_vision_info(messages)
-        inputs = processor(
-            text=[text],
-            images=image_inputs,
-            videos=video_inputs,
-            padding=True,
-            return_tensors="pt",
-        )
-        inputs = inputs.to(device)
-        generated_ids = model.generate(**inputs, max_new_tokens=100)
-        generated_ids_trimmed = [
-            out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
-        ]
-        output_text = processor.batch_decode(
-            generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
-        )
         # Display results
         st.subheader("Results:")

     # Text query input
     text_query = st.text_input("Enter your query about the image:")
+    max_new_tokens = st.slider("Max new tokens for response", min_value=100, max_value=1000, value=100, step=10)
     if text_query:
+        with st.spinner(
+                f'Processing your query... This may take a while due to CPU processing. Generating up to {max_new_tokens} tokens.'):
+            # Perform RAG search
+            results = RAG.search(text_query, k=2)
+            # Process with Qwen2VL model
+            messages = [
+                {
+                    "role": "user",
+                    "content": [
+                        {
+                            "type": "image",
+                            "image": image_path,
+                        },
+                        {"type": "text", "text": text_query},
+                    ],
+                }
+            ]
+            text = processor.apply_chat_template(
+                messages, tokenize=False, add_generation_prompt=True
+            )
+            image_inputs, video_inputs = process_vision_info(messages)
+            inputs = processor(
+                text=[text],
+                images=image_inputs,
+                videos=video_inputs,
+                padding=True,
+                return_tensors="pt",
+            )
+            inputs = inputs.to(device)
+            generated_ids = model.generate(**inputs, max_new_tokens=max_new_tokens)  # Using the slider value here
+            generated_ids_trimmed = [
+                out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
+            ]
+            output_text = processor.batch_decode(
+                generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
+            )
         # Display results
         st.subheader("Results:")