Files
2025-08-26 13:24:53 -07:00

277 lines
10 KiB
Python

from PySide6.QtWidgets import (
QWidget, QVBoxLayout, QHBoxLayout, QLabel, QPushButton,
QTextEdit, QComboBox, QLineEdit, QProgressBar, QMessageBox,
QSplitter, QFrame, QFileDialog, QTabWidget, QApplication
)
from PySide6.QtCore import Qt, Signal, Slot, QTimer
from PySide6.QtGui import QPixmap, QImage, QFont, QColor, QPalette
import sys
import os
import cv2
import numpy as np
from pathlib import Path
from datetime import datetime
# Add parent directory to path for imports
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from utils.annotation_utils import convert_cv_to_qimage, convert_cv_to_pixmap
class VLMTab(QWidget):
"""Tab for Vision Language Model interaction (OpenVINO local, user prompt)."""
process_image_requested = Signal(np.ndarray, str) # image, prompt
def __init__(self):
super().__init__()
self.setupUI()
self.current_image = None
self.result_history = []
def setupUI(self):
"""Set up the user interface."""
main_layout = QVBoxLayout()
def setupUI(self):
"""Set up the user interface."""
main_layout = QVBoxLayout()
# Create splitter for adjustable layout
splitter = QSplitter(Qt.Horizontal)
# === Left panel (image display) ===
left_panel = QWidget()
left_layout = QVBoxLayout()
# Image label
self.image_label = QLabel("No image loaded")
self.image_label.setAlignment(Qt.AlignCenter)
self.image_label.setMinimumSize(400, 300)
self.image_label.setStyleSheet("background-color: #1e1e1e; border: 1px solid #333;")
left_layout.addWidget(self.image_label)
# Image controls
image_controls = QHBoxLayout()
self.load_image_btn = QPushButton("Load Image")
self.load_image_btn.clicked.connect(self.load_image)
image_controls.addWidget(self.load_image_btn)
self.capture_frame_btn = QPushButton("Capture Frame")
self.capture_frame_btn.clicked.connect(self.capture_frame)
image_controls.addWidget(self.capture_frame_btn)
left_layout.addLayout(image_controls)
# Prompt input (only prompt, no task selection)
prompt_layout = QHBoxLayout()
prompt_label = QLabel("Prompt:")
prompt_layout.addWidget(prompt_label)
self.prompt_edit = QLineEdit()
self.prompt_edit.setPlaceholderText("Enter your prompt/question for the VLM...")
self.prompt_edit.returnPressed.connect(self.process_query)
prompt_layout.addWidget(self.prompt_edit)
self.process_btn = QPushButton("Process")
self.process_btn.clicked.connect(self.process_query)
prompt_layout.addWidget(self.process_btn)
left_layout.addLayout(prompt_layout)
# Progress bar
self.progress_bar = QProgressBar()
self.progress_bar.setRange(0, 100)
self.progress_bar.setValue(0)
self.progress_bar.setTextVisible(True)
self.progress_bar.setVisible(False)
left_layout.addWidget(self.progress_bar)
left_panel.setLayout(left_layout)
# === Right panel (results) ===
right_panel = QWidget()
right_layout = QVBoxLayout()
# Results label
results_label = QLabel("Results:")
results_label.setFont(QFont("Arial", 12, QFont.Bold))
right_layout.addWidget(results_label)
# Results display
self.results_text = QTextEdit()
self.results_text.setReadOnly(True)
self.results_text.setMinimumWidth(350)
right_layout.addWidget(self.results_text)
# History controls
history_layout = QHBoxLayout()
self.clear_btn = QPushButton("Clear Results")
self.clear_btn.clicked.connect(self.clear_results)
history_layout.addWidget(self.clear_btn)
self.copy_btn = QPushButton("Copy Results")
self.copy_btn.clicked.connect(self.copy_results)
history_layout.addWidget(self.copy_btn)
right_layout.addLayout(history_layout)
right_panel.setLayout(right_layout)
# Add panels to splitter
splitter.addWidget(left_panel)
splitter.addWidget(right_panel)
splitter.setSizes([500, 500]) # Initial size distribution
# Add splitter to main layout
main_layout.addWidget(splitter)
# Status bar
self.status_label = QLabel("Ready")
main_layout.addWidget(self.status_label)
self.setLayout(main_layout)
def load_image(self):
"""Load an image from file."""
file_path, _ = QFileDialog.getOpenFileName(
self,
"Open Image",
"",
"Images (*.png *.jpg *.jpeg *.bmp *.tif *.tiff)"
)
if file_path:
try:
self.current_image = cv2.imread(file_path)
if self.current_image is None:
QMessageBox.critical(self, "Error", "Failed to load image")
return
# Convert to RGB for display
self.current_image = cv2.cvtColor(self.current_image, cv2.COLOR_BGR2RGB)
self.update_image_display()
self.status_label.setText(f"Loaded image: {Path(file_path).name}")
except Exception as e:
QMessageBox.critical(self, "Error", f"Error loading image: {str(e)}")
@Slot()
def capture_frame(self):
"""Capture current frame from video processing."""
# This should be connected to the video controller
self.status_label.setText("Waiting for frame from video feed...")
@Slot(np.ndarray)
def set_frame(self, frame):
"""Set the current frame from video processing."""
if frame is not None:
self.current_image = frame.copy()
# Convert BGR to RGB if needed
if len(frame.shape) == 3 and frame.shape[2] == 3:
self.current_image = cv2.cvtColor(self.current_image, cv2.COLOR_BGR2RGB)
self.update_image_display()
self.status_label.setText("Frame captured from video feed")
def update_image_display(self):
"""Update the image display with the current image."""
if self.current_image is not None:
# Resize image for display while maintaining aspect ratio
h, w = self.current_image.shape[:2]
max_size = 500
if h > max_size or w > max_size:
if h > w:
new_h = max_size
new_w = int(w * (max_size / h))
else:
new_w = max_size
new_h = int(h * (max_size / w))
display_img = cv2.resize(self.current_image, (new_w, new_h))
else:
display_img = self.current_image
# Convert to QPixmap and display
qimage = QImage(
display_img.data,
display_img.shape[1],
display_img.shape[0],
display_img.shape[1] * 3,
QImage.Format_RGB888
)
pixmap = QPixmap.fromImage(qimage)
self.image_label.setPixmap(pixmap)
@Slot()
def process_query(self):
"""Process the current image with the user prompt."""
if self.current_image is None:
QMessageBox.warning(self, "Warning", "Please load or capture an image first")
return
prompt = self.prompt_edit.text().strip()
if not prompt:
QMessageBox.warning(self, "Warning", "Please enter a prompt")
return
self.progress_bar.setVisible(True)
self.progress_bar.setValue(10)
self.status_label.setText("Processing...")
self.process_btn.setEnabled(False)
self.process_image_requested.emit(self.current_image, prompt)
@Slot(dict)
def handle_result(self, result):
"""Handle the result from VLM processing."""
self.progress_bar.setValue(100)
self.process_btn.setEnabled(True)
# Hide progress bar after a short delay
QTimer.singleShot(1000, lambda: self.progress_bar.setVisible(False))
# Format and display result
task = result.get("task", "unknown")
query = result.get("query", "")
answer = result.get("answer", "No answer provided")
# Format timestamp
timestamp = datetime.now().strftime("%H:%M:%S")
# Add to results text
result_text = f"[{timestamp}] "
if task == "vqa":
result_text += f"Q: {query}\nA: {answer}\n\n"
else: # search
result_text += f"Search: {query}\nResults: {answer}\n\n"
# Add to display
current_text = self.results_text.toPlainText()
self.results_text.setText(result_text + current_text)
# Update status
self.status_label.setText("Processing complete")
# Add to history
self.result_history.append({
"timestamp": timestamp,
"task": task,
"query": query,
"answer": answer
})
@Slot(str)
def handle_error(self, error_msg):
"""Handle errors from VLM processing."""
self.progress_bar.setValue(0)
self.progress_bar.setVisible(False)
self.process_btn.setEnabled(True)
QMessageBox.warning(self, "Processing Error", error_msg)
self.status_label.setText(f"Error: {error_msg}")
@Slot(int)
def update_progress(self, value):
"""Update progress bar."""
self.progress_bar.setValue(value)
@Slot()
def clear_results(self):
"""Clear the results display."""
self.results_text.clear()
self.status_label.setText("Results cleared")
@Slot()
def copy_results(self):
"""Copy results to clipboard."""
text = self.results_text.toPlainText()
clipboard = QApplication.clipboard()
clipboard.setText(text)
self.status_label.setText("Results copied to clipboard")