from PySide6.QtCore import QObject, Signal, QThread, Qt, QMutex, QWaitCondition
from PySide6.QtWidgets import QApplication
import os
import sys
import cv2
import numpy as np
from pathlib import Path
from datetime import datetime
import json
from typing import Dict, List, Tuple, Optional

# Add parent directory to path for imports
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

# OpenVINO GenAI imports
try:
    import openvino_genai as ov_genai
    import openvino as ov
    print("[VLM DEBUG] OpenVINO GenAI imported successfully")
    OPENVINO_AVAILABLE = True
except ImportError as e:
    print(f"[VLM DEBUG] Failed to import OpenVINO GenAI: {e}")
    OPENVINO_AVAILABLE = False

# PIL for image processing
try:
    from PIL import Image
    print("[VLM DEBUG] PIL imported successfully")
    PIL_AVAILABLE = True
except ImportError as e:
    print(f"[VLM DEBUG] Failed to import PIL: {e}")
    PIL_AVAILABLE = False

class VLMControllerThread(QThread):
    """Worker thread for VLM processing using OpenVINO GenAI."""

    result_ready = Signal(dict)
    error_occurred = Signal(str)
    progress_updated = Signal(int)

    def __init__(self, vlm_dir=None):
        super().__init__()
        # Set VLM directory to the downloaded OpenVINO model
        if vlm_dir is None:
            current_dir = Path(__file__).parent.parent
            self.vlm_dir = current_dir / "llava_openvino_model"
        else:
            self.vlm_dir = Path(vlm_dir).resolve()

        self.mutex = QMutex()
        self.condition = QWaitCondition()
        self.abort = False
        self.image = None
        self.prompt = None
        self.vlm_pipeline = None
        self.device = "GPU"  # Default to GPU for maximum performance

        print("[VLM DEBUG] VLMControllerThread initialized (OpenVINO GenAI)")
        print(f"[VLM DEBUG] VLM directory: {self.vlm_dir}")
        print(f"[VLM DEBUG] Directory exists: {self.vlm_dir.exists()}")
        print("[VLM DEBUG] 🚀 DEFAULT DEVICE: GPU (priority)")
        print("[VLM DEBUG] Model will be loaded in worker thread (non-blocking)")

    def run(self):
        """Main thread loop - load model first, then process requests."""
        print("[VLM DEBUG] Worker thread started, loading model...")
        self._load_model()

        while not self.abort:
            self.mutex.lock()
            try:
                if self.image is None or self.prompt is None:
                    self.condition.wait(self.mutex, 100)  # Wait for 100ms
                    continue

                # Take the pending request
                image = self.image
                prompt = self.prompt
                self.image = None
                self.prompt = None
            finally:
                self.mutex.unlock()

            # Process outside the lock
            result = self._process_request(image, prompt)
            self.result_ready.emit(result)

    def _load_model(self):
        """Load the VLM model using OpenVINO GenAI."""
        try:
            print("[VLM DEBUG] Starting OpenVINO GenAI model loading...")

            # Check if OpenVINO GenAI is available
            if not OPENVINO_AVAILABLE:
                print("[VLM DEBUG] ❌ OpenVINO GenAI not available")
                return

            # Check if VLM directory exists
            if not self.vlm_dir.exists():
                print(f"[VLM DEBUG] ❌ VLM directory does not exist: {self.vlm_dir}")
                return

            # List files in VLM directory
            files_in_dir = list(self.vlm_dir.glob("*"))
            print(f"[VLM DEBUG] 📁 Files in VLM directory ({len(files_in_dir)}):")
            for file in sorted(files_in_dir):
                print(f"[VLM DEBUG] - {file.name}")

            # Check for required OpenVINO files
            required_files = [
                "openvino_language_model.xml",
                "openvino_language_model.bin",
                "openvino_vision_embeddings_model.xml",
                "openvino_vision_embeddings_model.bin",
                "openvino_text_embeddings_model.xml",
                "openvino_text_embeddings_model.bin",
            ]

            missing_files = []
            for file in required_files:
                if not (self.vlm_dir / file).exists():
                    missing_files.append(file)

            if missing_files:
                print(f"[VLM DEBUG] ⚠️ Missing files: {missing_files}")
            else:
                print("[VLM DEBUG] ✅ All required OpenVINO files found")

            # Detect available devices with GPU priority
            try:
                # Check system GPU info first (Windows-only query via wmic)
                try:
                    import subprocess
                    result = subprocess.run(
                        ['wmic', 'path', 'win32_VideoController', 'get', 'name'],
                        capture_output=True, text=True
                    )
                    if result.returncode == 0:
                        print("[VLM DEBUG] 🖥️ System GPU info:")
                        for line in result.stdout.split('\n'):
                            if line.strip() and 'Name' not in line:
                                print(f"[VLM DEBUG] - {line.strip()}")
                except Exception as e:
                    print(f"[VLM DEBUG] Could not get system GPU info: {e}")

                core = ov.Core()
                available_devices = core.available_devices
                print(f"[VLM DEBUG] 🔍 Available OpenVINO devices: {available_devices}")

                # Check GPU capabilities
                gpu_available = "GPU" in available_devices
                print(f"[VLM DEBUG] GPU detected by OpenVINO: {gpu_available}")

                if not gpu_available:
                    print("[VLM DEBUG] ⚠️ GPU not detected by OpenVINO")
                    print("[VLM DEBUG] 💡 Possible solutions:")
                    print("[VLM DEBUG] - Update Intel GPU drivers")
                    print("[VLM DEBUG] - Install OpenVINO GPU plugin")
                    print("[VLM DEBUG] - Check GPU compatibility")

                # GPU priority - try GPU first whenever it is available
                if "GPU" in available_devices:
                    self.device = "GPU"
                    print("[VLM DEBUG] 🚀 PRIORITY: GPU selected for VLM inference")
                    print("[VLM DEBUG] 🎮 GPU will be used for enhanced performance")

                    # Sanity check: create a small tensor (this does not exercise
                    # the GPU plugin itself)
                    try:
                        test_tensor = ov.Tensor(np.array([[1]], dtype=np.float32))
                        print("[VLM DEBUG] ✅ Test tensor created successfully")
                    except Exception as gpu_test_error:
                        print(f"[VLM DEBUG] ⚠️ Test tensor creation failed: {gpu_test_error}")
                        print("[VLM DEBUG] 🔄 Will attempt GPU anyway")

                elif "CPU" in available_devices:
                    self.device = "CPU"
                    print("[VLM DEBUG] 🔧 FALLBACK: CPU selected (GPU not available)")
                    print("[VLM DEBUG] ✅ CPU will provide stable inference")
                else:
                    self.device = "AUTO"  # Last resort
                    print("[VLM DEBUG] 🤖 AUTO: Letting OpenVINO choose device")

            except Exception as e:
                print(f"[VLM DEBUG] ⚠️ Device detection failed: {e}")
                print("[VLM DEBUG] 🔄 Defaulting to GPU (will fall back to CPU if needed)")
                self.device = "GPU"  # Still try GPU first

            # Load the VLM pipeline with GPU priority
            try:
                print(f"[VLM DEBUG] 🚀 Loading VLMPipeline from: {self.vlm_dir}")
                print(f"[VLM DEBUG] 🎯 Target device: {self.device}")

                # Try GPU first
                if self.device == "GPU":
                    try:
                        print("[VLM DEBUG] 🎮 Attempting GPU loading...")
                        self.vlm_pipeline = ov_genai.VLMPipeline(str(self.vlm_dir), "GPU")
                        self.device = "GPU"
                        print("[VLM DEBUG] ✅ VLMPipeline loaded successfully on GPU!")
                        print("[VLM DEBUG] 🎉 OpenVINO GenAI VLM ready for GPU inference")
                    except Exception as gpu_error:
                        print(f"[VLM DEBUG] ❌ GPU loading failed: {gpu_error}")
                        print("[VLM DEBUG] 🔄 Trying CPU fallback...")

                        try:
                            self.vlm_pipeline = ov_genai.VLMPipeline(str(self.vlm_dir), "CPU")
                            self.device = "CPU"
                            print("[VLM DEBUG] ✅ VLMPipeline loaded on CPU fallback!")
                        except Exception as cpu_error:
                            print(f"[VLM DEBUG] ❌ CPU fallback also failed: {cpu_error}")
                            self.vlm_pipeline = None

                else:
                    # Direct loading on the selected device (CPU or AUTO)
                    self.vlm_pipeline = ov_genai.VLMPipeline(str(self.vlm_dir), self.device)
                    print(f"[VLM DEBUG] ✅ VLMPipeline loaded on {self.device}!")

            except Exception as e:
                print(f"[VLM DEBUG] ❌ Failed to load VLMPipeline: {e}")
                self.vlm_pipeline = None

        except Exception as e:
            print(f"[VLM DEBUG] ❌ Error in _load_model: {e}")
            self.vlm_pipeline = None

    def process_request(self, image, prompt):
        """Queue a VLM request and wake the worker loop."""
        self.mutex.lock()
        try:
            self.image = image
            self.prompt = prompt
            self.condition.wakeOne()
        finally:
            self.mutex.unlock()

    def _process_request(self, image: np.ndarray, prompt: str) -> dict:
        """Process a single VLM request using OpenVINO GenAI."""
        try:
            if self.vlm_pipeline is None:
                return {
                    "status": "error",
                    "message": "VLM pipeline not loaded",
                    "response": "❌ VLM pipeline failed to load. Check logs for OpenVINO GenAI setup.",
                    "confidence": 0.0,
                    "timestamp": datetime.now().isoformat(),
                    "device": "none",
                    "processing_time": 0.0
                }

            return self._run_genai_inference(image, prompt)

        except Exception as e:
            print(f"[VLM DEBUG] Error in _process_request: {e}")
            return {
                "status": "error",
                "message": str(e),
                "response": f"❌ VLM processing error: {str(e)}",
                "confidence": 0.0,
                "timestamp": datetime.now().isoformat(),
                "device": getattr(self, 'device', 'unknown'),
                "processing_time": 0.0
            }

    def _run_genai_inference(self, image: np.ndarray, prompt: str) -> dict:
        """Run inference using OpenVINO GenAI VLMPipeline."""
        start_time = datetime.now()

        try:
            print("[VLM DEBUG] 🚀 Starting OpenVINO GenAI inference...")
            print(f"[VLM DEBUG] 📝 Prompt: {prompt}")
            print(f"[VLM DEBUG] 🖼️ Image shape: {image.shape}")
            print(f"[VLM DEBUG] 🎯 Device: {self.device}")

            # Convert numpy image to PIL Image (requires PIL)
            if not PIL_AVAILABLE:
                return {
                    "status": "error",
                    "message": "PIL not available",
                    "response": "❌ PIL required for image processing",
                    "confidence": 0.0,
                    "timestamp": start_time.isoformat(),
                    "device": self.device,
                    "processing_time": 0.0
                }

            # Convert BGR to RGB if needed
            if len(image.shape) == 3 and image.shape[2] == 3:
                if image.dtype == np.uint8:
                    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
                else:
                    image_rgb = image
            else:
                image_rgb = image

            # Convert to PIL Image
            pil_image = Image.fromarray(image_rgb.astype(np.uint8))
            print(f"[VLM DEBUG] 🖼️ PIL Image size: {pil_image.size}")

            # Convert the PIL image to an OpenVINO tensor.
            # Note: the OpenVINO GenAI VLM samples pass images as uint8 tensors in
            # [1, H, W, C] (NHWC) layout, so keep HWC order and only add a batch dim.
            image_array = np.array(pil_image).astype(np.uint8)
            if len(image_array.shape) == 3:
                image_array = np.expand_dims(image_array, axis=0)  # HWC -> NHWC (batch of 1)

            image_tensor = ov.Tensor(image_array)
            print(f"[VLM DEBUG] 🔢 Image tensor shape: {image_tensor.shape}")

            # Start chat session
            print("[VLM DEBUG] 💬 Starting chat session...")
            self.vlm_pipeline.start_chat()

            # Generate response
            print("[VLM DEBUG] 🎲 Generating response...")
            response = self.vlm_pipeline.generate(
                prompt,
                image=image_tensor,
                max_new_tokens=100,
                do_sample=False
            )

            # Finish chat session
            self.vlm_pipeline.finish_chat()

            processing_time = (datetime.now() - start_time).total_seconds()

            print("[VLM DEBUG] ✅ Generation complete!")
            print(f"[VLM DEBUG] 📝 Response: {response}")
            print(f"[VLM DEBUG] ⏱️ Processing time: {processing_time:.2f}s")
            print(f"[VLM DEBUG] 🎯 Used device: {self.device}")

            return {
                "status": "success",
                "message": "OpenVINO GenAI inference completed",
                "response": str(response),  # ensure a plain string for downstream consumers
                "confidence": 1.0,  # GenAI doesn't provide confidence scores
                "timestamp": start_time.isoformat(),
                "device": self.device,
                "processing_time": processing_time
            }

        except Exception as e:
            processing_time = (datetime.now() - start_time).total_seconds()
            error_msg = f"OpenVINO GenAI inference failed: {str(e)}"
            print(f"[VLM DEBUG] ❌ {error_msg}")

            # Try to finish the chat session so the pipeline stays usable after an error
            try:
                self.vlm_pipeline.finish_chat()
            except Exception:
                pass

            return {
                "status": "error",
                "message": error_msg,
                "response": f"❌ VLM inference error: {str(e)}",
                "confidence": 0.0,
                "timestamp": start_time.isoformat(),
                "device": self.device,
                "processing_time": processing_time
            }

    def stop(self):
        """Stop the thread."""
        self.mutex.lock()
        self.abort = True
        self.condition.wakeOne()
        self.mutex.unlock()


class VLMController(QObject):
    """Main VLM controller class using OpenVINO GenAI."""

    result_ready = Signal(dict)
    error_occurred = Signal(str)

    def __init__(self, vlm_dir=None):
        super().__init__()
        print("[VLM DEBUG] Initializing VLM Controller (OpenVINO GenAI)")

        # Set VLM directory to the downloaded OpenVINO model
        if vlm_dir is None:
            current_dir = Path(__file__).parent.parent
            vlm_dir = current_dir / "llava_openvino_model"
        else:
            vlm_dir = Path(vlm_dir)  # allow plain strings to be passed in

        print(f"[VLM DEBUG] VLM directory: {vlm_dir}")
        print(f"[VLM DEBUG] VLM directory exists: {vlm_dir.exists()}")

        # Create worker thread
        self.worker_thread = VLMControllerThread(vlm_dir)
        self.worker_thread.result_ready.connect(self.result_ready.emit)
        self.worker_thread.error_occurred.connect(self.error_occurred.emit)
        self.worker_thread.start()

        print("[VLM DEBUG] VLM Controller initialized successfully (OpenVINO GenAI)")

    def process_image(self, image: np.ndarray, prompt: str):
        """Process an image with the VLM."""
        if self.worker_thread and self.worker_thread.isRunning():
            self.worker_thread.process_request(image, prompt)
        else:
            self.error_occurred.emit("VLM worker thread not running")

    def stop(self):
        """Stop the VLM controller."""
        if self.worker_thread:
            self.worker_thread.stop()
            self.worker_thread.wait(5000)  # Wait up to 5 seconds for thread to finish
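

# Minimal usage sketch (illustration only, not part of the original module): it
# assumes a Qt event loop can be started here and that "test.jpg" is a placeholder
# image path. It wires VLMController's result_ready signal to a print handler,
# submits one frame, and shuts the controller down when the result arrives.
if __name__ == "__main__":
    app = QApplication(sys.argv)

    controller = VLMController()

    def on_result(result: dict):
        # result is the dict built by the worker thread (status, response, device, ...)
        print(f"[{result['status']}] {result['response']} "
              f"({result['device']}, {result['processing_time']:.2f}s)")
        controller.stop()
        app.quit()

    controller.result_ready.connect(on_result)
    controller.error_occurred.connect(lambda msg: print(f"VLM error: {msg}"))

    # Load a BGR frame with OpenCV; fall back to a blank frame if the file is missing.
    frame = cv2.imread("test.jpg")
    if frame is None:
        frame = np.zeros((480, 640, 3), dtype=np.uint8)

    controller.process_image(frame, "Describe what you see in this image.")
    sys.exit(app.exec())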