diff --git a/AppImage/config/verified_ai_models.json b/AppImage/config/verified_ai_models.json
index 2e0c716d..da915473 100644
--- a/AppImage/config/verified_ai_models.json
+++ b/AppImage/config/verified_ai_models.json
@@ -1,7 +1,8 @@
 {
   "_description": "Verified AI models for ProxMenux notifications. Only models listed here will be shown to users. Models are tested to work with the chat/completions API format.",
-  "_updated": "2026-03-20",
-
+  "_updated": "2026-04-19",
+  "_verifier": "Refreshed with tools/ai-models-verifier (private). Re-run before each ProxMenux release to keep the list current. The verifier and ProxMenux share the same reasoning/thinking-model handlers so their verdicts stay aligned with runtime behaviour.",
+
   "groq": {
     "models": [
       "llama-3.3-70b-versatile",
@@ -12,37 +13,46 @@
       "mixtral-8x7b-32768",
       "gemma2-9b-it"
     ],
-    "recommended": "llama-3.3-70b-versatile"
+    "recommended": "llama-3.3-70b-versatile",
+    "_note": "Not yet re-verified in 2026-04 refresh — kept from previous curation. Run the verifier with a Groq key to prune deprecated entries."
   },
-
+
   "gemini": {
     "models": [
-      "gemini-2.5-flash",
       "gemini-2.5-flash-lite",
-      "gemini-2.5-pro"
+      "gemini-2.5-flash",
+      "gemini-3-flash-preview"
     ],
-    "recommended": "gemini-2.5-flash",
-    "_note": "gemini-2.5-flash-lite is cheaper but may struggle with complex prompts. Use with simple/custom prompts.",
+    "recommended": "gemini-2.5-flash-lite",
+    "_note": "flash-lite / flash pass the verifier consistently; pro variants reject thinkingBudget=0 and are overkill for notification translation anyway. 'latest' aliases (gemini-flash-latest, gemini-flash-lite-latest) are intentionally omitted because they resolved to different models across runs and produced timeouts in some regions.",
     "_deprecated": ["gemini-2.0-flash", "gemini-2.0-flash-lite", "gemini-1.5-flash", "gemini-1.0-pro", "gemini-pro"]
   },
-
+
   "openai": {
     "models": [
+      "gpt-4.1-nano",
       "gpt-4.1-mini",
-      "gpt-4o-mini"
+      "gpt-4o-mini",
+      "gpt-4.1",
+      "gpt-4o",
+      "gpt-5-chat-latest",
+      "gpt-5.4-nano",
+      "gpt-5.4-mini"
     ],
-    "recommended": "gpt-4o-mini"
+    "recommended": "gpt-4.1-nano",
+    "_note": "Reasoning models (o-series, gpt-5/5.1/5.2 non-chat variants) are supported by openai_provider.py via max_completion_tokens + reasoning_effort=minimal, but not listed here by default: their latency is higher than the chat models and they do not improve translation quality for notifications. Add specific reasoning IDs to this list only if a user explicitly wants them."
   },
-
+
   "anthropic": {
     "models": [
       "claude-3-5-haiku-latest",
       "claude-3-5-sonnet-latest",
       "claude-3-opus-latest"
     ],
-    "recommended": "claude-3-5-haiku-latest"
+    "recommended": "claude-3-5-haiku-latest",
+    "_note": "Not re-verified in 2026-04 refresh — kept from previous curation. Add claude-4.x / claude-4.5 / claude-4.6 / claude-4.7 variants after running the verifier with an Anthropic key."
   },
-
+
   "openrouter": {
     "models": [
       "meta-llama/llama-3.3-70b-instruct",
@@ -50,14 +60,15 @@
       "meta-llama/llama-3.1-8b-instruct",
       "anthropic/claude-3.5-haiku",
       "anthropic/claude-3.5-sonnet",
-      "google/gemini-flash-2.5-flash-lite",
+      "google/gemini-flash-1.5",
       "openai/gpt-4o-mini",
       "mistralai/mistral-7b-instruct",
       "mistralai/mixtral-8x7b-instruct"
     ],
-    "recommended": "meta-llama/llama-3.3-70b-instruct"
+    "recommended": "meta-llama/llama-3.3-70b-instruct",
+    "_note": "Not re-verified in 2026-04 refresh. google/gemini-flash-2.5-flash-lite was malformed in the previous entry and has been replaced with google/gemini-flash-1.5."
   },
-
+
   "ollama": {
     "_note": "Ollama models are local, we don't filter them. User manages their own models.",
     "models": [],
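A quick structural check that pairs with the config above: every provider's "recommended" value should also appear in its "models" array (ollama is exempt because its list is intentionally empty). This is a minimal standalone sketch, not ProxMenux code; the file path and variable names are only illustrative.

    import json

    with open('AppImage/config/verified_ai_models.json') as fh:
        cfg = json.load(fh)

    for provider, entry in cfg.items():
        # Skip metadata keys such as "_description", "_updated", "_verifier".
        if provider.startswith('_') or not isinstance(entry, dict):
            continue
        models = entry.get('models', [])
        recommended = entry.get('recommended', '')
        if models and recommended and recommended not in models:
            print(f'{provider}: recommended "{recommended}" is not in its models list')
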
diff --git a/AppImage/scripts/ai_providers/gemini_provider.py b/AppImage/scripts/ai_providers/gemini_provider.py
index 49224fb6..85d251b8 100644
--- a/AppImage/scripts/ai_providers/gemini_provider.py
+++ b/AppImage/scripts/ai_providers/gemini_provider.py
@@ -30,6 +30,23 @@ class GeminiProvider(AIProvider):
         'gemini-1.0-pro',
         'gemini-pro',
     ]
+
+    @staticmethod
+    def _has_thinking_mode(model: str) -> bool:
+        """True for Gemini variants that enable "thinking" by default.
+
+        Gemini 2.5+ and 3.x Pro/Flash models spend output tokens on
+        internal reasoning before emitting the final answer. With a small
+        max_tokens budget (≤250) that consumes the whole allowance and
+        leaves an empty reply. For the short translate/explain use case
+        in ProxMenux we want direct output, so we disable thinking for
+        these. Lite variants (flash-lite) do NOT have thinking enabled
+        and are safe to leave alone.
+        """
+        m = model.lower()
+        if 'lite' in m:
+            return False
+        return m.startswith('gemini-2.5') or m.startswith('gemini-3')

     def list_models(self) -> List[str]:
         """List available Gemini models that support generateContent.
@@ -118,6 +135,18 @@ class GeminiProvider(AIProvider):
         url = f"{self.API_BASE}/{self.model}:generateContent?key={self.api_key}"

         # Gemini uses a specific format with contents array
+        gen_config = {
+            'maxOutputTokens': max_tokens,
+            'temperature': 0.3,
+        }
+
+        # Disable thinking on 2.5+ / 3.x pro & flash models so the limited
+        # output budget actually produces visible text. thinkingBudget=0
+        # is the official switch for this; lite variants and legacy
+        # models don't need (and ignore) the field.
+        if self._has_thinking_mode(self.model):
+            gen_config['thinkingConfig'] = {'thinkingBudget': 0}
+
         payload = {
             'systemInstruction': {
                 'parts': [{'text': system_prompt}]
@@ -128,10 +157,7 @@
                     'parts': [{'text': user_message}]
                 }
             ],
-            'generationConfig': {
-                'maxOutputTokens': max_tokens,
-                'temperature': 0.3,
-            }
+            'generationConfig': gen_config,
         }

         headers = {
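For reference, a condensed sketch of the generationConfig that the patched send_message() builds for the two Gemini families; the helper name build_generation_config is illustrative and does not exist in gemini_provider.py.

    def build_generation_config(model: str, max_tokens: int) -> dict:
        gen_config = {'maxOutputTokens': max_tokens, 'temperature': 0.3}
        m = model.lower()
        # Same rule as _has_thinking_mode(): 2.5+/3.x non-lite variants think by default.
        if 'lite' not in m and (m.startswith('gemini-2.5') or m.startswith('gemini-3')):
            # Spend the whole (small) output budget on the visible answer.
            gen_config['thinkingConfig'] = {'thinkingBudget': 0}
        return gen_config

    # Thinking disabled for gemini-2.5-flash, field omitted for flash-lite:
    assert 'thinkingConfig' in build_generation_config('gemini-2.5-flash', 200)
    assert 'thinkingConfig' not in build_generation_config('gemini-2.5-flash-lite', 200)
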
diff --git a/AppImage/scripts/ai_providers/openai_provider.py b/AppImage/scripts/ai_providers/openai_provider.py
index d5877da5..86484767 100644
--- a/AppImage/scripts/ai_providers/openai_provider.py
+++ b/AppImage/scripts/ai_providers/openai_provider.py
@@ -37,23 +37,49 @@ class OpenAIProvider(AIProvider):

     # Recommended models for chat (in priority order)
     RECOMMENDED_PREFIXES = ['gpt-4o-mini', 'gpt-4o', 'gpt-4-turbo', 'gpt-4', 'gpt-3.5-turbo']
+
+    @staticmethod
+    def _is_reasoning_model(model: str) -> bool:
+        """True for OpenAI reasoning models (o-series + non-chat gpt-5+).
+
+        These use a stricter API contract than chat models:
+        - Must use ``max_completion_tokens`` instead of ``max_tokens``
+        - ``temperature`` is not accepted (only the default is supported)
+
+        Chat-optimized variants (``gpt-5-chat-latest``,
+        ``gpt-5.1-chat-latest``, etc.) keep the classic contract and are
+        NOT flagged here.
+        """
+        m = model.lower()
+        # o1, o3, o4, o5 ... (o...)
+        if len(m) >= 2 and m[0] == 'o' and m[1].isdigit():
+            return True
+        # gpt-5, gpt-5-mini, gpt-5.1, gpt-5.2-pro ... EXCEPT *-chat-latest
+        if m.startswith('gpt-5') and '-chat' not in m:
+            return True
+        return False

     def list_models(self) -> List[str]:
-        """List available OpenAI models for chat completions.
-
-        Filters to only chat-capable models, excluding:
-        - Embedding models
-        - Audio/speech models (whisper, tts)
-        - Image models (dall-e)
-        - Instruct models (different API)
-        - Legacy models (babbage, davinci, etc.)
-
+        """List available models for chat completions.
+
+        Two modes:
+        - Official OpenAI (no custom base_url): restrict to GPT chat models,
+          excluding embedding/whisper/tts/dall-e/instruct/legacy variants.
+        - OpenAI-compatible endpoint (LiteLLM, MLX, LM Studio, vLLM,
+          LocalAI, Ollama-proxy, etc.): the "gpt" substring check is
+          dropped so user-served models (e.g. ``mlx-community/Llama-3.1-8B``,
+          ``Qwen3-32B``, ``mistralai/...``) show up. EXCLUDED_PATTERNS
+          still applies — embeddings/whisper/tts aren't chat-capable on
+          any backend.
+
         Returns:
             List of model IDs suitable for chat completions.
         """
         if not self.api_key:
             return []
-
+
+        is_custom_endpoint = bool(self.base_url)
+
         try:
             # Determine models URL from base_url if set
             if self.base_url:
@@ -63,42 +89,46 @@
                 models_url = f"{base}/models"
             else:
                 models_url = self.DEFAULT_MODELS_URL
-
+
             req = urllib.request.Request(
                 models_url,
                 headers={'Authorization': f'Bearer {self.api_key}'},
                 method='GET'
             )
-
+
            with urllib.request.urlopen(req, timeout=10) as resp:
                 data = json.loads(resp.read().decode('utf-8'))
-
+
             models = []
             for model in data.get('data', []):
                 model_id = model.get('id', '')
                 if not model_id:
                     continue
-
+
                 model_lower = model_id.lower()
-
-                # Must be a GPT model
-                if 'gpt' not in model_lower:
+
+                # Official OpenAI: restrict to GPT chat models. Custom
+                # endpoints serve arbitrarily named models, so this
+                # substring check would drop every valid result there.
+                if not is_custom_endpoint and 'gpt' not in model_lower:
                     continue
-
-                # Exclude non-chat models
+
+                # Exclude non-chat models on every backend.
                 if any(pattern in model_lower for pattern in self.EXCLUDED_PATTERNS):
                     continue
-
+
                 models.append(model_id)
-
-            # Sort with recommended models first
+
+            # Sort with recommended models first (only meaningful for OpenAI
+            # official; on custom endpoints the prefixes rarely match, so
+            # entries fall through to alphabetical order, which is fine).
             def sort_key(m):
                 m_lower = m.lower()
                 for i, prefix in enumerate(self.RECOMMENDED_PREFIXES):
                     if m_lower.startswith(prefix):
                         return (i, m)
                 return (len(self.RECOMMENDED_PREFIXES), m)
-
+
             return sorted(models, key=sort_key)
         except Exception as e:
             print(f"[OpenAIProvider] Failed to list models: {e}")
@@ -133,17 +163,35 @@
         """
         if not self.api_key:
             raise AIProviderError("API key required for OpenAI")
-
+
         payload = {
             'model': self.model,
             'messages': [
                 {'role': 'system', 'content': system_prompt},
                 {'role': 'user', 'content': user_message},
             ],
-            'max_tokens': max_tokens,
-            'temperature': 0.3,
         }
-
+
+        # Reasoning models (o1/o3/o4/gpt-5*, excluding *-chat-latest) use a
+        # different parameter contract: max_completion_tokens instead of
+        # max_tokens, and no temperature field. Sending the classic chat
+        # parameters to them produces HTTP 400 Bad Request.
+        #
+        # They also spend output budget on internal reasoning by default,
+        # which empties the user-visible reply when max_tokens is small
+        # (like the ~200 we use for notifications). reasoning_effort
+        # 'minimal' keeps that internal reasoning to a minimum so the
+        # entire budget is available for the translation, which is
+        # exactly what this pipeline wants. OpenAI documents 'minimal',
+        # 'low', 'medium', 'high' — 'minimal' is the right setting for a
+        # straightforward translate+explain task.
+        if self._is_reasoning_model(self.model):
+            payload['max_completion_tokens'] = max_tokens
+            payload['reasoning_effort'] = 'minimal'
+        else:
+            payload['max_tokens'] = max_tokens
+            payload['temperature'] = 0.3
+
         headers = {
             'Content-Type': 'application/json',
             'Authorization': f'Bearer {self.api_key}',
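A condensed sketch of the two request contracts the patched send_message() now switches between; build_payload is an illustrative helper, not part of openai_provider.py.

    def build_payload(model: str, system_prompt: str, user_message: str,
                      max_tokens: int = 200) -> dict:
        payload = {
            'model': model,
            'messages': [
                {'role': 'system', 'content': system_prompt},
                {'role': 'user', 'content': user_message},
            ],
        }
        m = model.lower()
        is_reasoning = (len(m) >= 2 and m[0] == 'o' and m[1].isdigit()) or \
                       (m.startswith('gpt-5') and '-chat' not in m)
        if is_reasoning:
            # Reasoning contract: token cap via max_completion_tokens, no temperature.
            payload['max_completion_tokens'] = max_tokens
            payload['reasoning_effort'] = 'minimal'
        else:
            # Classic chat contract.
            payload['max_tokens'] = max_tokens
            payload['temperature'] = 0.3
        return payload

    # 'gpt-5.4-nano' gets the reasoning contract, 'gpt-5-chat-latest' the classic one.
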
diff --git a/AppImage/scripts/flask_notification_routes.py b/AppImage/scripts/flask_notification_routes.py
index 7c3294b7..024804a0 100644
--- a/AppImage/scripts/flask_notification_routes.py
+++ b/AppImage/scripts/flask_notification_routes.py
@@ -220,10 +220,20 @@ def get_provider_models():

     # Get all models from provider API
     api_models = ai_provider.list_models()
-
+
+    # OpenAI with a custom base URL means an OpenAI-compatible endpoint
+    # (LiteLLM, MLX, LM Studio, vLLM, LocalAI, Ollama-proxy...). The
+    # verified_ai_models.json list only contains official OpenAI IDs
+    # (gpt-4o-mini etc.), so intersecting against it would strip every
+    # model the user actually serves. Treat the custom-endpoint case
+    # like Ollama: return whatever the endpoint advertises, no filter.
+    is_openai_compat = (provider == 'openai' and bool(openai_base_url))
+
     if not api_models:
-        # API failed, fall back to verified list only
-        if verified_models:
+        # API failed, fall back to verified list only (but not for
+        # custom endpoints — we don't know what the endpoint serves,
+        # so "gpt-4o-mini" as a fallback would be misleading).
+        if verified_models and not is_openai_compat:
             models = sorted(verified_models)
             return jsonify({
                 'success': True,
@@ -232,27 +242,38 @@
                 'message': f'{len(models)} verified models (API unavailable)'
             })
         return jsonify({
-            'success': False,
-            'models': [],
-            'message': 'Could not retrieve models. Check your API key.'
+            'success': False,
+            'models': [],
+            'message': 'Could not retrieve models. Check your API key and endpoint URL.'
         })
-
+
+    if is_openai_compat:
+        # Custom OpenAI-compatible endpoint: surface every model the
+        # endpoint reports. No verified-list intersection.
+        models = sorted(api_models)
+        return jsonify({
+            'success': True,
+            'models': models,
+            'recommended': models[0] if models else '',
+            'message': f'Found {len(models)} models on custom endpoint'
+        })
+
     # Filter: only models that are BOTH in API and verified list
     if verified_models:
         api_models_set = set(api_models)
         filtered_models = [m for m in verified_models if m in api_models_set]
-
+
         if not filtered_models:
             # No intersection - maybe verified list is outdated
             # Return verified list anyway (will fail on use if truly unavailable)
             filtered_models = list(verified_models)
-
+
         # Sort with recommended first
         def sort_key(m):
            if m == recommended:
                return (0, m)
            return (1, m)
-
+
         models = sorted(filtered_models, key=sort_key)
     else:
         # No verified list for this provider, return all from API
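The decision order in the handler above, condensed into a pure function for readability; pick_models and its argument names are illustrative, the real logic stays inside the Flask route.

    def pick_models(provider, openai_base_url, api_models, verified_models):
        is_openai_compat = (provider == 'openai' and bool(openai_base_url))
        if not api_models:
            # Verified-list fallback only makes sense for the official endpoint.
            if verified_models and not is_openai_compat:
                return sorted(verified_models)
            return []
        if is_openai_compat:
            # Custom endpoint: trust whatever it advertises, no filtering.
            return sorted(api_models)
        if verified_models:
            filtered = [m for m in verified_models if m in set(api_models)]
            # Empty intersection usually means the verified list is stale.
            return filtered or list(verified_models)
        return api_models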