mirror of
https://github.com/MacRimi/ProxMenux.git
synced 2026-04-25 08:56:21 +00:00
update verified_ai_models.json
This commit is contained in:
@@ -1,6 +1,7 @@
|
|||||||
{
|
{
|
||||||
"_description": "Verified AI models for ProxMenux notifications. Only models listed here will be shown to users. Models are tested to work with the chat/completions API format.",
|
"_description": "Verified AI models for ProxMenux notifications. Only models listed here will be shown to users. Models are tested to work with the chat/completions API format.",
|
||||||
"_updated": "2026-03-20",
|
"_updated": "2026-04-19",
|
||||||
|
"_verifier": "Refreshed with tools/ai-models-verifier (private). Re-run before each ProxMenux release to keep the list current. The verifier and ProxMenux share the same reasoning/thinking-model handlers so their verdicts stay aligned with runtime behaviour.",
|
||||||
|
|
||||||
"groq": {
|
"groq": {
|
||||||
"models": [
|
"models": [
|
||||||
@@ -12,26 +13,34 @@
|
|||||||
"mixtral-8x7b-32768",
|
"mixtral-8x7b-32768",
|
||||||
"gemma2-9b-it"
|
"gemma2-9b-it"
|
||||||
],
|
],
|
||||||
"recommended": "llama-3.3-70b-versatile"
|
"recommended": "llama-3.3-70b-versatile",
|
||||||
|
"_note": "Not yet re-verified in 2026-04 refresh — kept from previous curation. Run the verifier with a Groq key to prune deprecated entries."
|
||||||
},
|
},
|
||||||
|
|
||||||
"gemini": {
|
"gemini": {
|
||||||
"models": [
|
"models": [
|
||||||
"gemini-2.5-flash",
|
|
||||||
"gemini-2.5-flash-lite",
|
"gemini-2.5-flash-lite",
|
||||||
"gemini-2.5-pro"
|
"gemini-2.5-flash",
|
||||||
|
"gemini-3-flash-preview"
|
||||||
],
|
],
|
||||||
"recommended": "gemini-2.5-flash",
|
"recommended": "gemini-2.5-flash-lite",
|
||||||
"_note": "gemini-2.5-flash-lite is cheaper but may struggle with complex prompts. Use with simple/custom prompts.",
|
"_note": "flash-lite / flash pass the verifier consistently; pro variants reject thinkingBudget=0 and are overkill for notification translation anyway. 'latest' aliases (gemini-flash-latest, gemini-flash-lite-latest) are intentionally omitted because they resolved to different models across runs and produced timeouts in some regions.",
|
||||||
"_deprecated": ["gemini-2.0-flash", "gemini-2.0-flash-lite", "gemini-1.5-flash", "gemini-1.0-pro", "gemini-pro"]
|
"_deprecated": ["gemini-2.0-flash", "gemini-2.0-flash-lite", "gemini-1.5-flash", "gemini-1.0-pro", "gemini-pro"]
|
||||||
},
|
},
|
||||||
|
|
||||||
"openai": {
|
"openai": {
|
||||||
"models": [
|
"models": [
|
||||||
|
"gpt-4.1-nano",
|
||||||
"gpt-4.1-mini",
|
"gpt-4.1-mini",
|
||||||
"gpt-4o-mini"
|
"gpt-4o-mini",
|
||||||
|
"gpt-4.1",
|
||||||
|
"gpt-4o",
|
||||||
|
"gpt-5-chat-latest",
|
||||||
|
"gpt-5.4-nano",
|
||||||
|
"gpt-5.4-mini"
|
||||||
],
|
],
|
||||||
"recommended": "gpt-4o-mini"
|
"recommended": "gpt-4.1-nano",
|
||||||
|
"_note": "Reasoning models (o-series, gpt-5/5.1/5.2 non-chat variants) are supported by openai_provider.py via max_completion_tokens + reasoning_effort=minimal, but not listed here by default: their latency is higher than the chat models and they do not improve translation quality for notifications. Add specific reasoning IDs to this list only if a user explicitly wants them."
|
||||||
},
|
},
|
||||||
|
|
||||||
"anthropic": {
|
"anthropic": {
|
||||||
@@ -40,7 +49,8 @@
|
|||||||
"claude-3-5-sonnet-latest",
|
"claude-3-5-sonnet-latest",
|
||||||
"claude-3-opus-latest"
|
"claude-3-opus-latest"
|
||||||
],
|
],
|
||||||
"recommended": "claude-3-5-haiku-latest"
|
"recommended": "claude-3-5-haiku-latest",
|
||||||
|
"_note": "Not re-verified in 2026-04 refresh — kept from previous curation. Add claude-4.x / claude-4.5 / claude-4.6 / claude-4.7 variants after running the verifier with an Anthropic key."
|
||||||
},
|
},
|
||||||
|
|
||||||
"openrouter": {
|
"openrouter": {
|
||||||
@@ -50,12 +60,13 @@
|
|||||||
"meta-llama/llama-3.1-8b-instruct",
|
"meta-llama/llama-3.1-8b-instruct",
|
||||||
"anthropic/claude-3.5-haiku",
|
"anthropic/claude-3.5-haiku",
|
||||||
"anthropic/claude-3.5-sonnet",
|
"anthropic/claude-3.5-sonnet",
|
||||||
"google/gemini-flash-2.5-flash-lite",
|
"google/gemini-flash-1.5",
|
||||||
"openai/gpt-4o-mini",
|
"openai/gpt-4o-mini",
|
||||||
"mistralai/mistral-7b-instruct",
|
"mistralai/mistral-7b-instruct",
|
||||||
"mistralai/mixtral-8x7b-instruct"
|
"mistralai/mixtral-8x7b-instruct"
|
||||||
],
|
],
|
||||||
"recommended": "meta-llama/llama-3.3-70b-instruct"
|
"recommended": "meta-llama/llama-3.3-70b-instruct",
|
||||||
|
"_note": "Not re-verified in 2026-04 refresh. google/gemini-flash-2.5-flash-lite was malformed in the previous entry and has been replaced with google/gemini-flash-1.5."
|
||||||
},
|
},
|
||||||
|
|
||||||
"ollama": {
|
"ollama": {
|
||||||
|
|||||||
@@ -31,6 +31,23 @@ class GeminiProvider(AIProvider):
|
|||||||
'gemini-pro',
|
'gemini-pro',
|
||||||
]
|
]
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _has_thinking_mode(model: str) -> bool:
|
||||||
|
"""True for Gemini variants that enable "thinking" by default.
|
||||||
|
|
||||||
|
Gemini 2.5+ and 3.x Pro/Flash models spend output tokens on
|
||||||
|
internal reasoning before emitting the final answer. With a small
|
||||||
|
max_tokens budget (≤250) that consumes the whole allowance and
|
||||||
|
leaves an empty reply. For the short translate/explain use case
|
||||||
|
in ProxMenux we want direct output, so we disable thinking for
|
||||||
|
these. Lite variants (flash-lite) do NOT have thinking enabled
|
||||||
|
and are safe to leave alone.
|
||||||
|
"""
|
||||||
|
m = model.lower()
|
||||||
|
if 'lite' in m:
|
||||||
|
return False
|
||||||
|
return m.startswith('gemini-2.5') or m.startswith('gemini-3')
|
||||||
|
|
||||||
def list_models(self) -> List[str]:
|
def list_models(self) -> List[str]:
|
||||||
"""List available Gemini models that support generateContent.
|
"""List available Gemini models that support generateContent.
|
||||||
|
|
||||||
@@ -118,6 +135,18 @@ class GeminiProvider(AIProvider):
|
|||||||
url = f"{self.API_BASE}/{self.model}:generateContent?key={self.api_key}"
|
url = f"{self.API_BASE}/{self.model}:generateContent?key={self.api_key}"
|
||||||
|
|
||||||
# Gemini uses a specific format with contents array
|
# Gemini uses a specific format with contents array
|
||||||
|
gen_config = {
|
||||||
|
'maxOutputTokens': max_tokens,
|
||||||
|
'temperature': 0.3,
|
||||||
|
}
|
||||||
|
|
||||||
|
# Disable thinking on 2.5+ / 3.x pro & flash models so the limited
|
||||||
|
# output budget actually produces visible text. thinkingBudget=0
|
||||||
|
# is the official switch for this; lite variants and legacy
|
||||||
|
# models don't need (and ignore) the field.
|
||||||
|
if self._has_thinking_mode(self.model):
|
||||||
|
gen_config['thinkingConfig'] = {'thinkingBudget': 0}
|
||||||
|
|
||||||
payload = {
|
payload = {
|
||||||
'systemInstruction': {
|
'systemInstruction': {
|
||||||
'parts': [{'text': system_prompt}]
|
'parts': [{'text': system_prompt}]
|
||||||
@@ -128,10 +157,7 @@ class GeminiProvider(AIProvider):
|
|||||||
'parts': [{'text': user_message}]
|
'parts': [{'text': user_message}]
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
'generationConfig': {
|
'generationConfig': gen_config,
|
||||||
'maxOutputTokens': max_tokens,
|
|
||||||
'temperature': 0.3,
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
headers = {
|
headers = {
|
||||||
|
|||||||
@@ -38,15 +38,39 @@ class OpenAIProvider(AIProvider):
|
|||||||
# Recommended models for chat (in priority order)
|
# Recommended models for chat (in priority order)
|
||||||
RECOMMENDED_PREFIXES = ['gpt-4o-mini', 'gpt-4o', 'gpt-4-turbo', 'gpt-4', 'gpt-3.5-turbo']
|
RECOMMENDED_PREFIXES = ['gpt-4o-mini', 'gpt-4o', 'gpt-4-turbo', 'gpt-4', 'gpt-3.5-turbo']
|
||||||
|
|
||||||
def list_models(self) -> List[str]:
|
@staticmethod
|
||||||
"""List available OpenAI models for chat completions.
|
def _is_reasoning_model(model: str) -> bool:
|
||||||
|
"""True for OpenAI reasoning models (o-series + non-chat gpt-5+).
|
||||||
|
|
||||||
Filters to only chat-capable models, excluding:
|
These use a stricter API contract than chat models:
|
||||||
- Embedding models
|
- Must use ``max_completion_tokens`` instead of ``max_tokens``
|
||||||
- Audio/speech models (whisper, tts)
|
- ``temperature`` is not accepted (only the default is supported)
|
||||||
- Image models (dall-e)
|
|
||||||
- Instruct models (different API)
|
Chat-optimized variants (``gpt-5-chat-latest``,
|
||||||
- Legacy models (babbage, davinci, etc.)
|
``gpt-5.1-chat-latest``, etc.) keep the classic contract and are
|
||||||
|
NOT flagged here.
|
||||||
|
"""
|
||||||
|
m = model.lower()
|
||||||
|
# o1, o3, o4, o5 ... (o<digit>...)
|
||||||
|
if len(m) >= 2 and m[0] == 'o' and m[1].isdigit():
|
||||||
|
return True
|
||||||
|
# gpt-5, gpt-5-mini, gpt-5.1, gpt-5.2-pro ... EXCEPT *-chat-latest
|
||||||
|
if m.startswith('gpt-5') and '-chat' not in m:
|
||||||
|
return True
|
||||||
|
return False
|
||||||
|
|
||||||
|
def list_models(self) -> List[str]:
|
||||||
|
"""List available models for chat completions.
|
||||||
|
|
||||||
|
Two modes:
|
||||||
|
- Official OpenAI (no custom base_url): restrict to GPT chat models,
|
||||||
|
excluding embedding/whisper/tts/dall-e/instruct/legacy variants.
|
||||||
|
- OpenAI-compatible endpoint (LiteLLM, MLX, LM Studio, vLLM,
|
||||||
|
LocalAI, Ollama-proxy, etc.): the "gpt" substring check is
|
||||||
|
dropped so user-served models (e.g. ``mlx-community/Llama-3.1-8B``,
|
||||||
|
``Qwen3-32B``, ``mistralai/...``) show up. EXCLUDED_PATTERNS
|
||||||
|
still applies — embeddings/whisper/tts aren't chat-capable on
|
||||||
|
any backend.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
List of model IDs suitable for chat completions.
|
List of model IDs suitable for chat completions.
|
||||||
@@ -54,6 +78,8 @@ class OpenAIProvider(AIProvider):
|
|||||||
if not self.api_key:
|
if not self.api_key:
|
||||||
return []
|
return []
|
||||||
|
|
||||||
|
is_custom_endpoint = bool(self.base_url)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# Determine models URL from base_url if set
|
# Determine models URL from base_url if set
|
||||||
if self.base_url:
|
if self.base_url:
|
||||||
@@ -81,17 +107,21 @@ class OpenAIProvider(AIProvider):
|
|||||||
|
|
||||||
model_lower = model_id.lower()
|
model_lower = model_id.lower()
|
||||||
|
|
||||||
# Must be a GPT model
|
# Official OpenAI: restrict to GPT chat models. Custom
|
||||||
if 'gpt' not in model_lower:
|
# endpoints serve arbitrarily named models, so this
|
||||||
|
# substring check would drop every valid result there.
|
||||||
|
if not is_custom_endpoint and 'gpt' not in model_lower:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Exclude non-chat models
|
# Exclude non-chat models on every backend.
|
||||||
if any(pattern in model_lower for pattern in self.EXCLUDED_PATTERNS):
|
if any(pattern in model_lower for pattern in self.EXCLUDED_PATTERNS):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
models.append(model_id)
|
models.append(model_id)
|
||||||
|
|
||||||
# Sort with recommended models first
|
# Sort with recommended models first (only meaningful for OpenAI
|
||||||
|
# official; on custom endpoints the prefixes rarely match, so
|
||||||
|
# entries fall through to alphabetical order, which is fine).
|
||||||
def sort_key(m):
|
def sort_key(m):
|
||||||
m_lower = m.lower()
|
m_lower = m.lower()
|
||||||
for i, prefix in enumerate(self.RECOMMENDED_PREFIXES):
|
for i, prefix in enumerate(self.RECOMMENDED_PREFIXES):
|
||||||
@@ -140,10 +170,28 @@ class OpenAIProvider(AIProvider):
|
|||||||
{'role': 'system', 'content': system_prompt},
|
{'role': 'system', 'content': system_prompt},
|
||||||
{'role': 'user', 'content': user_message},
|
{'role': 'user', 'content': user_message},
|
||||||
],
|
],
|
||||||
'max_tokens': max_tokens,
|
|
||||||
'temperature': 0.3,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Reasoning models (o1/o3/o4/gpt-5*, excluding *-chat-latest) use a
|
||||||
|
# different parameter contract: max_completion_tokens instead of
|
||||||
|
# max_tokens, and no temperature field. Sending the classic chat
|
||||||
|
# parameters to them produces HTTP 400 Bad Request.
|
||||||
|
#
|
||||||
|
# They also spend output budget on internal reasoning by default,
|
||||||
|
# which empties the user-visible reply when max_tokens is small
|
||||||
|
# (like the ~200 we use for notifications). reasoning_effort
|
||||||
|
# 'minimal' keeps that internal reasoning to a minimum so the
|
||||||
|
# entire budget is available for the translation, which is
|
||||||
|
# exactly what this pipeline wants. OpenAI documents 'minimal',
|
||||||
|
# 'low', 'medium', 'high' — 'minimal' is the right setting for a
|
||||||
|
# straightforward translate+explain task.
|
||||||
|
if self._is_reasoning_model(self.model):
|
||||||
|
payload['max_completion_tokens'] = max_tokens
|
||||||
|
payload['reasoning_effort'] = 'minimal'
|
||||||
|
else:
|
||||||
|
payload['max_tokens'] = max_tokens
|
||||||
|
payload['temperature'] = 0.3
|
||||||
|
|
||||||
headers = {
|
headers = {
|
||||||
'Content-Type': 'application/json',
|
'Content-Type': 'application/json',
|
||||||
'Authorization': f'Bearer {self.api_key}',
|
'Authorization': f'Bearer {self.api_key}',
|
||||||
|
|||||||
@@ -221,9 +221,19 @@ def get_provider_models():
|
|||||||
# Get all models from provider API
|
# Get all models from provider API
|
||||||
api_models = ai_provider.list_models()
|
api_models = ai_provider.list_models()
|
||||||
|
|
||||||
|
# OpenAI with a custom base URL means an OpenAI-compatible endpoint
|
||||||
|
# (LiteLLM, MLX, LM Studio, vLLM, LocalAI, Ollama-proxy...). The
|
||||||
|
# verified_ai_models.json list only contains official OpenAI IDs
|
||||||
|
# (gpt-4o-mini etc.), so intersecting against it would strip every
|
||||||
|
# model the user actually serves. Treat the custom-endpoint case
|
||||||
|
# like Ollama: return whatever the endpoint advertises, no filter.
|
||||||
|
is_openai_compat = (provider == 'openai' and bool(openai_base_url))
|
||||||
|
|
||||||
if not api_models:
|
if not api_models:
|
||||||
# API failed, fall back to verified list only
|
# API failed, fall back to verified list only (but not for
|
||||||
if verified_models:
|
# custom endpoints — we don't know what the endpoint serves,
|
||||||
|
# so "gpt-4o-mini" as a fallback would be misleading).
|
||||||
|
if verified_models and not is_openai_compat:
|
||||||
models = sorted(verified_models)
|
models = sorted(verified_models)
|
||||||
return jsonify({
|
return jsonify({
|
||||||
'success': True,
|
'success': True,
|
||||||
@@ -234,7 +244,18 @@ def get_provider_models():
|
|||||||
return jsonify({
|
return jsonify({
|
||||||
'success': False,
|
'success': False,
|
||||||
'models': [],
|
'models': [],
|
||||||
'message': 'Could not retrieve models. Check your API key.'
|
'message': 'Could not retrieve models. Check your API key and endpoint URL.'
|
||||||
|
})
|
||||||
|
|
||||||
|
if is_openai_compat:
|
||||||
|
# Custom OpenAI-compatible endpoint: surface every model the
|
||||||
|
# endpoint reports. No verified-list intersection.
|
||||||
|
models = sorted(api_models)
|
||||||
|
return jsonify({
|
||||||
|
'success': True,
|
||||||
|
'models': models,
|
||||||
|
'recommended': models[0] if models else '',
|
||||||
|
'message': f'Found {len(models)} models on custom endpoint'
|
||||||
})
|
})
|
||||||
|
|
||||||
# Filter: only models that are BOTH in API and verified list
|
# Filter: only models that are BOTH in API and verified list
|
||||||
|
|||||||
Reference in New Issue
Block a user