mirror of
https://github.com/MacRimi/ProxMenux.git
synced 2026-06-11 19:07:01 +00:00
389 lines
13 KiB
Python
389 lines
13 KiB
Python
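#!/usr/bin/env python3
"""
Build the ProxMenux translation cache from translate calls in scripts/.

The generated JSON keeps the same shape used by scripts/utils.sh:

{
  "Original English text": {
    "es": "Translated text",
    "fr": "Translated text"
  }
}

NOTE(review): main() writes one ``<lang>.json`` file per language, each a
flat ``{"Original English text": "Translated text"}`` mapping, so the
combined shape shown above may be out of date -- confirm against
scripts/utils.sh.
"""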
|
||
|
||
from __future__ import annotations
|
||
|
||
import argparse
|
||
import ast
|
||
import json
|
||
import os
|
||
import subprocess
|
||
import re
|
||
import sys
|
||
import time
|
||
from pathlib import Path
|
||
from typing import Iterable
|
||
from urllib.parse import quote
|
||
from urllib.request import Request, urlopen
|
||
|
||
|
||
# Destination languages used when --languages is not supplied.
DEFAULT_LANGUAGES = ("es", "fr", "de", "it", "pt")

# Default prompt prefix (the --context option) passed along to the
# translation providers.
DEFAULT_CONTEXT = "Context: Technical message for Proxmox and IT. Translate:"

# Matches `translate "<text>"` or `translate '<text>'`.
#   quote - the opening quote character; the same character must close it.
#   text  - everything between the quotes; a backslash followed by any
#           character is consumed as a pair, so an escaped quote does not
#           terminate the string.
# re.VERBOSE makes the blanks inside the pattern insignificant; re.DOTALL
# lets the quoted text span several lines.
TRANSLATE_CALL_RE = re.compile(
    r"""translate\s+(?P<quote>["'])(?P<text>(?:\\.|(?! (?P=quote) ).)*?)(?P=quote)""",
    flags=re.VERBOSE | re.DOTALL,
)
|
||
|
||
|
||
def iter_script_files(
    scripts_dir: Path, extra_files: Iterable[Path] = ()
) -> Iterable[Path]:
    """Yield every file that should be scanned for translate calls.

    First walks ``scripts_dir`` recursively, in sorted path order, and yields
    regular files whose suffix is ``.sh`` or ``.func``, skipping any file
    named ``utils.sh``. Then yields each entry of ``extra_files`` that exists
    as a regular file, in the order given.

    Args:
        scripts_dir: Root of the scripts tree to walk.
        extra_files: Individually selected files (e.g. the root-level `menu`
            entry point or install_proxmenux*.sh) that live outside the
            scripts tree but still contain translate "..." calls.
    """
    scanned_suffixes = {".sh", ".func"}

    for candidate in sorted(scripts_dir.rglob("*")):
        wanted = (
            candidate.is_file()
            and candidate.name != "utils.sh"
            and candidate.suffix in scanned_suffixes
        )
        if wanted:
            yield candidate

    # Explicit extras bypass the suffix filter and the utils.sh skip: the
    # caller already decided they belong, so only existence is checked.
    yield from (extra for extra in extra_files if extra.is_file())
|
||
|
||
|
||
def decode_shell_string(raw: str, quote_char: str) -> str:
    """Turn the raw body of a quoted string into its text value.

    Args:
        raw: Characters found between the quotes, escapes still in place.
        quote_char: The quote character that delimited the string.

    Returns:
        ``raw`` unchanged for single-quoted strings. Otherwise the string
        is evaluated as a Python double-quoted literal; if that fails, only
        ``\\"`` and ``\\\\`` are unescaped.
    """
    if quote_char != "'":
        # NOTE(review): this applies Python escape rules (e.g. \n becomes a
        # newline), which may differ from what the shell passes to
        # `translate` -- confirm against the runtime lookup.
        try:
            return ast.literal_eval(f'"{raw}"')
        except Exception:
            # Not a valid Python literal (e.g. contains a real line break):
            # fall back to minimal manual unescaping.
            unescaped_quotes = raw.replace(r"\"", '"')
            return unescaped_quotes.replace(r"\\", "\\")
    return raw
|
||
|
||
|
||
def extract_translate_texts(
    scripts_dir: Path, extra_files: Iterable[Path] = ()
) -> list[str]:
    """Collect the unique literal strings passed to ``translate``.

    Every file produced by ``iter_script_files`` is read as UTF-8 (re-read
    with replacement characters if it is not valid UTF-8) and scanned with
    ``TRANSLATE_CALL_RE``. Each match is decoded, stripped, and kept unless
    it is empty or contains ``$`` or a backtick.

    Returns:
        The distinct strings in sorted order.
    """
    unique_texts: set[str] = set()

    for script in iter_script_files(scripts_dir, extra_files):
        try:
            source = script.read_text(encoding="utf-8")
        except UnicodeDecodeError:
            source = script.read_text(encoding="utf-8", errors="replace")

        for hit in TRANSLATE_CALL_RE.finditer(source):
            candidate = decode_shell_string(hit.group("text"), hit.group("quote")).strip()
            if not candidate:
                continue
            # Strings containing `$` or a backtick are not collected.
            if "$" in candidate or "`" in candidate:
                continue
            unique_texts.add(candidate)

    return sorted(unique_texts)
|
||
|
||
|
||
def translate_googletrans(text: str, dest_lang: str, context: str) -> str:
    """Translate ``text`` with the optional ``googletrans`` package.

    The context is prepended to the text (separated by a space) before the
    request is made, and the ``.text`` attribute of the result is returned.

    Raises:
        RuntimeError: If ``googletrans`` cannot be imported.
    """
    try:
        # Imported lazily so the package is only needed for this provider.
        from googletrans import Translator  # type: ignore
    except Exception as exc:
        message = (
            "googletrans is not installed. Install googletrans==4.0.0-rc1 "
            "or run with --provider google-web."
        )
        raise RuntimeError(message) from exc

    prompt = f"{context} {text}".strip()
    result = Translator().translate(prompt, dest=dest_lang)
    return result.text
|
||
|
||
|
||
def translate_google_web(text: str, dest_lang: str, context: str, timeout: int) -> str:
    """Translate ``text`` from English via the public Google web endpoint.

    Args:
        text: English source text.
        dest_lang: Target language code, URL-quoted into the request.
        context: Accepted for signature parity but not sent.
        timeout: Timeout passed to ``urlopen``.

    Returns:
        The non-empty first elements of the entries in ``payload[0]``,
        concatenated.
    """
    # The public Google endpoint is not prompt-aware: if the context were
    # prepended, it would often be translated and returned as part of the
    # result, so only the bare text is sent.
    query = f"?client=gtx&sl=en&tl={quote(dest_lang)}&dt=t&q={quote(text)}"
    endpoint = "https://translate.googleapis.com/translate_a/single" + query
    request = Request(endpoint, headers={"User-Agent": "ProxMenux translation cache builder"})

    with urlopen(request, timeout=timeout) as response:
        body = response.read()

    payload = json.loads(body.decode("utf-8"))
    pieces = [segment[0] for segment in payload[0] if segment and segment[0]]
    return "".join(pieces)
|
||
|
||
|
||
def translate_appimage(
    text: str,
    dest_lang: str,
    context: str,
    timeout: int,
    appimage_path: Path,
) -> str:
    """Translate ``text`` by running the AppImage with ``--translate``.

    A JSON request (``text``, ``dest_lang``, ``context`` and an empty
    ``cache_file``) is written to the process's stdin. Stdout is then
    scanned from the last line backwards for a JSON object.

    Args:
        text: Source text to translate.
        dest_lang: Target language code.
        context: Context string forwarded in the request.
        timeout: Seconds to wait for the subprocess.
        appimage_path: Executable to run. If it does not exist, the sibling
            ``<name>.prev`` is used when present.

    Returns:
        The ``text`` field of the JSON response (or the input text if that
        field is absent) when the response reports success.

    Raises:
        FileNotFoundError: Neither ``appimage_path`` nor its ``.prev``
            sibling exists.
        RuntimeError: The process exits non-zero, reports failure, or
            prints no parsable JSON object.
        subprocess.TimeoutExpired: The process exceeds ``timeout``.
    """
    if not appimage_path.exists():
        prev_path = appimage_path.with_name(appimage_path.name + ".prev")
        if not prev_path.exists():
            raise FileNotFoundError(f"AppImage not found: {appimage_path}")
        appimage_path = prev_path

    req = {
        "text": text,
        "dest_lang": dest_lang,
        "context": context,
        "cache_file": "",
    }
    env = os.environ.copy()
    # Only set when the caller's environment has not already chosen a value.
    env.setdefault("APPIMAGE_EXTRACT_AND_RUN", "1")
    completed = subprocess.run(
        [str(appimage_path), "--translate"],
        input=json.dumps(req, ensure_ascii=False),
        text=True,
        capture_output=True,
        timeout=timeout,
        check=False,
        env=env,
    )
    if completed.returncode != 0:
        detail = (completed.stderr or completed.stdout).strip()
        # Never raise an empty message: fall back to the exit status.
        raise RuntimeError(
            detail or f"AppImage exited with status {completed.returncode}"
        )

    # AppRun may print a startup line before translate_cli.py emits JSON,
    # so search from the end for the last line holding a JSON object.
    for line in reversed(completed.stdout.splitlines()):
        line = line.strip()
        if not line.startswith("{"):
            continue
        try:
            payload = json.loads(line)
        except json.JSONDecodeError:
            # A log line that merely starts with "{" must not hide a valid
            # result printed earlier; keep looking.
            continue
        if not isinstance(payload, dict):
            continue
        if payload.get("success"):
            return str(payload.get("text", text))
        raise RuntimeError(str(payload.get("error", "unknown AppImage translation error")))

    raise RuntimeError(f"AppImage did not return JSON: {completed.stdout.strip()}")
|
||
|
||
|
||
def clean_translation(value: str) -> str:
    """Strip a leading, possibly translated, context prompt from ``value``.

    Three case-insensitive substitutions are applied in order, each
    anchored at the start of the string and allowed to span lines:

    1. everything up to and including the first "Translate" label and its
       separator;
    2. everything from the start through a "Context" label, its separator,
       and a following "Translate" label with its separator;
    3. everything up to and including the first "Context" label and its
       separator.

    The result is returned with surrounding whitespace removed.
    """
    # Optional whitespace / no-break space followed by a colon.
    # NOTE(review): the class lists ":" twice; a second colon-like
    # character may have been intended -- confirm.
    separator = r"[\s\u00a0]*[::]"
    translate_labels = "Translate|Traducir|Traduire|Übersetzen|Tradurre|Traduci|Traduzir"
    context_labels = "Context|Contexto|Contexte|Kontext|Contesto"

    prompt_patterns = (
        rf"^.*?({translate_labels}){separator}",
        rf"^.*?({context_labels}){separator}.*?({translate_labels}){separator}",
        rf"^.*?({context_labels}){separator}",
    )
    for pattern in prompt_patterns:
        value = re.sub(pattern, "", value, flags=re.IGNORECASE | re.DOTALL)

    return value.strip()
|
||
|
||
|
||
def translate_text(
    text: str,
    dest_lang: str,
    provider: str,
    context: str,
    timeout: int,
    appimage_path: Path,
) -> str:
    """Translate ``text`` with the selected provider and clean the result.

    Args:
        text: Source text.
        dest_lang: Target language code.
        provider: One of ``"googletrans"``, ``"google-web"``, ``"appimage"``.
        context: Prompt prefix forwarded to the provider function.
        timeout: Timeout forwarded to the google-web and appimage providers.
        appimage_path: Executable used by the appimage provider.

    Returns:
        The provider output passed through ``clean_translation``; if that
        is empty, the original ``text``.

    Raises:
        ValueError: ``provider`` is not one of the known names.
    """
    # Zero-argument thunks so only the chosen backend is ever invoked.
    backends = {
        "googletrans": lambda: translate_googletrans(text, dest_lang, context),
        "google-web": lambda: translate_google_web(text, dest_lang, context, timeout),
        "appimage": lambda: translate_appimage(
            text, dest_lang, context, timeout, appimage_path
        ),
    }
    backend = backends.get(provider)
    if backend is None:
        raise ValueError(f"Unknown provider: {provider}")

    cleaned = clean_translation(backend())
    return cleaned if cleaned else text
|
||
|
||
|
||
def load_language_cache(path: Path) -> dict[str, str]:
    """Load a per-language cache file as ``{original text: translation}``.

    Args:
        path: JSON file to read.

    Returns:
        The string-valued entries of the JSON object stored at ``path``.
        An empty dict is returned when the file is missing, cannot be read
        or parsed (a warning is printed to stderr), or does not contain a
        JSON object.
    """
    if not path.exists():
        return {}
    try:
        data = json.loads(path.read_text(encoding="utf-8"))
    except (OSError, ValueError, RecursionError) as exc:
        # Best effort: an unreadable cache is ignored rather than fatal,
        # but say so, because its contents will be regenerated.
        print(f"Ignoring unreadable cache {path}: {exc}", file=sys.stderr, flush=True)
        return {}
    if not isinstance(data, dict):
        return {}
    # Keep only real strings. Stringifying other JSON values would turn
    # null into "None" (or an object into its repr), and such a truthy
    # value would then be reused as a finished translation.
    return {text: value for text, value in data.items() if isinstance(value, str)}
|
||
|
||
|
||
def write_language_cache(path: Path, cache: dict[str, str]) -> None:
    """Write ``cache`` to ``path`` as pretty-printed, key-sorted JSON.

    Missing parent directories are created. The content (two-space indent,
    non-ASCII characters kept as-is, trailing newline) is first written to
    a ``.tmp`` sibling, which is then moved over ``path``.
    """
    path.parent.mkdir(parents=True, exist_ok=True)

    serialized = json.dumps(cache, ensure_ascii=False, indent=2, sort_keys=True)
    staging_path = path.with_suffix(path.suffix + ".tmp")
    staging_path.write_text(serialized + "\n", encoding="utf-8")

    # Swap the staged file into place only after it is fully written.
    staging_path.replace(path)
|
||
|
||
|
||
def build_arg_parser() -> argparse.ArgumentParser:
    """Create the command-line parser for the cache builder.

    The options are declared as a table of ``(flag, keyword arguments)``
    pairs and registered in that order.
    """
    parser = argparse.ArgumentParser(
        description="Extract translate calls from scripts/ and build json/cache.json."
    )

    option_table = [
        ("--scripts-dir", {"default": "scripts", "type": Path}),
        (
            "--extra-file",
            {
                "action": "append",
                "default": [],
                "type": Path,
                "metavar": "PATH",
                "help": (
                    "Extra individual files to scan for translate calls in addition "
                    "to --scripts-dir. Useful for the root-level `menu` entry point "
                    "and install_proxmenux*.sh, which sit outside scripts/. "
                    "Pass multiple times to add more than one file."
                ),
            },
        ),
        (
            "--output-dir",
            {
                "default": Path("lang"),
                "type": Path,
                "help": "Directory where per-language JSON files are written. Default: lang",
            },
        ),
        (
            "--output",
            {
                "default": None,
                "type": Path,
                "help": (
                    "Deprecated combined cache path. If used, per-language files "
                    "are written next to it under its parent directory."
                ),
            },
        ),
        (
            "--languages",
            {
                "default": ",".join(DEFAULT_LANGUAGES),
                "help": "Comma-separated destination languages. Default: es,fr,de,it,pt",
            },
        ),
        (
            "--provider",
            {
                "choices": ("appimage", "googletrans", "google-web"),
                "default": "appimage",
                "help": "Translation provider to use. Default: appimage",
            },
        ),
        (
            "--appimage-path",
            {
                "default": Path("/usr/local/share/proxmenux/ProxMenux-Monitor.AppImage"),
                "type": Path,
                "help": "Path to the ProxMenux AppImage when using --provider appimage.",
            },
        ),
        ("--context", {"default": DEFAULT_CONTEXT}),
        ("--timeout", {"default": 30, "type": int}),
        ("--sleep", {"default": 0.15, "type": float}),
        (
            "--refresh",
            {
                "action": "store_true",
                "help": "Translate all entries again instead of reusing existing cache values.",
            },
        ),
        (
            "--extract-only",
            {
                "action": "store_true",
                "help": "Only update the cache keys; missing translations are left empty.",
            },
        ),
        (
            "--limit",
            {
                "type": int,
                "default": 0,
                "help": "Only process the first N extracted strings. Useful for test runs.",
            },
        ),
        (
            "--save-every",
            {
                "type": int,
                "default": 1,
                "help": "Write the output JSON every N translated items. Default: 1",
            },
        ),
    ]
    for flag, options in option_table:
        parser.add_argument(flag, **options)

    return parser
|
||
|
||
|
||
def _report_failures(failures: list[tuple[str, str, str]]) -> None:
    """Print the failure count and the first 20 failures to stderr."""
    print(f"Completed with {len(failures)} translation failures.", file=sys.stderr, flush=True)
    for text, lang, error in failures[:20]:
        print(f"- {lang}: {text[:80]} -> {error}", file=sys.stderr, flush=True)
    if len(failures) > 20:
        print(f"... and {len(failures) - 20} more.", file=sys.stderr, flush=True)


def main() -> int:
    """Extract translate strings and build one JSON cache file per language.

    For every selected language the existing ``<lang>.json`` is loaded,
    non-empty translations are reused unless ``--refresh`` is given, and
    the remaining strings are translated with the selected provider (or
    left empty with ``--extract-only``). Each written file contains exactly
    the strings processed in this run, so entries for other strings are
    dropped, including those cut off by ``--limit``.

    Returns:
        0 on success, 1 when the scripts directory is missing or no
        language is selected, 2 when at least one translation failed.
    """
    args = build_arg_parser().parse_args()
    scripts_dir = args.scripts_dir.resolve()
    if args.output is not None:
        output_dir = args.output.resolve().parent / "lang"
    else:
        output_dir = args.output_dir.resolve()
    # dict.fromkeys drops repeated codes while keeping the given order, so
    # "es,es" does not process (and count) the same language twice.
    languages = list(
        dict.fromkeys(
            lang.strip() for lang in args.languages.split(",") if lang.strip()
        )
    )

    if not scripts_dir.is_dir():
        print(f"Scripts directory not found: {scripts_dir}", file=sys.stderr)
        return 1
    if not languages:
        print("No destination languages selected.", file=sys.stderr)
        return 1

    texts = extract_translate_texts(scripts_dir, args.extra_file)
    if args.limit > 0:
        texts = texts[: args.limit]

    print(f"Found {len(texts)} unique translate strings.", flush=True)
    print(f"Output directory: {output_dir}", flush=True)
    print(f"Languages: {', '.join(languages)}", flush=True)

    failures: list[tuple[str, str, str]] = []
    total = len(texts) * len(languages)
    done = 0

    for lang in languages:
        cache_path = output_dir / f"{lang}.json"
        existing = load_language_cache(cache_path)
        # Seed the new cache with every reusable translation up front.
        # Periodic saves then never shrink the file on disk, so an
        # interrupted run keeps translations of strings not yet reached.
        current = {text: existing[text] for text in texts if existing.get(text)}
        print(f"Starting language: {lang}", flush=True)

        for index, text in enumerate(texts, start=1):
            done += 1
            if not args.refresh and existing.get(text):
                # Already carried over by the seeding step above.
                continue
            if args.extract_only:
                current[text] = existing.get(text, "")
                continue

            print(f"[{done}/{total}] {lang} ({index}/{len(texts)}): {text[:80]}", flush=True)
            try:
                translated = translate_text(
                    text,
                    lang,
                    args.provider,
                    args.context,
                    args.timeout,
                    args.appimage_path,
                )
            except Exception as exc:
                # Top-level boundary: one failing string must not abort the
                # whole run. Keep the previous value, else the source text.
                # NOTE(review): storing the source text makes the entry look
                # translated on the next run, so it is not retried without
                # --refresh -- confirm this is intended.
                current[text] = existing.get(text, text)
                failures.append((text, lang, str(exc)))
                print(f" failed: {exc}", file=sys.stderr, flush=True)
            else:
                current[text] = translated
                print(f" => {translated[:100]}", flush=True)

            if args.save_every > 0 and index % args.save_every == 0:
                write_language_cache(cache_path, current)
            # Pause between provider requests; time.sleep rejects negatives.
            if args.sleep > 0:
                time.sleep(args.sleep)

        write_language_cache(cache_path, current)
        print(f"Completed language: {lang}", flush=True)

    if failures:
        _report_failures(failures)
        return 2

    print("Translation cache generated successfully.", flush=True)
    return 0
|
||
|
||
|
||
# Script entry point: the return value of main() becomes the exit status.
if __name__ == "__main__":
    sys.exit(main())
|