chore: improve doc generation and test (#762)

Improve documentation generation and add tests for the documentation.
Extend Sphinx with the todo directive.

The configuration table is now split into several tables; the test
is adapted accordingly.
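The split follows the top-level configuration fields. A minimal sketch of how the generated file names are derived, mirroring the adapted test below (the helper itself is illustrative and not part of the repository):

# Illustrative helper: derive the generated markdown file names from the
# top-level pydantic fields, the same way the adapted test below does.
from pathlib import Path

def generated_config_files(config, docs_generated: Path) -> list[Path]:
    files = [docs_generated / "config.md", docs_generated / "configexample.md"]
    for field_name in sorted(config.__class__.model_fields.keys()):
        files.append(docs_generated / f"config{field_name.lower()}.md")
    return files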

There is a new test that checks that the docstrings comply with the
RST format used by Sphinx to create the documentation. We cannot use
Markdown in docstrings. The docstrings are adapted accordingly.
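For example, a Markdown fence inside a docstring fails RST validation, while the equivalent RST code-block directive passes (illustrative docstrings, not taken from the repository):

# Illustrative: Markdown fences (top) are invalid in docstrings; the RST
# code-block directive (bottom) is what Sphinx expects.
def bad():
    """Do something.

    Example:
        ```python
        bad()
        ```
    """

def good():
    """Do something.

    Example:
        .. code-block:: python

            good()
    """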

An additional test checks that the documentation can be built with Sphinx.
This test takes very long and is therefore only enabled in full run (aka CI) mode.
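A minimal sketch of how such a gate can be wired up in conftest.py; the --full-run option name is an assumption, only the is_full_run fixture name is taken from the new test:

# Hypothetical conftest.py wiring; the actual repository setup may differ.
import pytest

def pytest_addoption(parser):
    parser.addoption("--full-run", action="store_true", default=False,
                     help="enable long-running tests such as the Sphinx build")

@pytest.fixture
def is_full_run(request) -> bool:
    return request.config.getoption("--full-run")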

Signed-off-by: Bobby Noelte <b0661n0e17e@gmail.com>
Bobby Noelte
2025-11-13 22:53:46 +01:00
committed by GitHub
parent 8da137f8f1
commit 7bf9dd723e
38 changed files with 3250 additions and 2092 deletions


@@ -1,5 +1,6 @@
import json
import os
import shutil
import sys
from pathlib import Path
from unittest.mock import patch
@@ -9,6 +10,9 @@ import pytest
DIR_PROJECT_ROOT = Path(__file__).parent.parent
DIR_TESTDATA = Path(__file__).parent / "testdata"
DIR_DOCS_GENERATED = DIR_PROJECT_ROOT / "docs" / "_generated"
DIR_TEST_GENERATED = DIR_TESTDATA / "docs" / "_generated"
def test_openapi_spec_current(config_eos):
"""Verify the openapi spec hasn´t changed."""
@@ -74,11 +78,14 @@ def test_openapi_md_current(config_eos):
def test_config_md_current(config_eos):
"""Verify the generated configuration markdown hasn´t changed."""
expected_config_md_path = DIR_PROJECT_ROOT / "docs" / "_generated" / "config.md"
new_config_md_path = DIR_TESTDATA / "config-new.md"
assert DIR_DOCS_GENERATED.exists()
with expected_config_md_path.open("r", encoding="utf-8", newline=None) as f_expected:
expected_config_md = f_expected.read()
# Remove any leftover files from last run
if DIR_TEST_GENERATED.exists():
shutil.rmtree(DIR_TEST_GENERATED)
# Ensure test dir exists
DIR_TEST_GENERATED.mkdir(parents=True, exist_ok=True)
# Patch get_config and import within guard to patch global variables within the eos module.
with patch("akkudoktoreos.config.config.get_config", return_value=config_eos):
@@ -87,17 +94,33 @@ def test_config_md_current(config_eos):
sys.path.insert(0, str(root_dir))
from scripts import generate_config_md
config_md = generate_config_md.generate_config_md(config_eos)
# Get all the top level fields
field_names = sorted(config_eos.__class__.model_fields.keys())
if os.name == "nt":
config_md = config_md.replace("\\\\", "/")
with new_config_md_path.open("w", encoding="utf-8", newline="\n") as f_new:
f_new.write(config_md)
# Create the file paths
expected = [DIR_DOCS_GENERATED / "config.md", DIR_DOCS_GENERATED / "configexample.md"]
tested = [DIR_TEST_GENERATED / "config.md", DIR_TEST_GENERATED / "configexample.md"]
for field_name in field_names:
file_name = f"config{field_name.lower()}.md"
expected.append(DIR_DOCS_GENERATED / file_name)
tested.append(DIR_TEST_GENERATED / file_name)
try:
assert config_md == expected_config_md
except AssertionError as e:
pytest.fail(
f"Expected {new_config_md_path} to equal {expected_config_md_path}.\n"
+ f"If ok: `make gen-docs` or `cp {new_config_md_path} {expected_config_md_path}`\n"
)
# Create test files
config_md = generate_config_md.generate_config_md(tested[0], config_eos)
# Check test files are the same as the expected files
for i, expected_path in enumerate(expected):
tested_path = tested[i]
with expected_path.open("r", encoding="utf-8", newline=None) as f_expected:
expected_config_md = f_expected.read()
with tested_path.open("r", encoding="utf-8", newline=None) as f_tested:
tested_config_md = f_tested.read()
try:
assert tested_config_md == expected_config_md
except AssertionError as e:
pytest.fail(
f"Expected {tested_path} to equal {expected_path}.\n"
+ f"If ok: `make gen-docs` or `cp {tested_path} {expected_path}`\n"
)
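The call site above is all this diff shows of the new interface; a sketch of the assumed signature in scripts/generate_config_md.py, inferred from that call only:

# Assumed from the call site; the real implementation may differ.
from pathlib import Path

def generate_config_md(config_md_path: Path, config) -> str:
    """Write config.md to config_md_path, plus configexample.md and one
    config<field>.md per top-level field into the same directory, and
    (presumably) return the content of config.md."""
    ...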

tests/test_docsphinx.py (new file, 178 lines)

@@ -0,0 +1,178 @@
import hashlib
import json
import os
import shutil
import subprocess
import sys
import tempfile
from fnmatch import fnmatch
from pathlib import Path
import pytest
DIR_PROJECT_ROOT = Path(__file__).absolute().parent.parent
DIR_BUILD = DIR_PROJECT_ROOT / "build"
DIR_BUILD_DOCS = DIR_PROJECT_ROOT / "build" / "docs"
DIR_DOCS = DIR_PROJECT_ROOT / "docs"
DIR_SRC = DIR_PROJECT_ROOT / "src"
HASH_FILE = DIR_BUILD / ".sphinx_hash.json"
# Allowed file suffixes to consider
ALLOWED_SUFFIXES = {".py", ".md", ".json"}
# Directory patterns to exclude (glob-like)
EXCLUDED_DIR_PATTERNS = {"*_autosum", "*__pycache__"}
def is_excluded_dir(path: Path) -> bool:
"""Check whether a directory should be excluded based on name patterns."""
return any(fnmatch(path.name, pattern) for pattern in EXCLUDED_DIR_PATTERNS)
def hash_tree(paths: list[Path], suffixes=ALLOWED_SUFFIXES) -> str:
"""Return SHA256 hash for files under `paths`.
Restricted by suffix, excluding excluded directory patterns.
"""
h = hashlib.sha256()
for root in paths:
if not root.exists():
continue
for p in sorted(root.rglob("*")):
# Skip excluded directories
if p.is_dir() and is_excluded_dir(p):
continue
# Skip files inside excluded directories
if any(is_excluded_dir(parent) for parent in p.parents):
continue
# Hash only allowed file types
if p.is_file() and p.suffix.lower() in suffixes:
h.update(p.read_bytes())
return h.hexdigest()
def find_sphinx_build() -> str:
venv = os.getenv("VIRTUAL_ENV")
paths = [Path(venv)] if venv else []
paths.append(DIR_PROJECT_ROOT / ".venv")
for base in paths:
bin_dir = base / ("Scripts" if os.name == "nt" else "bin")
cmd = bin_dir / ("sphinx-build.exe" if os.name == "nt" else "sphinx-build")
if cmd.exists():
return str(cmd)
return "sphinx-build"
@pytest.fixture(scope="session")
def sphinx_changed() -> bool:
"""Returns True if any watched files have changed since last run.
Hash is stored in .sphinx_hash.json.
"""
# Directories whose changes should trigger rebuilding docs
watched_paths = [DIR_DOCS, DIR_SRC]
current_hash = hash_tree(watched_paths)
# Load previous hash
try:
previous = json.loads(HASH_FILE.read_text())
previous_hash = previous.get("hash")
except Exception:
previous_hash = None
changed = (previous_hash != current_hash)
# Update stored hash
HASH_FILE.parent.mkdir(parents=True, exist_ok=True)
HASH_FILE.write_text(json.dumps({"hash": current_hash}, indent=2))
return changed
class TestSphinxDocumentation:
"""Test class to verify Sphinx documentation generation.
Ensures no major warnings are emitted.
"""
SPHINX_CMD = [
find_sphinx_build(),
"-M",
"html",
str(DIR_DOCS),
str(DIR_BUILD_DOCS),
]
def _cleanup_autosum_dirs(self):
"""Delete all *_autosum folders inside docs/."""
for folder in DIR_DOCS.rglob("*_autosum"):
if folder.is_dir():
shutil.rmtree(folder)
def _cleanup_build_dir(self):
"""Delete build/docs directory if present."""
if DIR_BUILD_DOCS.exists():
shutil.rmtree(DIR_BUILD_DOCS)
def test_sphinx_build(self, sphinx_changed: bool, is_full_run: bool):
"""Build Sphinx documentation and ensure no major warnings appear in the build output."""
if not is_full_run:
pytest.skip("Skipping Sphinx test — not full run")
if not sphinx_changed:
pytest.skip(f"Skipping Sphinx build — no relevant file changes detected: {HASH_FILE}")
# Ensure docs folder exists
if not DIR_DOCS.exists():
pytest.skip(f"Skipping Sphinx build test - docs folder not present: {DIR_DOCS}")
# Clean directories
self._cleanup_autosum_dirs()
self._cleanup_build_dir()
# Set environment for sphinx run (sphinx will make eos create a config file)
eos_tmp_dir = tempfile.TemporaryDirectory()
eos_dir = str(eos_tmp_dir.name)
env = os.environ.copy()
env["EOS_DIR"] = eos_dir
env["EOS_CONFIG_DIR"] = eos_dir
try:
# Run sphinx-build
project_dir = DIR_PROJECT_ROOT
process = subprocess.run(
self.SPHINX_CMD,
check=True,
env=env,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
text=True,
cwd=project_dir,
)
# Combine output
output = process.stdout + "\n" + process.stderr
returncode = process.returncode
except Exception:
output = f"ERROR: Could not start sphinx-build - {self.SPHINX_CMD}"
returncode = -1
# Remove temporary EOS_DIR
eos_tmp_dir.cleanup()
assert returncode == 0, output
# Possible markers: ERROR: WARNING: TRACEBACK:
major_markers = ("ERROR:", "TRACEBACK:")
bad_lines = [
line for line in output.splitlines()
if any(marker in line for marker in major_markers)
]
assert not bad_lines, "Sphinx build contained errors:\n" + "\n".join(bad_lines)
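The hash gate can be exercised outside pytest; an illustrative snippet using the helpers and paths defined in this module:

# Illustrative: recompute the watched-tree hash and compare it with the
# stored one. Deleting HASH_FILE forces the next full run to rebuild.
import json

current = hash_tree([DIR_DOCS, DIR_SRC])
try:
    stored = json.loads(HASH_FILE.read_text()).get("hash")
except FileNotFoundError:
    stored = None
print("Sphinx rebuild needed:", stored != current)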

tests/test_docstringrst.py (new file, 371 lines)

@@ -0,0 +1,371 @@
import importlib
import importlib.util
import inspect
import pkgutil
import re
import sys
from difflib import SequenceMatcher
from pathlib import Path
from docutils import nodes
from docutils.core import publish_parts
from docutils.frontend import OptionParser
from docutils.parsers.rst import Directive, Parser, directives
from docutils.utils import Reporter, new_document
from sphinx.ext.napoleon import Config as NapoleonConfig
from sphinx.ext.napoleon.docstring import GoogleDocstring
DIR_PROJECT_ROOT = Path(__file__).absolute().parent.parent
DIR_DOCS = DIR_PROJECT_ROOT / "docs"
PACKAGE_NAME = "akkudoktoreos"
# ---------------------------------------------------------------------------
# Location ignore rules (regex)
# ---------------------------------------------------------------------------
# Locations to ignore (regex). Note the escaped dot for literal '.'
IGNORE_LOCATIONS = [
r"\.__new__$",
# Pydantic
r"\.model_copy$",
r"\.model_dump$",
r"\.model_dump_json$",
r"\.field_serializer$",
r"\.field_validator$",
r"\.model_validator$",
r"\.computed_field$",
r"\.Field$",
r"\.FieldInfo.*",
r"\.ComputedFieldInfo.*",
r"\.PrivateAttr$",
# pathlib
r"\.Path.*",
# MarkdownIt
r"\.MarkdownIt.*",
# FastAPI
r"\.FastAPI.*",
r"\.FileResponse.*",
r"\.PdfResponse.*",
r"\.HTTPException$",
# bokeh
r"\.bokeh.*",
r"\.figure.*",
r"\.ColumnDataSource.*",
r"\.LinearAxis.*",
r"\.Range1d.*",
# BeautifulSoup
r"\.BeautifulSoup.*",
# ExponentialSmoothing
r"\.ExponentialSmoothing.*",
# Pendulum
r"\.Date$",
r"\.DateTime$",
r"\.Duration$",
# ABC
r"\.abstractmethod$",
# numpytypes
r"\.NDArray$",
# typing
r"\.ParamSpec",
r"\.TypeVar",
r"\.Annotated",
# contextlib
r"\.asynccontextmanager$",
# concurrent
r"\.ThreadPoolExecutor.*",
# asyncio
r"\.Lock.*",
# scipy
r"\.RegularGridInterpolator.*",
# pylogging
r"\.InterceptHandler.filter$",
# itertools
r"\.chain$",
# functools
r"\.partial$",
]
# ---------------------------------------------------------------------------
# Error message ignore rules by location (regex)
# ---------------------------------------------------------------------------
IGNORE_ERRORS_BY_LOCATION = {
r"^akkudoktoreos.*": [
r"Unexpected possible title overline or transition.*",
],
}
# Load the Sphinx configuration from docs/conf.py
conf_path = DIR_DOCS / "conf.py"
spec = importlib.util.spec_from_file_location("sphinx_conf", conf_path)
if spec is None:
raise AssertionError(f"Cannot import sphinx_conf from {conf_path}")
sphinx_conf = importlib.util.module_from_spec(spec)
sys.modules["sphinx_conf"] = sphinx_conf
if spec.loader is None:
raise AssertionError(f"Cannot import sphinx_conf from {conf_path}")
spec.loader.exec_module(sphinx_conf)
# Build NapoleonConfig with all options
napoleon_config = NapoleonConfig(
napoleon_google_docstring=getattr(sphinx_conf, "napoleon_google_docstring", True),
napoleon_numpy_docstring=getattr(sphinx_conf, "napoleon_numpy_docstring", False),
napoleon_include_init_with_doc=getattr(sphinx_conf, "napoleon_include_init_with_doc", False),
napoleon_include_private_with_doc=getattr(sphinx_conf, "napoleon_include_private_with_doc", False),
napoleon_include_special_with_doc=getattr(sphinx_conf, "napoleon_include_special_with_doc", True),
napoleon_use_admonition_for_examples=getattr(sphinx_conf, "napoleon_use_admonition_for_examples", False),
napoleon_use_admonition_for_notes=getattr(sphinx_conf, "napoleon_use_admonition_for_notes", False),
napoleon_use_admonition_for_references=getattr(sphinx_conf, "napoleon_use_admonition_for_references", False),
napoleon_use_ivar=getattr(sphinx_conf, "napoleon_use_ivar", False),
napoleon_use_param=getattr(sphinx_conf, "napoleon_use_param", True),
napoleon_use_rtype=getattr(sphinx_conf, "napoleon_use_rtype", True),
napoleon_preprocess_types=getattr(sphinx_conf, "napoleon_preprocess_types", False),
napoleon_type_aliases=getattr(sphinx_conf, "napoleon_type_aliases", None),
napoleon_attr_annotations=getattr(sphinx_conf, "napoleon_attr_annotations", True),
)
FENCE_RE = re.compile(r"^```(\w*)\s*$")
def replace_fenced_code_blocks(doc: str) -> tuple[str, bool]:
"""Replace fenced code blocks (```lang) in a docstring with RST code-block syntax.
Returns:
(new_doc, changed):
new_doc: The docstring with replacements applied
changed: True if any fenced block was replaced
"""
out_lines = []
inside = False
lang = ""
buffer: list[str] = []
changed = False
lines = doc.split("\n")
for line in lines:
stripped = line.strip()
# Detect opening fence: ``` or ```python
m = FENCE_RE.match(stripped)
if m and not inside:
inside = True
lang = m.group(1) or ""
# Write RST code-block header
if lang:
out_lines.append(f" .. code-block:: {lang}")
else:
out_lines.append(" .. code-block::")
out_lines.append("") # blank line required by RST
changed = True
continue
# Detect closing fence ```
if stripped == "```" and inside:
# Emit fenced code content with indentation
for b in buffer:
out_lines.append(" " + b)
out_lines.append("") # trailing blank line to close environment
inside = False
buffer = []
continue
if inside:
buffer.append(line)
else:
out_lines.append(line)
# If doc ended while still in fenced code, flush
if inside:
changed = True
for b in buffer:
out_lines.append(" " + b)
out_lines.append("")
inside = False
return "\n".join(out_lines), changed
def prepare_docutils_for_sphinx():
class NoOpDirective(Directive):
has_content = True
required_arguments = 0
optional_arguments = 100
final_argument_whitespace = True
def run(self):
return []
for d in ["attribute", "data", "method", "function", "class", "event", "todo"]:
directives.register_directive(d, NoOpDirective)
def validate_rst(text: str) -> list[tuple[int, str]]:
"""Validate a string as reStructuredText.
Returns a list of tuples: (line_number, message).
"""
if not text or not text.strip():
return []
warnings: list[tuple[int, str]] = []
class RecordingReporter(Reporter):
"""Capture warnings/errors instead of halting."""
def system_message(self, level, message, *children, **kwargs):
line = kwargs.get("line", None)
warnings.append((line or 0, message))
return nodes.system_message(message, level=level, type=self.levels[level], *children, **kwargs)
# Create default settings
settings = OptionParser(components=(Parser,)).get_default_values()
document = new_document("<docstring>", settings=settings)
# Attach custom reporter
document.reporter = RecordingReporter(
source="<docstring>",
report_level=1, # capture warnings and above
halt_level=100, # never halt
stream=None,
debug=False
)
parser = Parser()
parser.parse(text, document)
return warnings
def iter_docstrings(package_name: str):
"""Yield docstrings of modules, classes, functions in the given package."""
package = importlib.import_module(package_name)
for module_info in pkgutil.walk_packages(package.__path__, package.__name__ + "."):
module = importlib.import_module(module_info.name)
# Module docstring
if module.__doc__:
yield f"Module {module.__name__}", inspect.getdoc(module)
# Classes + methods
for _, obj in inspect.getmembers(module):
if inspect.isclass(obj) or inspect.isfunction(obj):
if obj.__doc__:
yield f"{module.__name__}.{obj.__name__}", inspect.getdoc(obj)
# Methods of classes
if inspect.isclass(obj):
for _, meth in inspect.getmembers(obj, inspect.isfunction):
if meth.__doc__:
yield f"{module.__name__}.{obj.__name__}.{meth.__name__}", inspect.getdoc(meth)
def map_converted_to_original(orig: str, conv: str) -> dict[int, int]:
"""Map converted docstring lines back to original docstring lines.
Returns:
mapping: key = converted line index (0-based), value = original line index (0-based).
"""
orig_lines = orig.splitlines()
conv_lines = conv.splitlines()
matcher = SequenceMatcher(None, orig_lines, conv_lines)
line_map = {}
for tag, i1, i2, j1, j2 in matcher.get_opcodes():
if tag in ("equal", "replace"):
for o, c in zip(range(i1, i2), range(j1, j2)):
line_map[c] = o
elif tag == "insert":
for c in range(j1, j2):
line_map[c] = max(i1 - 1, 0)
return line_map
def test_all_docstrings_rst_compliant():
"""All docstrings must be valid reStructuredText."""
failures = []
for location, doc in iter_docstrings(PACKAGE_NAME):
# Skip ignored locations
if any(re.search(pat, location) for pat in IGNORE_LOCATIONS):
continue
# convert like sphinx napoleon does
doc_converted = str(GoogleDocstring(doc, napoleon_config))
# Register directives that sphinx knows - just to avoid errors
prepare_docutils_for_sphinx()
# Validate
messages = validate_rst(doc_converted)
if not messages:
continue
# Map converted line numbers back to original docstring
line_map = map_converted_to_original(doc, doc_converted)
# Filter messages
filtered_messages = []
ignore_msg_patterns = []
for loc_pattern, patterns in IGNORE_ERRORS_BY_LOCATION.items():
if re.search(loc_pattern, location):
ignore_msg_patterns.extend(patterns)
for conv_line, msg_text in messages:
orig_line = line_map.get(conv_line - 1, conv_line - 1) + 1
if any(re.search(pat, msg_text) for pat in ignore_msg_patterns):
continue
filtered_messages.append((orig_line, msg_text))
if filtered_messages:
failures.append((location, filtered_messages, doc, doc_converted))
# Raise AssertionError with nicely formatted output
if failures:
msg = "Invalid reST docstrings (see https://www.sphinx-doc.org/en/master/usage/extensions/example_google.html for valid format):\n"
for location, errors, doc, doc_converted in failures:
msg += f"\n--- {location} ---\n"
msg += "\nConverted by Sphinx Napoleon:\n"
doc_lines = doc_converted.splitlines()
for i, line_content in enumerate(doc_lines, start=1):
line_str = f"{i:2}" # fixed-width
msg += f" L{line_str}: {line_content}\n"
msg += "\nOriginal:\n"
doc_lines = doc.splitlines()
error_map = {line: err for line, err in errors}
for i, line_content in enumerate(doc_lines, start=1):
line_str = f"{i:2}" # fixed-width
if i in error_map:
msg += f">>> L{line_str}: {line_content} <-- {error_map[i]}\n"
else:
msg += f" L{line_str}: {line_content}\n"
doc_fixed, changed = replace_fenced_code_blocks(doc)
if changed:
msg += "\nImproved for fenced code blocks:\n"
msg += '"""' + doc_fixed + '\n"""\n'
msg += f"Total: {len(failures)} docstrings"
raise AssertionError(msg)
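Taken together, the helpers form a small validation pipeline; an illustrative run on a single Google-style docstring (all names as defined in this module):

# Illustrative end-to-end use of the helpers above.
doc = """Add two numbers.

Args:
    a: First operand.
    b: Second operand.

Returns:
    The sum of a and b.
"""
prepare_docutils_for_sphinx()
converted = str(GoogleDocstring(doc, napoleon_config))
for line, message in validate_rst(converted):
    print(f"L{line}: {message}")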