chore: improve doc generation and test (#762)

Improve documentation generation and add tests for the documentation.
Extend Sphinx with the todo directive.

The configuration table is now split into several tables; the test
is adapted accordingly.
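The split follows the top-level configuration fields. A minimal sketch of how the generated file names are derived, mirroring the adapted test below (the helper itself is illustrative and not part of the repository):

# Illustrative helper: derive the generated markdown file names from the
# top-level pydantic fields, the same way the adapted test below does.
from pathlib import Path

def generated_config_files(config, docs_generated: Path) -> list[Path]:
    files = [docs_generated / "config.md", docs_generated / "configexample.md"]
    for field_name in sorted(config.__class__.model_fields.keys()):
        files.append(docs_generated / f"config{field_name.lower()}.md")
    return files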

There is a new test that checks that the docstrings comply with the
RST format used by Sphinx to create the documentation. We cannot use
Markdown in docstrings. The docstrings are adapted accordingly.
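For example, a Markdown fence inside a docstring fails RST validation, while the equivalent RST code-block directive passes (illustrative docstrings, not taken from the repository):

# Illustrative: Markdown fences (top) are invalid in docstrings; the RST
# code-block directive (bottom) is what Sphinx expects.
def bad():
    """Do something.

    Example:
        ```python
        bad()
        ```
    """

def good():
    """Do something.

    Example:
        .. code-block:: python

            good()
    """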

An additional test checks that the documentation can be built with Sphinx.
This test takes very long and is therefore only enabled in full run (aka CI) mode.
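A minimal sketch of how such a gate can be wired up in conftest.py; the --full-run option name is an assumption, only the is_full_run fixture name is taken from the new test:

# Hypothetical conftest.py wiring; the actual repository setup may differ.
import pytest

def pytest_addoption(parser):
    parser.addoption("--full-run", action="store_true", default=False,
                     help="enable long-running tests such as the Sphinx build")

@pytest.fixture
def is_full_run(request) -> bool:
    return request.config.getoption("--full-run")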

Signed-off-by: Bobby Noelte <b0661n0e17e@gmail.com>
Bobby Noelte
2025-11-13 22:53:46 +01:00
committed by GitHub
parent 8da137f8f1
commit 7bf9dd723e
38 changed files with 3250 additions and 2092 deletions


@@ -1,5 +1,6 @@
import json
import os
import shutil
import sys
from pathlib import Path
from unittest.mock import patch
@@ -9,6 +10,9 @@ import pytest
DIR_PROJECT_ROOT = Path(__file__).parent.parent
DIR_TESTDATA = Path(__file__).parent / "testdata"
DIR_DOCS_GENERATED = DIR_PROJECT_ROOT / "docs" / "_generated"
DIR_TEST_GENERATED = DIR_TESTDATA / "docs" / "_generated"
def test_openapi_spec_current(config_eos):
"""Verify the openapi spec hasn´t changed."""
@@ -74,11 +78,14 @@ def test_openapi_md_current(config_eos):
def test_config_md_current(config_eos):
"""Verify the generated configuration markdown hasn´t changed."""
expected_config_md_path = DIR_PROJECT_ROOT / "docs" / "_generated" / "config.md"
new_config_md_path = DIR_TESTDATA / "config-new.md"
assert DIR_DOCS_GENERATED.exists()
with expected_config_md_path.open("r", encoding="utf-8", newline=None) as f_expected:
expected_config_md = f_expected.read()
# Remove any leftover files from last run
if DIR_TEST_GENERATED.exists():
shutil.rmtree(DIR_TEST_GENERATED)
# Ensure test dir exists
DIR_TEST_GENERATED.mkdir(parents=True, exist_ok=True)
# Patch get_config and import within guard to patch global variables within the eos module.
with patch("akkudoktoreos.config.config.get_config", return_value=config_eos):
@@ -87,17 +94,33 @@ def test_config_md_current(config_eos):
sys.path.insert(0, str(root_dir))
from scripts import generate_config_md
config_md = generate_config_md.generate_config_md(config_eos)
# Get all the top level fields
field_names = sorted(config_eos.__class__.model_fields.keys())
if os.name == "nt":
config_md = config_md.replace("\\\\", "/")
with new_config_md_path.open("w", encoding="utf-8", newline="\n") as f_new:
f_new.write(config_md)
# Create the file paths
expected = [DIR_DOCS_GENERATED / "config.md", DIR_DOCS_GENERATED / "configexample.md"]
tested = [DIR_TEST_GENERATED / "config.md", DIR_TEST_GENERATED / "configexample.md"]
for field_name in field_names:
file_name = f"config{field_name.lower()}.md"
expected.append(DIR_DOCS_GENERATED / file_name)
tested.append(DIR_TEST_GENERATED / file_name)
try:
assert config_md == expected_config_md
except AssertionError as e:
pytest.fail(
f"Expected {new_config_md_path} to equal {expected_config_md_path}.\n"
+ f"If ok: `make gen-docs` or `cp {new_config_md_path} {expected_config_md_path}`\n"
)
# Create test files
config_md = generate_config_md.generate_config_md(tested[0], config_eos)
# Check test files are the same as the expected files
for i, expected_path in enumerate(expected):
tested_path = tested[i]
with expected_path.open("r", encoding="utf-8", newline=None) as f_expected:
expected_config_md = f_expected.read()
with tested_path.open("r", encoding="utf-8", newline=None) as f_tested:
tested_config_md = f_tested.read()
try:
assert tested_config_md == expected_config_md
except AssertionError as e:
pytest.fail(
f"Expected {tested_path} to equal {expected_path}.\n"
+ f"If ok: `make gen-docs` or `cp {tested_path} {expected_path}`\n"
)
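The call site above is all this diff shows of the new interface; a sketch of the assumed signature in scripts/generate_config_md.py, inferred from that call only:

# Assumed from the call site; the real implementation may differ.
from pathlib import Path

def generate_config_md(config_md_path: Path, config) -> str:
    """Write config.md to config_md_path, plus configexample.md and one
    config<field>.md per top-level field into the same directory, and
    (presumably) return the content of config.md."""
    ...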

tests/test_docsphinx.py (new file, 178 lines)

@@ -0,0 +1,178 @@
import hashlib
import json
import os
import shutil
import subprocess
import sys
import tempfile
from fnmatch import fnmatch
from pathlib import Path
import pytest
DIR_PROJECT_ROOT = Path(__file__).absolute().parent.parent
DIR_BUILD = DIR_PROJECT_ROOT / "build"
DIR_BUILD_DOCS = DIR_PROJECT_ROOT / "build" / "docs"
DIR_DOCS = DIR_PROJECT_ROOT / "docs"
DIR_SRC = DIR_PROJECT_ROOT / "src"
HASH_FILE = DIR_BUILD / ".sphinx_hash.json"
# Allowed file suffixes to consider
ALLOWED_SUFFIXES = {".py", ".md", ".json"}
# Directory patterns to exclude (glob-like)
EXCLUDED_DIR_PATTERNS = {"*_autosum", "*__pycache__"}
def is_excluded_dir(path: Path) -> bool:
"""Check whether a directory should be excluded based on name patterns."""
return any(fnmatch(path.name, pattern) for pattern in EXCLUDED_DIR_PATTERNS)
def hash_tree(paths: list[Path], suffixes=ALLOWED_SUFFIXES) -> str:
"""Return SHA256 hash for files under `paths`.
Restricted by suffix, excluding excluded directory patterns.
"""
h = hashlib.sha256()
for root in paths:
if not root.exists():
continue
for p in sorted(root.rglob("*")):
# Skip excluded directories
if p.is_dir() and is_excluded_dir(p):
continue
# Skip files inside excluded directories
if any(is_excluded_dir(parent) for parent in p.parents):
continue
# Hash only allowed file types
if p.is_file() and p.suffix.lower() in suffixes:
h.update(p.read_bytes())
return h.hexdigest()
def find_sphinx_build() -> str:
venv = os.getenv("VIRTUAL_ENV")
paths = [Path(venv)] if venv else []
paths.append(DIR_PROJECT_ROOT / ".venv")
for base in paths:
bin_dir = base / ("Scripts" if os.name == "nt" else "bin")
cmd = bin_dir / ("sphinx-build.exe" if os.name == "nt" else "sphinx-build")
if cmd.exists():
return str(cmd)
return "sphinx-build"
@pytest.fixture(scope="session")
def sphinx_changed() -> bool:
"""Returns True if any watched files have changed since last run.
Hash is stored in .sphinx_hash.json.
"""
# Directories whose changes should trigger rebuilding docs
watched_paths = [DIR_DOCS, DIR_SRC]
current_hash = hash_tree(watched_paths)
# Load previous hash
try:
previous = json.loads(HASH_FILE.read_text())
previous_hash = previous.get("hash")
except Exception:
previous_hash = None
changed = (previous_hash != current_hash)
# Update stored hash
HASH_FILE.parent.mkdir(parents=True, exist_ok=True)
HASH_FILE.write_text(json.dumps({"hash": current_hash}, indent=2))
return changed
class TestSphinxDocumentation:
"""Test class to verify Sphinx documentation generation.
Ensures no major warnings are emitted.
"""
SPHINX_CMD = [
find_sphinx_build(),
"-M",
"html",
str(DIR_DOCS),
str(DIR_BUILD_DOCS),
]
def _cleanup_autosum_dirs(self):
"""Delete all *_autosum folders inside docs/."""
for folder in DIR_DOCS.rglob("*_autosum"):
if folder.is_dir():
shutil.rmtree(folder)
def _cleanup_build_dir(self):
"""Delete build/docs directory if present."""
if DIR_BUILD_DOCS.exists():
shutil.rmtree(DIR_BUILD_DOCS)
def test_sphinx_build(self, sphinx_changed: bool, is_full_run: bool):
"""Build Sphinx documentation and ensure no major warnings appear in the build output."""
if not is_full_run:
pytest.skip("Skipping Sphinx test — not full run")
if not sphinx_changed:
pytest.skip(f"Skipping Sphinx build — no relevant file changes detected: {HASH_FILE}")
# Ensure docs folder exists
if not DIR_DOCS.exists():
pytest.skip(f"Skipping Sphinx build test - docs folder not present: {DIR_DOCS}")
# Clean directories
self._cleanup_autosum_dirs()
self._cleanup_build_dir()
# Set environment for sphinx run (sphinx will make eos create a config file)
eos_tmp_dir = tempfile.TemporaryDirectory()
eos_dir = str(eos_tmp_dir.name)
env = os.environ.copy()
env["EOS_DIR"] = eos_dir
env["EOS_CONFIG_DIR"] = eos_dir
try:
# Run sphinx-build
project_dir = DIR_PROJECT_ROOT
process = subprocess.run(
self.SPHINX_CMD,
check=True,
env=env,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
text=True,
cwd=project_dir,
)
# Combine output
output = process.stdout + "\n" + process.stderr
returncode = process.returncode
except Exception:
output = f"ERROR: Could not start sphinx-build - {self.SPHINX_CMD}"
returncode = -1
# Remove temporary EOS_DIR
eos_tmp_dir.cleanup()
assert returncode == 0, output
# Possible markers: ERROR: WARNING: TRACEBACK:
major_markers = ("ERROR:", "TRACEBACK:")
bad_lines = [
line for line in output.splitlines()
if any(marker in line for marker in major_markers)
]
assert not bad_lines, "Sphinx build contained errors:\n" + "\n".join(bad_lines)
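The hash gate can be exercised outside pytest; an illustrative snippet using the helpers and paths defined in this module:

# Illustrative: recompute the watched-tree hash and compare it with the
# stored one. Deleting HASH_FILE forces the next full run to rebuild.
import json

current = hash_tree([DIR_DOCS, DIR_SRC])
try:
    stored = json.loads(HASH_FILE.read_text()).get("hash")
except FileNotFoundError:
    stored = None
print("Sphinx rebuild needed:", stored != current)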

tests/test_docstringrst.py (new file, 371 lines)

@@ -0,0 +1,371 @@
import importlib
import importlib.util
import inspect
import pkgutil
import re
import sys
from difflib import SequenceMatcher
from pathlib import Path
from docutils import nodes
from docutils.core import publish_parts
from docutils.frontend import OptionParser
from docutils.parsers.rst import Directive, Parser, directives
from docutils.utils import Reporter, new_document
from sphinx.ext.napoleon import Config as NapoleonConfig
from sphinx.ext.napoleon.docstring import GoogleDocstring
DIR_PROJECT_ROOT = Path(__file__).absolute().parent.parent
DIR_DOCS = DIR_PROJECT_ROOT / "docs"
PACKAGE_NAME = "akkudoktoreos"
# ---------------------------------------------------------------------------
# Location ignore rules (regex)
# ---------------------------------------------------------------------------
# Locations to ignore (regex). Note the escaped dot for literal '.'
IGNORE_LOCATIONS = [
r"\.__new__$",
# Pydantic
r"\.model_copy$",
r"\.model_dump$",
r"\.model_dump_json$",
r"\.field_serializer$",
r"\.field_validator$",
r"\.model_validator$",
r"\.computed_field$",
r"\.Field$",
r"\.FieldInfo.*",
r"\.ComputedFieldInfo.*",
r"\.PrivateAttr$",
# pathlib
r"\.Path.*",
# MarkdownIt
r"\.MarkdownIt.*",
# FastAPI
r"\.FastAPI.*",
r"\.FileResponse.*",
r"\.PdfResponse.*",
r"\.HTTPException$",
# bokeh
r"\.bokeh.*",
r"\.figure.*",
r"\.ColumnDataSource.*",
r"\.LinearAxis.*",
r"\.Range1d.*",
# BeautifulSoup
r"\.BeautifulSoup.*",
# ExponentialSmoothing
r"\.ExponentialSmoothing.*",
# Pendulum
r"\.Date$",
r"\.DateTime$",
r"\.Duration$",
# ABC
r"\.abstractmethod$",
# numpytypes
r"\.NDArray$",
# typing
r"\.ParamSpec",
r"\.TypeVar",
r"\.Annotated",
# contextlib
r"\.asynccontextmanager$",
# concurrent
r"\.ThreadPoolExecutor.*",
# asyncio
r"\.Lock.*",
# scipy
r"\.RegularGridInterpolator.*",
# pylogging
r"\.InterceptHandler.filter$",
# itertools
r"\.chain$",
# functools
r"\.partial$",
]
# ---------------------------------------------------------------------------
# Error message ignore rules by location (regex)
# ---------------------------------------------------------------------------
IGNORE_ERRORS_BY_LOCATION = {
r"^akkudoktoreos.*": [
r"Unexpected possible title overline or transition.*",
],
}
# Load the Sphinx configuration from docs/conf.py
conf_path = DIR_DOCS / "conf.py"
spec = importlib.util.spec_from_file_location("sphinx_conf", conf_path)
if spec is None:
raise AssertionError(f"Cannot import sphinx_conf from {conf_path}")
sphinx_conf = importlib.util.module_from_spec(spec)
sys.modules["sphinx_conf"] = sphinx_conf
if spec.loader is None:
raise AssertionError(f"Cannot import sphinx_conf from {conf_path}")
spec.loader.exec_module(sphinx_conf)
# Build NapoleonConfig with all options
napoleon_config = NapoleonConfig(
napoleon_google_docstring=getattr(sphinx_conf, "napoleon_google_docstring", True),
napoleon_numpy_docstring=getattr(sphinx_conf, "napoleon_numpy_docstring", False),
napoleon_include_init_with_doc=getattr(sphinx_conf, "napoleon_include_init_with_doc", False),
napoleon_include_private_with_doc=getattr(sphinx_conf, "napoleon_include_private_with_doc", False),
napoleon_include_special_with_doc=getattr(sphinx_conf, "napoleon_include_special_with_doc", True),
napoleon_use_admonition_for_examples=getattr(sphinx_conf, "napoleon_use_admonition_for_examples", False),
napoleon_use_admonition_for_notes=getattr(sphinx_conf, "napoleon_use_admonition_for_notes", False),
napoleon_use_admonition_for_references=getattr(sphinx_conf, "napoleon_use_admonition_for_references", False),
napoleon_use_ivar=getattr(sphinx_conf, "napoleon_use_ivar", False),
napoleon_use_param=getattr(sphinx_conf, "napoleon_use_param", True),
napoleon_use_rtype=getattr(sphinx_conf, "napoleon_use_rtype", True),
napoleon_preprocess_types=getattr(sphinx_conf, "napoleon_preprocess_types", False),
napoleon_type_aliases=getattr(sphinx_conf, "napoleon_type_aliases", None),
napoleon_attr_annotations=getattr(sphinx_conf, "napoleon_attr_annotations", True),
)
FENCE_RE = re.compile(r"^```(\w*)\s*$")
def replace_fenced_code_blocks(doc: str) -> tuple[str, bool]:
"""Replace fenced code blocks (```lang) in a docstring with RST code-block syntax.
Returns:
(new_doc, changed):
new_doc: The docstring with replacements applied
changed: True if any fenced block was replaced
"""
out_lines = []
inside = False
lang = ""
buffer: list[str] = []
changed = False
lines = doc.split("\n")
for line in lines:
stripped = line.strip()
# Detect opening fence: ``` or ```python
m = FENCE_RE.match(stripped)
if m and not inside:
inside = True
lang = m.group(1) or ""
# Write RST code-block header
if lang:
out_lines.append(f" .. code-block:: {lang}")
else:
out_lines.append(" .. code-block::")
out_lines.append("") # blank line required by RST
changed = True
continue
# Detect closing fence ```
if stripped == "```" and inside:
# Emit fenced code content with indentation
for b in buffer:
out_lines.append(" " + b)
out_lines.append("") # trailing blank line to close environment
inside = False
buffer = []
continue
if inside:
buffer.append(line)
else:
out_lines.append(line)
# If doc ended while still in fenced code, flush
if inside:
changed = True
for b in buffer:
out_lines.append(" " + b)
out_lines.append("")
inside = False
return "\n".join(out_lines), changed
def prepare_docutils_for_sphinx():
class NoOpDirective(Directive):
has_content = True
required_arguments = 0
optional_arguments = 100
final_argument_whitespace = True
def run(self):
return []
for d in ["attribute", "data", "method", "function", "class", "event", "todo"]:
directives.register_directive(d, NoOpDirective)
def validate_rst(text: str) -> list[tuple[int, str]]:
"""Validate a string as reStructuredText.
Returns a list of tuples: (line_number, message).
"""
if not text or not text.strip():
return []
warnings: list[tuple[int, str]] = []
class RecordingReporter(Reporter):
"""Capture warnings/errors instead of halting."""
def system_message(self, level, message, *children, **kwargs):
line = kwargs.get("line", None)
warnings.append((line or 0, message))
return nodes.system_message(message, level=level, type=self.levels[level], *children, **kwargs)
# Create default settings
settings = OptionParser(components=(Parser,)).get_default_values()
document = new_document("<docstring>", settings=settings)
# Attach custom reporter
document.reporter = RecordingReporter(
source="<docstring>",
report_level=1, # capture warnings and above
halt_level=100, # never halt
stream=None,
debug=False
)
parser = Parser()
parser.parse(text, document)
return warnings
def iter_docstrings(package_name: str):
"""Yield docstrings of modules, classes, functions in the given package."""
package = importlib.import_module(package_name)
for module_info in pkgutil.walk_packages(package.__path__, package.__name__ + "."):
module = importlib.import_module(module_info.name)
# Module docstring
if module.__doc__:
yield f"Module {module.__name__}", inspect.getdoc(module)
# Classes + methods
for _, obj in inspect.getmembers(module):
if inspect.isclass(obj) or inspect.isfunction(obj):
if obj.__doc__:
yield f"{module.__name__}.{obj.__name__}", inspect.getdoc(obj)
# Methods of classes
if inspect.isclass(obj):
for _, meth in inspect.getmembers(obj, inspect.isfunction):
if meth.__doc__:
yield f"{module.__name__}.{obj.__name__}.{meth.__name__}", inspect.getdoc(meth)
def map_converted_to_original(orig: str, conv: str) -> dict[int, int]:
"""Map converted docstring lines back to original docstring lines.
Returns:
mapping: key = converted line index (0-based), value = original line index (0-based).
"""
orig_lines = orig.splitlines()
conv_lines = conv.splitlines()
matcher = SequenceMatcher(None, orig_lines, conv_lines)
line_map = {}
for tag, i1, i2, j1, j2 in matcher.get_opcodes():
if tag in ("equal", "replace"):
for o, c in zip(range(i1, i2), range(j1, j2)):
line_map[c] = o
elif tag == "insert":
for c in range(j1, j2):
line_map[c] = max(i1 - 1, 0)
return line_map
def test_all_docstrings_rst_compliant():
"""All docstrings must be valid reStructuredText."""
failures = []
for location, doc in iter_docstrings(PACKAGE_NAME):
# Skip ignored locations
if any(re.search(pat, location) for pat in IGNORE_LOCATIONS):
continue
# convert like sphinx napoleon does
doc_converted = str(GoogleDocstring(doc, napoleon_config))
# Register directives that sphinx knows - just to avoid errors
prepare_docutils_for_sphinx()
# Validate
messages = validate_rst(doc_converted)
if not messages:
continue
# Map converted line numbers back to original docstring
line_map = map_converted_to_original(doc, doc_converted)
# Filter messages
filtered_messages = []
ignore_msg_patterns = []
for loc_pattern, patterns in IGNORE_ERRORS_BY_LOCATION.items():
if re.search(loc_pattern, location):
ignore_msg_patterns.extend(patterns)
for conv_line, msg_text in messages:
orig_line = line_map.get(conv_line - 1, conv_line - 1) + 1
if any(re.search(pat, msg_text) for pat in ignore_msg_patterns):
continue
filtered_messages.append((orig_line, msg_text))
if filtered_messages:
failures.append((location, filtered_messages, doc, doc_converted))
# Raise AssertionError with nicely formatted output
if failures:
msg = "Invalid reST docstrings (see https://www.sphinx-doc.org/en/master/usage/extensions/example_google.html for valid format):\n"
for location, errors, doc, doc_converted in failures:
msg += f"\n--- {location} ---\n"
msg += "\nConverted by Sphinx Napoleon:\n"
doc_lines = doc_converted.splitlines()
for i, line_content in enumerate(doc_lines, start=1):
line_str = f"{i:2}" # fixed-width
msg += f" L{line_str}: {line_content}\n"
msg += "\nOriginal:\n"
doc_lines = doc.splitlines()
error_map = {line: err for line, err in errors}
for i, line_content in enumerate(doc_lines, start=1):
line_str = f"{i:2}" # fixed-width
if i in error_map:
msg += f">>> L{line_str}: {line_content} <-- {error_map[i]}\n"
else:
msg += f" L{line_str}: {line_content}\n"
doc_fixed, changed = replace_fenced_code_blocks(doc)
if changed:
msg += "\nImproved for fenced code blocks:\n"
msg += '"""' + doc_fixed + '\n"""\n'
msg += f"Total: {len(failures)} docstrings"
raise AssertionError(msg)
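Taken together, the helpers form a small validation pipeline; an illustrative run on a single Google-style docstring (all names as defined in this module):

# Illustrative end-to-end use of the helpers above.
doc = """Add two numbers.

Args:
    a: First operand.
    b: Second operand.

Returns:
    The sum of a and b.
"""
prepare_docutils_for_sphinx()
converted = str(GoogleDocstring(doc, napoleon_config))
for line, message in validate_rst(converted):
    print(f"L{line}: {message}")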