mirror of
https://github.com/Akkudoktor-EOS/EOS.git
synced 2026-03-15 02:56:17 +00:00
2195 lines
78 KiB
Python
"""Abstract database interface."""
|
||
|
|
|
||
|
|
from __future__ import annotations
|
||
|
|
|
||
|
|
import bisect
|
||
|
|
import gzip
|
||
|
|
import pickle
|
||
|
|
from abc import ABC, abstractmethod
|
||
|
|
from enum import Enum, auto
|
||
|
|
from pathlib import Path
|
||
|
|
from threading import Lock
|
||
|
|
from typing import (
|
||
|
|
TYPE_CHECKING,
|
||
|
|
Any,
|
||
|
|
Final,
|
||
|
|
Generic,
|
||
|
|
Iterable,
|
||
|
|
Iterator,
|
||
|
|
Literal,
|
||
|
|
Optional,
|
||
|
|
Protocol,
|
||
|
|
Self,
|
||
|
|
Type,
|
||
|
|
TypeVar,
|
||
|
|
Union,
|
||
|
|
)
|
||
|
|
|
||
|
|
from loguru import logger
|
||
|
|
from numpydantic import NDArray, Shape
|
||
|
|
|
||
|
|
from akkudoktoreos.core.coreabc import (
|
||
|
|
ConfigMixin,
|
||
|
|
DatabaseMixin,
|
||
|
|
SingletonMixin,
|
||
|
|
)
|
||
|
|
from akkudoktoreos.utils.datetimeutil import (
|
||
|
|
DateTime,
|
||
|
|
Duration,
|
||
|
|
to_datetime,
|
||
|
|
to_duration,
|
||
|
|
)
|
||
|
|
|
||
|
|
# Key used to store metadata
|
||
|
|
DATABASE_METADATA_KEY: bytes = b"__metadata__"
|
||
|
|
|
||
|
|
# ==================== Abstract Database Interface ====================
|
||
|
|
|
||
|
|
|
||
|
|
class DatabaseABC(ABC, ConfigMixin):
|
||
|
|
"""Abstract base class for database.
|
||
|
|
|
||
|
|
All operations accept an optional `namespace` argument. Implementations should
|
||
|
|
treat None as the default/root namespace. Concrete implementations can map
|
||
|
|
namespace -> native namespace (LMDB DBI) or emulate namespaces (SQLite uses
|
||
|
|
a namespace column).
|
||
|
|
"""
|
||
|
|
|
||
|
|
@property
|
||
|
|
@abstractmethod
|
||
|
|
def is_open(self) -> bool:
|
||
|
|
"""Return whether the database connection is open."""
|
||
|
|
raise NotImplementedError
|
||
|
|
|
||
|
|
@property
|
||
|
|
def storage_path(self) -> Path:
|
||
|
|
"""Storage path for the database."""
|
||
|
|
return self.config.general.data_folder_path / "db" / self.__class__.__name__.lower()
|
||
|
|
|
||
|
|
@property
|
||
|
|
def compression_level(self) -> int:
|
||
|
|
"""Compression level for database record data."""
|
||
|
|
return self.config.database.compression_level
|
||
|
|
|
||
|
|
@property
|
||
|
|
def compression(self) -> bool:
|
||
|
|
"""Whether to compress stored values."""
|
||
|
|
return self.config.database.compression_level > 0
|
||
|
|
|
||
|
|
# Lifecycle
|
||
|
|
|
||
|
|
@abstractmethod
|
||
|
|
def provider_id(self) -> str:
|
||
|
|
"""Return the unique identifier for the database provider.
|
||
|
|
|
||
|
|
To be implemented by derived classes.
|
||
|
|
"""
|
||
|
|
raise NotImplementedError
|
||
|
|
|
||
|
|
@abstractmethod
|
||
|
|
def open(self, namespace: Optional[str] = None) -> None:
|
||
|
|
"""Open database connection and optionally set default namespace.
|
||
|
|
|
||
|
|
Args:
|
||
|
|
namespace: Optional default namespace to prepare.
|
||
|
|
|
||
|
|
Raises:
|
||
|
|
RuntimeError: If the database cannot be opened.
|
||
|
|
"""
|
||
|
|
raise NotImplementedError
|
||
|
|
|
||
|
|
@abstractmethod
|
||
|
|
def close(self) -> None:
|
||
|
|
"""Close the database connection and cleanup resources."""
|
||
|
|
raise NotImplementedError
|
||
|
|
|
||
|
|
@abstractmethod
|
||
|
|
def flush(self, namespace: Optional[str] = None) -> None:
|
||
|
|
"""Force synchronization of pending writes to storage (optional per-namespace)."""
|
||
|
|
raise NotImplementedError
|
||
|
|
|
||
|
|
# Metadata operations
|
||
|
|
|
||
|
|
@abstractmethod
|
||
|
|
def set_metadata(self, metadata: Optional[bytes], *, namespace: Optional[str] = None) -> None:
|
||
|
|
"""Save metadata for a given namespace.
|
||
|
|
|
||
|
|
Metadata is treated separately from data records and stored as a single object.
|
||
|
|
|
||
|
|
Args:
|
||
|
|
metadata (bytes): Arbitrary metadata to save or None to delete metadata.
|
||
|
|
namespace (Optional[str]): Optional namespace under which to store metadata.
|
||
|
|
"""
|
||
|
|
raise NotImplementedError
|
||
|
|
|
||
|
|
@abstractmethod
|
||
|
|
def get_metadata(self, namespace: Optional[str] = None) -> Optional[bytes]:
|
||
|
|
"""Load metadata for a given namespace.
|
||
|
|
|
||
|
|
Returns None if no metadata exists.
|
||
|
|
|
||
|
|
Args:
|
||
|
|
namespace (Optional[str]): Optional namespace whose metadata to retrieve.
|
||
|
|
|
||
|
|
Returns:
|
||
|
|
Optional[bytes]: The loaded metadata, or None if not found.
|
||
|
|
"""
|
||
|
|
raise NotImplementedError
|
||
|
|
|
||
|
|
# Basic record operations
|
||
|
|
|
||
|
|
@abstractmethod
|
||
|
|
def save_records(
|
||
|
|
self, records: Iterable[tuple[bytes, bytes]], namespace: Optional[str] = None
|
||
|
|
) -> int:
|
||
|
|
"""Save multiple records into the specified namespace (or default).
|
||
|
|
|
||
|
|
Args:
|
||
|
|
records: Iterable providing key, value tuples ordered by key:
|
||
|
|
- key: Byte key (sortable) for the record.
|
||
|
|
- value: Serialized (and optionally compressed) bytes to store.
|
||
|
|
namespace: Optional namespace.
|
||
|
|
|
||
|
|
Returns:
|
||
|
|
Number of records saved.
|
||
|
|
|
||
|
|
Raises:
|
||
|
|
RuntimeError: If DB not open or write failed.
|
||
|
|
"""
|
||
|
|
raise NotImplementedError
|
||
|
|
|
||
|
|
@abstractmethod
|
||
|
|
def delete_records(self, keys: Iterable[bytes], namespace: Optional[str] = None) -> int:
|
||
|
|
"""Delete multiple records by key from the specified namespace.
|
||
|
|
|
||
|
|
Args:
|
||
|
|
keys: Iterable that provides the Byte keys to delete.
|
||
|
|
namespace: Optional namespace.
|
||
|
|
|
||
|
|
Returns:
|
||
|
|
Number of records actually deleted.
|
||
|
|
"""
|
||
|
|
raise NotImplementedError
|
||
|
|
|
||
|
|
@abstractmethod
|
||
|
|
def iterate_records(
|
||
|
|
self,
|
||
|
|
start_key: Optional[bytes] = None,
|
||
|
|
end_key: Optional[bytes] = None,
|
||
|
|
namespace: Optional[str] = None,
|
||
|
|
reverse: bool = False,
|
||
|
|
) -> Iterator[tuple[bytes, bytes]]:
|
||
|
|
"""Iterate over records for a namespace with optional bounds.
|
||
|
|
|
||
|
|
Args:
|
||
|
|
start_key: Inclusive start key, or None.
|
||
|
|
end_key: Exclusive end key, or None.
|
||
|
|
namespace: Optional namespace to target.
|
||
|
|
reverse: If True iterate in descending key order.
|
||
|
|
|
||
|
|
Yields:
|
||
|
|
Tuples of (key, record).
|
||
|
|
"""
|
||
|
|
raise NotImplementedError
|
||
|
|
|
||
|
|
@abstractmethod
|
||
|
|
def count_records(
|
||
|
|
self,
|
||
|
|
start_key: Optional[bytes] = None,
|
||
|
|
end_key: Optional[bytes] = None,
|
||
|
|
*,
|
||
|
|
namespace: Optional[str] = None,
|
||
|
|
) -> int:
|
||
|
|
"""Count records in [start_key, end_key) excluding metadata in specified namespace.
|
||
|
|
|
||
|
|
Excludes metadata records.
|
||
|
|
"""
|
||
|
|
raise NotImplementedError
|
||
|
|
|
||
|
|
@abstractmethod
|
||
|
|
def get_key_range(
|
||
|
|
self, namespace: Optional[str] = None
|
||
|
|
) -> tuple[Optional[bytes], Optional[bytes]]:
|
||
|
|
"""Return (min_key, max_key) in the given namespace or (None, None) if empty."""
|
||
|
|
raise NotImplementedError
|
||
|
|
|
||
|
|
@abstractmethod
|
||
|
|
def get_backend_stats(self, namespace: Optional[str] = None) -> dict[str, Any]:
|
||
|
|
"""Get backend-specific statistics; implementations may return namespace-specific data."""
|
||
|
|
raise NotImplementedError
|
||
|
|
|
||
|
|
# Compression helpers
|
||
|
|
|
||
|
|
def serialize_data(self, data: bytes) -> bytes:
|
||
|
|
"""Optionally compress raw pickled data before storage.
|
||
|
|
|
||
|
|
Args:
|
||
|
|
data: Raw pickled bytes.
|
||
|
|
|
||
|
|
Returns:
|
||
|
|
Possibly compressed bytes.
|
||
|
|
"""
|
||
|
|
if self.compression:
|
||
|
|
return gzip.compress(data, compresslevel=self.compression_level)
|
||
|
|
return data
|
||
|
|
|
||
|
|
def deserialize_data(self, data: bytes) -> bytes:
|
||
|
|
"""Optionally decompress stored data.
|
||
|
|
|
||
|
|
Args:
|
||
|
|
data: Stored bytes.
|
||
|
|
|
||
|
|
Returns:
|
||
|
|
Raw pickled bytes (decompressed if needed).
|
||
|
|
"""
|
||
|
|
if len(data) >= 2 and data[:2] == b"\x1f\x8b":
|
||
|
|
try:
|
||
|
|
return gzip.decompress(data)
|
||
|
|
except gzip.BadGzipFile:
|
||
|
|
pass
|
||
|
|
return data
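    # Illustrative round-trip of the compression helpers (a minimal sketch, not
    # part of the interface; `backend` is an assumed concrete instance).
    # `deserialize_data` sniffs the gzip magic bytes, so it transparently
    # handles values written with or without compression:
    #
    #     raw = pickle.dumps({"power_w": 1250.0})
    #     stored = backend.serialize_data(raw)  # gzipped iff compression_level > 0
    #     assert backend.deserialize_data(stored) == raw
    #     assert backend.deserialize_data(raw) == raw  # uncompressed input also works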


class DatabaseBackendABC(DatabaseABC, SingletonMixin):
    """Abstract base class for database backends.

    All operations accept an optional `namespace` argument. Implementations should
    treat None as the default/root namespace. Concrete implementations can map
    namespace -> native namespace (LMDB DBI) or emulate namespaces (SQLite uses
    a namespace column).
    """

    connection: Any
    lock: Lock
    _is_open: bool
    default_namespace: Optional[str]

    def __init__(self, **kwargs: Any) -> None:
        """Initialize the DatabaseBackendABC base.

        Args:
            **kwargs: Backend-specific options (ignored by base).
        """
        self.connection = None
        self.lock = Lock()
        self._is_open = False
        self.default_namespace = None

    @property
    def is_open(self) -> bool:
        """Return whether the database connection is open."""
        return self._is_open


# ==================== Database Record Protocol Mixin ====================


class DataRecordProtocol(Protocol):
    date_time: DateTime

    def __init__(self, date_time: Any) -> None: ...

    def __getitem__(self, key: str) -> Any: ...

    def model_dump(self) -> dict: ...


T_Record = TypeVar("T_Record", bound=DataRecordProtocol)


class DatabaseTimestamp(str):
    """ISO8601 UTC datetime string used as database timestamp.

    Must always be in UTC and lexicographically sortable.

    Example:
        "20241027T123456Z"  # 2024-10-27 12:34:56 UTC
    """

    __slots__ = ()

    @classmethod
    def from_datetime(cls, dt: DateTime) -> "DatabaseTimestamp":
        if dt.tz is None:
            raise ValueError("Timezone-aware datetime required")

        return cls(dt.in_timezone("UTC").format("YYYYMMDDTHHmmss[Z]"))

    def to_datetime(self) -> DateTime:
        from pendulum import parse

        return parse(self)
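    # Round-trip example (illustrative sketch; `pdt` is an assumed alias for
    # pendulum.datetime). `from_datetime` normalizes to UTC, and the `[Z]`
    # format token emits a literal "Z" suffix:
    #
    #     from pendulum import datetime as pdt
    #     ts = DatabaseTimestamp.from_datetime(pdt(2024, 10, 27, 12, 34, 56, tz="UTC"))
    #     assert ts == "20241027T123456Z"
    #     assert ts.to_datetime() == pdt(2024, 10, 27, 12, 34, 56, tz="UTC")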


class _DatabaseTimestampUnbound(str):
    """Sentinel type representing an unbounded datetime value for database usage.

    Instances of this class are designed to be totally ordered relative to
    ISO datetime strings:

    - UNBOUND_START is smaller than any other value.
    - UNBOUND_END is greater than any other value.

    This makes the type safe for:
    - sorted lists
    - bisect operations
    - dictionary keys
    - range queries

    The type inherits from `str` to remain maximally efficient for hashing
    and dictionary usage.
    """

    # Note: a nonempty __slots__ is not supported on subclasses of str, so
    # `_is_start` lives in the regular instance __dict__.

    if TYPE_CHECKING:
        _is_start: bool

    def __new__(cls, value: str, is_start: bool) -> "_DatabaseTimestampUnbound":
        obj = super().__new__(cls, value)
        obj._is_start = is_start
        return obj

    def __lt__(self, other: object) -> bool:
        if isinstance(other, _DatabaseTimestampUnbound):
            return self._is_start and not other._is_start
        return self._is_start

    def __le__(self, other: object) -> bool:
        if isinstance(other, _DatabaseTimestampUnbound):
            return self._is_start or self is other
        return self._is_start

    def __gt__(self, other: object) -> bool:
        if isinstance(other, _DatabaseTimestampUnbound):
            return not self._is_start and other._is_start
        return not self._is_start

    def __ge__(self, other: object) -> bool:
        if isinstance(other, _DatabaseTimestampUnbound):
            return not self._is_start or self is other
        return not self._is_start

    def __repr__(self) -> str:
        return "UNBOUND_START" if self._is_start else "UNBOUND_END"


DatabaseTimestampType = Union[DatabaseTimestamp, _DatabaseTimestampUnbound]


# Public sentinels
UNBOUND_START: Final[_DatabaseTimestampUnbound] = _DatabaseTimestampUnbound(
    "UNBOUND_START", is_start=True
)
UNBOUND_END: Final[_DatabaseTimestampUnbound] = _DatabaseTimestampUnbound(
    "UNBOUND_END", is_start=False
)
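# Ordering example (illustrative): the sentinels compare against any concrete
# timestamp string, so they can be used directly as range boundaries:
#
#     assert UNBOUND_START < DatabaseTimestamp("20240101T000000Z") < UNBOUND_END
#     assert UNBOUND_START < UNBOUND_END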


class _DatabaseTimeWindowUnbound:
    """Sentinel representing an unbounded time window.

    This is distinct from `None`:
    - None → parameter not provided
    - UNBOUND_WINDOW → explicitly infinite duration

    Designed to:
    - be identity-compared (is)
    - be hashable
    - be safe for dict usage
    - avoid accidental equality with other values
    """

    __slots__ = ()

    def __repr__(self) -> str:
        return "UNBOUND_WINDOW"

    def __reduce__(self) -> str:
        # Ensures singleton behavior during pickling
        return "UNBOUND_WINDOW"


DatabaseTimeWindowType = Union[Duration, None, _DatabaseTimeWindowUnbound]


UNBOUND_WINDOW: Final[_DatabaseTimeWindowUnbound] = _DatabaseTimeWindowUnbound()
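# Singleton behavior example (illustrative): `__reduce__` returns the
# module-level name, so unpickling resolves back to the same object and
# identity checks with `is` keep working across pickle round-trips:
#
#     import pickle
#     assert pickle.loads(pickle.dumps(UNBOUND_WINDOW)) is UNBOUND_WINDOW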


class DatabaseRecordProtocol(Protocol, Generic[T_Record]):
    # ---- derived class required interface ----

    records: list[T_Record]

    def model_post_init(self, __context: Any) -> None: ...

    def model_copy(self, *, deep: bool = False) -> Self: ...

    # record class introspection
    @classmethod
    def record_class(cls) -> Type[T_Record]: ...

    # Duration for which records shall be kept in database storage
    def db_keep_duration(self) -> Optional[Duration]: ...

    # namespace
    def db_namespace(self) -> str: ...

    # ---- public DB interface ----

    def _db_reset_state(self) -> None: ...

    @property
    def db_enabled(self) -> bool: ...

    def db_timestamp_range(self) -> tuple[DatabaseTimestampType, DatabaseTimestampType]: ...

    def db_generate_timestamps(
        self,
        start_timestamp: DatabaseTimestamp,
        values_count: int,
        interval: Optional[Duration] = None,
    ) -> Iterator[DatabaseTimestamp]: ...

    def db_get_record(self, target_timestamp: DatabaseTimestamp) -> Optional[T_Record]: ...

    def db_insert_record(
        self,
        record: T_Record,
        *,
        mark_dirty: bool = True,
    ) -> None: ...

    def db_iterate_records(
        self,
        start_timestamp: Optional[DatabaseTimestampType] = None,
        end_timestamp: Optional[DatabaseTimestampType] = None,
    ) -> Iterator[T_Record]: ...

    def db_load_records(
        self,
        start_timestamp: Optional[DatabaseTimestampType] = None,
        end_timestamp: Optional[DatabaseTimestampType] = None,
    ) -> int: ...

    def db_delete_records(
        self,
        start_timestamp: Optional[DatabaseTimestampType] = None,
        end_timestamp: Optional[DatabaseTimestampType] = None,
    ) -> int: ...

    # ---- dirty tracking ----
    def db_mark_dirty_record(self, record: T_Record) -> None: ...

    def db_save_records(self) -> int: ...

    # ---- autosave ----
    def db_autosave(self) -> int: ...

    # ---- Remove old records from database to free space ----
    def db_vacuum(
        self,
        keep_hours: Optional[int] = None,
        keep_datetime: Optional[DatabaseTimestampType] = None,
    ) -> int: ...

    # ---- statistics about database storage ----
    def db_count_records(self) -> int: ...

    def db_get_stats(self) -> dict: ...


T_DatabaseRecordProtocol = TypeVar("T_DatabaseRecordProtocol", bound="DatabaseRecordProtocol")


class DatabaseRecordProtocolLoadPhase(Enum):
    """Database loading phases.

    NONE:
        No records have been loaded from the database.

    INITIAL:
        A limited initial time window has been loaded, typically centered
        around a target datetime.

    FULL:
        All records in the database have been loaded into memory.

    The phase controls whether further calls to ``db_ensure_loaded`` may
    trigger additional database access.
    """

    NONE = auto()  # nothing loaded
    INITIAL = auto()  # initial window loaded
    FULL = auto()  # fully expanded


class DatabaseRecordProtocolMixin(
    ConfigMixin,
    DatabaseMixin,
    Generic[T_Record],  # for typing only
):
    """Database Record Protocol Mixin.

    Completely manages in-memory records and database storage.

    Expects records with a date_time (DatabaseTimestamp) property and a record
    list in self.records of the derived class.

    DatabaseRecordProtocolMixin expects the derived classes to be singletons.
    """

    # Tell mypy these attributes exist (will be provided by subclasses)
    if TYPE_CHECKING:
        records: list[T_Record]

        @classmethod
        def record_class(cls) -> Type[T_Record]: ...

        @property
        def record_keys_writable(self) -> list[str]: ...

        def key_to_array(
            self,
            key: str,
            start_datetime: Optional[DateTime] = None,
            end_datetime: Optional[DateTime] = None,
            interval: Optional[Duration] = None,
            fill_method: Optional[str] = None,
            dropna: Optional[bool] = True,
            boundary: Literal["strict", "context"] = "context",
            align_to_interval: bool = False,
        ) -> NDArray[Shape["*"], Any]: ...

    # Database configuration

    def db_initial_time_window(self) -> Optional[Duration]:
        """Return the initial time window used for database loading.

        This window defines the initial symmetric time span around a target datetime
        that should be loaded from the database when no explicit search time window
        is specified. It serves as a loading hint and may be expanded by the caller
        if no records are found within the initial range.

        Subclasses may override this method to provide a domain-specific default.

        Returns:
            The initial loading time window as a Duration, or ``None`` to indicate
            that no initial window constraint should be applied.
        """
        return None

    # -----------------------------------------------------
    # Initialization
    # -----------------------------------------------------

    def _db_ensure_initialized(self) -> None:
        """Initialize DB runtime state.

        Idempotent — safe to call multiple times.
        """
        if not getattr(self, "_db_initialized", None):
            # record datetime to record mapping for fast lookup
            self._db_record_index: dict[DatabaseTimestamp, T_Record] = {}
            self._db_sorted_timestamps: list[DatabaseTimestamp] = []

            # Loading phase tracking
            self._db_load_phase: DatabaseRecordProtocolLoadPhase = (
                DatabaseRecordProtocolLoadPhase.NONE
            )
            # Range of timestamps that was already queried from database storage during load
            self._db_loaded_range: Optional[tuple[DatabaseTimestampType, DatabaseTimestampType]] = (
                None
            )

            # Dirty tracking
            # - dirty records since last save
            self._db_dirty_timestamps: set[DatabaseTimestamp] = set()
            # - records added since last save
            self._db_new_timestamps: set[DatabaseTimestamp] = set()
            # - deleted records since last save
            self._db_deleted_timestamps: set[DatabaseTimestamp] = set()

            self._db_version: int = 1

            # Storage
            self._db_metadata: Optional[dict] = None
            self._db_storage_initialized: bool = False

            self._db_initialized: bool = True

        if not self._db_storage_initialized and self.db_enabled:
            # Metadata
            existing_metadata = self._db_load_metadata()
            if existing_metadata:
                self._db_metadata = existing_metadata
            else:
                self._db_metadata = {
                    "version": self._db_version,
                    "created": to_datetime(as_string=True),
                    "provider_id": getattr(self, "provider_id", lambda: "unknown")(),
                    "compression": self.database.compression,
                    "backend": self.database.__class__.__name__,
                }
                self._db_save_metadata(self._db_metadata)

            logger.info(
                f"Initialized {self.database.__class__.__name__}:{self.db_namespace()} storage at "
                f"{self.database.storage_path} "
                f"autosave_interval_sec={self.config.database.autosave_interval_sec}"
            )

            self._db_storage_initialized = True

    def model_post_init(self, __context: Any) -> None:
        """Initialize DB state attributes immediately after Pydantic construction."""
        # Always call super() first — other mixins may also define model_post_init
        super().model_post_init(__context)  # type: ignore[misc]
        self._db_ensure_initialized()

    # -----------------------------------------------------
    # Helpers
    # -----------------------------------------------------

    def _db_key_from_timestamp(self, dt: DatabaseTimestamp) -> bytes:
        """Convert database timestamp to a sortable database backend key."""
        return dt.encode("utf-8")

    def _db_key_to_timestamp(self, dbkey: bytes) -> DatabaseTimestamp:
        """Convert database backend key back to database timestamp."""
        return DatabaseTimestamp(dbkey.decode("utf-8"))

    def _db_timestamp_after(self, timestamp: DatabaseTimestamp) -> DatabaseTimestamp:
        """Get the database timestamp directly after this timestamp.

        A minimal time span (one second, the resolution of DatabaseTimestamp)
        is added to get the first possible timestamp after the given one.
        """
        target = DatabaseTimestamp.to_datetime(timestamp)
        db_datetime_after = DatabaseTimestamp.from_datetime(target.add(seconds=1))
        return db_datetime_after
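    # Example (illustrative): with one-second resolution, the "next" timestamp
    # is the current one plus one second:
    #
    #     self._db_timestamp_after(DatabaseTimestamp("20241027T123456Z"))
    #     # -> DatabaseTimestamp("20241027T123457Z")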

    def db_previous_timestamp(
        self,
        timestamp: DatabaseTimestamp,
    ) -> Optional[DatabaseTimestamp]:
        """Find the largest timestamp < given timestamp.

        Searches memory first, then falls back to the database if necessary.
        """
        self._db_ensure_initialized()

        # Step 1: Memory-first search
        if self._db_sorted_timestamps:
            idx = bisect.bisect_left(self._db_sorted_timestamps, timestamp)
            if idx > 0:
                return self._db_sorted_timestamps[idx - 1]

        # Step 2: Check if DB might contain older keys
        if not self.db_enabled:
            return None

        db_min_key, _ = self.database.get_key_range(self.db_namespace())
        if db_min_key is None:
            return None

        db_min_ts = self._db_key_to_timestamp(db_min_key)
        if timestamp <= db_min_ts:
            return None

        # Step 3: Load left part of DB if not already in memory
        # We want records < timestamp
        start_key = None
        end_key = self._db_key_from_timestamp(timestamp)

        # Only load if timestamp is out of currently loaded memory
        if self._db_loaded_range:
            loaded_start, _ = self._db_loaded_range
            if isinstance(loaded_start, DatabaseTimestamp) and timestamp > loaded_start:
                # Already partially loaded, restrict iterator to unloaded portion
                start_key = self._db_key_from_timestamp(loaded_start)

        previous_ts: Optional[DatabaseTimestamp] = None
        for key, _ in self.database.iterate_records(
            start_key=start_key,
            end_key=end_key,
            namespace=self.db_namespace(),
        ):
            ts = self._db_key_to_timestamp(key)
            if ts in self._db_deleted_timestamps:
                continue
            previous_ts = ts  # last one before `timestamp`

        return previous_ts

    def db_next_timestamp(
        self,
        timestamp: DatabaseTimestamp,
    ) -> Optional[DatabaseTimestamp]:
        """Find the smallest timestamp > given timestamp.

        Searches memory first, then falls back to the database if necessary.
        """
        self._db_ensure_initialized()

        # Step 1: Memory-first search
        if self._db_sorted_timestamps:
            idx = bisect.bisect_right(self._db_sorted_timestamps, timestamp)
            if idx < len(self._db_sorted_timestamps):
                return self._db_sorted_timestamps[idx]

        # Step 2: Check if DB might contain newer keys
        if not self.db_enabled:
            return None

        _, db_max_key = self.database.get_key_range(self.db_namespace())
        if db_max_key is None:
            return None

        db_max_ts = self._db_key_to_timestamp(db_max_key)
        if timestamp >= db_max_ts:
            return None

        # Step 3: Search right part of DB if not already in memory
        timestamp_key = self._db_key_from_timestamp(timestamp)
        start_key = timestamp_key
        end_key = None

        # Restrict iterator to unloaded portion if partially loaded
        if self._db_loaded_range:
            _, loaded_end = self._db_loaded_range
            # Assumes everything < loaded_end is fully represented in memory.
            if isinstance(loaded_end, DatabaseTimestamp) and timestamp < loaded_end:
                start_key = self._db_key_from_timestamp(max(timestamp, loaded_end))

        for key, _ in self.database.iterate_records(
            start_key=start_key,
            end_key=end_key,
            namespace=self.db_namespace(),
        ):
            if key == timestamp_key:
                # skip the timestamp itself
                continue

            ts = self._db_key_to_timestamp(key)

            # Check for deleted (only necessary for the database - memory is already cleaned)
            if ts in self._db_deleted_timestamps:
                continue

            return ts  # first valid one

        return None
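    # Neighbor lookup example (illustrative, assuming a populated store):
    #
    #     ts = DatabaseTimestamp("20241027T120000Z")
    #     before = self.db_previous_timestamp(ts)  # largest timestamp < ts, or None
    #     after = self.db_next_timestamp(ts)       # smallest timestamp > ts, or None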

    def _db_serialize_record(self, record: T_Record) -> bytes:
        """Serialize a DataRecord to bytes."""
        if self.database is None:
            raise ValueError("Database not defined.")
        data = pickle.dumps(record.model_dump(), protocol=pickle.HIGHEST_PROTOCOL)
        return self.database.serialize_data(data)

    def _db_deserialize_record(self, data: bytes) -> T_Record:
        """Deserialize bytes to a DataRecord."""
        if self.database is None:
            raise ValueError("Database not defined.")
        data = self.database.deserialize_data(data)
        record_data = pickle.loads(data)  # noqa: S301
        return self.record_class()(**record_data)
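    # Storage pipeline (illustrative): a record travels
    # model_dump() -> pickle -> optional gzip on the way in, and the reverse
    # on the way out:
    #
    #     blob = self._db_serialize_record(record)
    #     assert self._db_deserialize_record(blob).date_time == record.date_time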

    def _db_save_metadata(self, metadata: dict) -> None:
        """Save metadata to database."""
        if not self.db_enabled:
            return

        value = pickle.dumps(metadata)
        self.database.set_metadata(value, namespace=self.db_namespace())

    def _db_load_metadata(self) -> Optional[dict]:
        """Load metadata from database."""
        if not self.db_enabled:
            return None

        try:
            value = self.database.get_metadata(namespace=self.db_namespace())
            return pickle.loads(value)  # noqa: S301
        except Exception:
            logger.debug("Cannot load metadata.")
            return None

    def _db_reset_state(self) -> None:
        self.records = []
        self._db_loaded_range = None
        self._db_load_phase = DatabaseRecordProtocolLoadPhase.NONE
        try:
            del self._db_initialized
        except AttributeError:
            logger.debug("_db_reset_state called on uninitialized sequence")

    def _db_clone_empty(self: T_DatabaseRecordProtocol) -> T_DatabaseRecordProtocol:
        """Create an empty internal clone for database operations.

        The clone shares configuration and database access implicitly via
        ConfigMixin and DatabaseMixin, but contains no in-memory records
        or loaded-range state.

        Internal helper for database workflows only.
        """
        clone = self.model_copy(deep=True)
        clone._db_reset_state()

        return clone

    def _search_window(
        self,
        center_timestamp: Optional[DatabaseTimestampType],
        time_window: DatabaseTimeWindowType,
    ) -> tuple[DatabaseTimestampType, DatabaseTimestampType]:
        """Compute a symmetric search window around a center timestamp.

        This method always returns valid database boundary values.

        Args:
            center_timestamp: Center of the window. Defaults to current UTC time
                if None. Must not be an unbounded timestamp sentinel.
            time_window: Total width of the search window.
                Half is applied on each side of center_timestamp.
                - None: interpreted as unbounded.
                - UNBOUND_WINDOW: interpreted as unbounded.
                - Duration: symmetric bounded interval.

        Returns:
            A tuple (start, end) representing a half-open interval.
            Always returns valid database timestamp boundaries:
            either concrete timestamps or (UNBOUND_START, UNBOUND_END).

        Raises:
            TypeError: If center_timestamp is an unbounded timestamp sentinel.
            ValueError: If time_window is a negative Duration.
        """
        # Unbounded cases → full DB range
        if time_window is None or isinstance(time_window, _DatabaseTimeWindowUnbound):
            return UNBOUND_START, UNBOUND_END

        if isinstance(center_timestamp, _DatabaseTimestampUnbound):
            raise TypeError("center_timestamp cannot be of unbounded timestamp type.")

        # Resolve center
        if center_timestamp is None:
            center = to_datetime().in_timezone("UTC")
        else:
            center = DatabaseTimestamp.to_datetime(center_timestamp)

        duration = to_duration(time_window)

        if duration.total_seconds() < 0:
            raise ValueError("time_window must be non-negative")

        # Use duration arithmetic to avoid float precision issues
        half = duration / 2

        start = center - half
        end = center + half

        return (
            DatabaseTimestamp.from_datetime(start),
            DatabaseTimestamp.from_datetime(end),
        )
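    # Window example (illustrative): a 2-hour total window around noon yields
    # one hour on each side:
    #
    #     self._search_window(DatabaseTimestamp("20241027T120000Z"), to_duration(7200))
    #     # -> ("20241027T110000Z", "20241027T130000Z")
    #     self._search_window(None, UNBOUND_WINDOW)
    #     # -> (UNBOUND_START, UNBOUND_END)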

    def _db_range_covered(
        self,
        start_timestamp: DatabaseTimestampType,
        end_timestamp: DatabaseTimestampType,
    ) -> bool:
        """Return True if [start_timestamp, end_timestamp) is fully covered.

        Args:
            start_timestamp: Inclusive lower boundary of the requested range.
            end_timestamp: Exclusive upper boundary of the requested range.

        Returns:
            True if the requested half-open interval is completely contained
            within the loaded database range.

        Raises:
            TypeError: If start_timestamp or end_timestamp is None.
        """
        if start_timestamp is None or end_timestamp is None:
            raise TypeError(
                "start_timestamp and end_timestamp must not be None. "
                "Use UNBOUND_START / UNBOUND_END instead."
            )

        if not isinstance(start_timestamp, (str, _DatabaseTimestampUnbound)):
            raise TypeError(
                f"Invalid start_timestamp type: {type(start_timestamp)}. "
                "Must be DatabaseTimestamp or unbound sentinel."
            )

        if not isinstance(end_timestamp, (str, _DatabaseTimestampUnbound)):
            raise TypeError(
                f"Invalid end_timestamp type: {type(end_timestamp)}. "
                "Must be DatabaseTimestamp or unbound sentinel."
            )

        if self._db_loaded_range is None:
            return False

        loaded_start, loaded_end = self._db_loaded_range

        if loaded_start is None or loaded_end is None:
            return False

        return loaded_start <= start_timestamp and end_timestamp <= loaded_end

    def _db_load_initial_window(
        self,
        center_timestamp: Optional[DatabaseTimestampType] = None,
    ) -> None:
        """Load an initial time window of records from the database.

        This method establishes the first lazy-loading window when the load phase
        is ``NONE``. It queries the database for records within a symmetric time
        interval around ``center_timestamp`` and transitions the load phase to
        ``INITIAL``.

        The loaded interval is recorded in ``self._db_loaded_range`` and represents
        **database coverage**, not memory continuity. That is:

        - All database records in the half-open interval
          [start_timestamp, end_timestamp) have been queried.
        - Records within that interval are either loaded into memory or
          confirmed absent.
        - The interval does not imply that memory contains continuous records.

        The loaded range is later expanded incrementally if additional
        out-of-window ranges are requested.

        If ``center_timestamp`` is not provided, the current time is used.

        Args:
            center_timestamp (Optional[DatabaseTimestampType]):
                The central reference time for the initial loading window.
                If None, the current time is used.

        Side Effects:
            * Loads records from persistent storage into memory.
            * Sets ``self._db_loaded_range`` via db_load_records().
            * Sets ``self._db_load_phase`` to ``INITIAL``.

        Notes:
            * The loaded range uses half-open interval semantics:
              [start_timestamp, end_timestamp).
            * This method does not perform a full database load.
            * Empty query results still establish coverage for the interval,
              preventing redundant database queries.
        """
        if not self.db_enabled:
            return

        # Redundant guard - should only be called from load phase NONE
        if self._db_load_phase is not DatabaseRecordProtocolLoadPhase.NONE:
            raise RuntimeError(
                "_db_load_initial_window() may only be called when load phase is NONE."
            )

        window_h = self.config.database.initial_load_window_h
        if window_h is None:
            start, end = self._search_window(center_timestamp, UNBOUND_WINDOW)
        else:
            window = to_duration(window_h * 3600)
            start, end = self._search_window(center_timestamp, window)

        self.db_load_records(start, end)

        self._db_load_phase = DatabaseRecordProtocolLoadPhase.INITIAL

    def _db_load_full(self) -> int:
        """Load all remaining records from the database into memory.

        This method performs a **full load** of the database, ensuring that all
        records are present in memory. After this operation, the `_db_load_phase`
        will be set to FULL, and `_db_loaded_range` will cover all known records.

        **State transitions:**

        * Allowed only from the INITIAL phase (partial window loaded) or NONE
          (nothing loaded yet).
        * Calling from FULL raises RuntimeError.

        Returns:
            int: Number of records loaded from the database during this operation.

        Raises:
            RuntimeError: If called from an invalid load phase.
        """
        if not self.db_enabled:
            return 0

        # Guard: must only run from NONE or INITIAL
        if self._db_load_phase not in (
            DatabaseRecordProtocolLoadPhase.NONE,
            DatabaseRecordProtocolLoadPhase.INITIAL,
        ):
            raise RuntimeError(
                "_db_load_full() may only be called when load phase is NONE or INITIAL."
            )

        # Perform full database load (memory is authoritative; skips duplicates)
        # This also sets _db_loaded_range
        loaded_count = self.db_load_records()

        # Update state
        self._db_load_phase = DatabaseRecordProtocolLoadPhase.FULL

        return loaded_count

    def _extend_boundaries(
        self,
        start_timestamp: DatabaseTimestampType,
        end_timestamp: DatabaseTimestampType,
    ) -> tuple[DatabaseTimestampType, DatabaseTimestampType]:
        """Find the nearest database records outside the requested range.

        Returns:
            (new_start, new_end) timestamps to fully cover the requested range
            including neighbors.
        """
        if start_timestamp is None or end_timestamp is None:
            # Make mypy happy
            raise RuntimeError(f"timestamps must not be None: {start_timestamp}, {end_timestamp}")

        new_start, new_end = start_timestamp, end_timestamp

        # Extend start
        if (
            not isinstance(start_timestamp, _DatabaseTimestampUnbound)
            and self._db_sorted_timestamps
            and start_timestamp < self._db_sorted_timestamps[0]
        ):
            # There may be earlier DB records
            # Reverse iterate to get nearest smaller key
            for key, _ in self.database.iterate_records(
                start_key=None,  # unbounded at the backend key level
                end_key=self._db_key_from_timestamp(start_timestamp),
                namespace=self.db_namespace(),
                reverse=True,
            ):
                ts = self._db_key_to_timestamp(key)

                if ts in self._db_deleted_timestamps:
                    continue

                if ts < start_timestamp:
                    new_start = ts
                    break  # first valid record is the nearest

        # Extend end
        if (
            not isinstance(end_timestamp, _DatabaseTimestampUnbound)
            and self._db_sorted_timestamps
            and end_timestamp > self._db_sorted_timestamps[-1]
        ):
            # There may be later DB records
            for key, _ in self.database.iterate_records(
                start_key=self._db_key_from_timestamp(end_timestamp),
                end_key=None,  # unbounded at the backend key level
                namespace=self.db_namespace(),
            ):
                ts = self._db_key_to_timestamp(key)

                if ts in self._db_deleted_timestamps:
                    continue

                if ts >= end_timestamp:
                    new_end = ts
                    break  # first valid record is the nearest

        return new_start, new_end

    def _db_ensure_loaded(
        self,
        start_timestamp: Optional[DatabaseTimestampType] = None,
        end_timestamp: Optional[DatabaseTimestampType] = None,
        *,
        center_timestamp: Optional[DatabaseTimestampType] = None,
    ) -> None:
        """Ensure database records for a given timestamp range are available in memory.

        Lazy loading is performed in phases: NONE -> INITIAL -> FULL

        1. **NONE**: No records loaded yet.

           * If a range is provided, load exactly that range.
           * If no range, load an initial window around `center_timestamp`.

        2. **INITIAL**: A partial window is loaded.

           * If the requested range extends beyond the loaded window, expand left/right as needed.
           * If no range requested, escalate to FULL.

        3. **FULL**: All records already loaded. Nothing to do.

        Args:
            start_timestamp (Optional[DatabaseTimestampType]): Inclusive start of desired range.
            end_timestamp (Optional[DatabaseTimestampType]): Exclusive end of desired range.
            center_timestamp (Optional[DatabaseTimestampType]): Center for the initial window
                if nothing is loaded.

        Notes:
            * Only used for preparing memory for subsequent queries; does not return records.
            * `center_timestamp` is ignored once an initial window has been established.
        """
        if not self.db_enabled:
            return

        # Normalize boundaries immediately (strict DB layer rule)
        if start_timestamp is None:
            start_timestamp = UNBOUND_START
        if end_timestamp is None:
            end_timestamp = UNBOUND_END

        # Shortcut: memory already covers the extended range
        if self._db_sorted_timestamps:
            mem_start, mem_end = self._db_sorted_timestamps[0], self._db_sorted_timestamps[-1]

            # Case 1: bounded request
            if (
                start_timestamp is not UNBOUND_START
                and end_timestamp is not UNBOUND_END
                and mem_start < start_timestamp
                and mem_end >= end_timestamp
            ):
                return

            # Case 2: unbounded request only safe if FULL
            if (
                self._db_load_phase is DatabaseRecordProtocolLoadPhase.FULL
                and (start_timestamp is UNBOUND_START or mem_start < start_timestamp)
                and (end_timestamp is UNBOUND_END or mem_end >= end_timestamp)
            ):
                return

        # Phase 0: NOTHING LOADED
        if self._db_load_phase is DatabaseRecordProtocolLoadPhase.NONE:
            if start_timestamp is UNBOUND_START and end_timestamp is UNBOUND_END:
                self._db_load_initial_window(center_timestamp)
                # _db_load_initial_window sets _db_loaded_range and _db_load_phase
            else:
                # Load the records
                self.db_load_records(start_timestamp, end_timestamp)
                self._db_load_phase = DatabaseRecordProtocolLoadPhase.INITIAL
            return

        if center_timestamp is not None:
            logger.debug(
                f"Center timestamp parameter '{center_timestamp}' given outside of load phase NONE"
            )

        # Phase 1: INITIAL WINDOW (PARTIAL)
        if self._db_load_phase is DatabaseRecordProtocolLoadPhase.INITIAL:
            if self._db_loaded_range is None:
                # Should never happen
                raise RuntimeError("_db_loaded_range shall be set when load phase is INITIAL")

            if self._db_range_covered(start_timestamp, end_timestamp):
                return  # already have it

            # Escalate to FULL if no range is specified
            if start_timestamp == UNBOUND_START and end_timestamp == UNBOUND_END:
                self._db_load_full()
                return

            current_start, current_end = self._db_loaded_range
            if current_start is None or current_end is None:
                raise RuntimeError(
                    "_db_loaded_range shall not be set to (None, None) when load phase is INITIAL"
                )

            # Left expansion
            if start_timestamp < current_start:
                self.db_load_records(start_timestamp, current_start)

            # Right expansion
            if end_timestamp > current_end:
                self.db_load_records(current_end, end_timestamp)

            return

        # Phase 2: FULL
        # Everything already loaded, nothing to do
        return
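    # Phase walkthrough (illustrative): a first bounded call loads exactly that
    # range (NONE -> INITIAL); later calls only hit the database for portions
    # outside the already-loaded window; an unbounded call escalates to FULL:
    #
    #     self._db_ensure_loaded(ts_a, ts_b)   # NONE -> INITIAL, loads [ts_a, ts_b)
    #     self._db_ensure_loaded(ts_a, ts_b)   # covered, no DB access
    #     self._db_ensure_loaded()             # INITIAL -> FULL, loads the rest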

    # ---- derived class required interface ----

    def db_keep_duration(self) -> Optional[Duration]:
        """Duration for which database records should be retained.

        Used when removing old records from the database to free space.

        Defaults to the general database configuration.

        May be overridden by a derived class.

        Returns:
            Duration or None (keep forever).
        """
        duration_h: Optional[float] = self.config.database.keep_duration_h
        if duration_h is None:
            return None
        return to_duration(duration_h * 3600)

    def db_namespace(self) -> str:
        """Namespace of database.

        To be implemented by derived class.
        """
        raise NotImplementedError

    # ---- public DB interface ----

    @property
    def db_enabled(self) -> bool:
        return self.database.is_open

    def db_timestamp_range(
        self,
    ) -> tuple[Optional[DatabaseTimestamp], Optional[DatabaseTimestamp]]:
        """Get the timestamp range of records in the database.

        Considers records in storage plus extra records in memory.
        """
        # Defensive call - model_post_init() may not have initialized metadata
        self._db_ensure_initialized()

        if self._db_sorted_timestamps:
            memory_min_timestamp: Optional[DatabaseTimestamp] = self._db_sorted_timestamps[0]
            memory_max_timestamp: Optional[DatabaseTimestamp] = self._db_sorted_timestamps[-1]
        else:
            memory_min_timestamp = None
            memory_max_timestamp = None

        if not self.db_enabled:
            return memory_min_timestamp, memory_max_timestamp

        db_min_key, db_max_key = self.database.get_key_range(self.db_namespace())

        if db_min_key is None or db_max_key is None:
            return memory_min_timestamp, memory_max_timestamp

        storage_min_timestamp = self._db_key_to_timestamp(db_min_key)
        storage_max_timestamp = self._db_key_to_timestamp(db_max_key)

        if memory_min_timestamp and memory_min_timestamp < storage_min_timestamp:
            min_timestamp = memory_min_timestamp
        else:
            min_timestamp = storage_min_timestamp
        if memory_max_timestamp and memory_max_timestamp > storage_max_timestamp:
            max_timestamp = memory_max_timestamp
        else:
            max_timestamp = storage_max_timestamp

        return min_timestamp, max_timestamp

    def db_generate_timestamps(
        self,
        start_timestamp: DatabaseTimestamp,
        values_count: int,
        interval: Optional[Duration] = None,
    ) -> Iterator[DatabaseTimestamp]:
        """Generate database timestamps using fixed absolute time stepping.

        The iterator advances strictly in UTC, guaranteeing constant
        spacing in seconds across daylight saving transitions.

        Returned database timestamps are in UTC. This avoids ambiguity during
        fall-back transitions and prevents accidental overwriting when
        inserting into UTC-normalized storage backends.

        Args:
            start_timestamp (DatabaseTimestamp): Starting database timestamp.
            values_count (int): Number of timestamps to generate.
            interval (Optional[Duration]): Fixed duration between timestamps.
                Defaults to 1 hour if not provided.

        Yields:
            DatabaseTimestamp: UTC-based database timestamps.

        Raises:
            ValueError: If values_count is negative.
        """
        if values_count < 0:
            raise ValueError("values_count must be non-negative")

        if interval is None:
            interval = Duration(hours=1)

        step_seconds = int(interval.total_seconds())

        current_utc = DatabaseTimestamp.to_datetime(start_timestamp)

        for _ in range(values_count):
            yield DatabaseTimestamp.from_datetime(current_utc)
            current_utc = current_utc.add(seconds=step_seconds)
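    # Example (illustrative): three timestamps at 15-minute spacing:
    #
    #     list(self.db_generate_timestamps(DatabaseTimestamp("20241027T000000Z"), 3,
    #                                      to_duration(900)))
    #     # -> ["20241027T000000Z", "20241027T001500Z", "20241027T003000Z"]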

    def db_get_record(
        self,
        target_timestamp: DatabaseTimestamp,
        *,
        time_window: DatabaseTimeWindowType = None,
    ) -> Optional[T_Record]:
        """Get the record at or nearest to the specified timestamp.

        The search strategies are:

        * None - exact match only.
        * UNBOUND_WINDOW - nearest record across all stored records.
        * Duration - nearest record within a symmetric window of this total width around
          target_timestamp.

        Args:
            target_timestamp: The timestamp to search for.
            time_window: Controls the search strategy (None, UNBOUND_WINDOW, Duration).

        Returns:
            Exact match, nearest record within the window, or None.
        """
        self._db_ensure_initialized()

        if time_window is None:
            # Exact match only — load the minimal range containing this point
            self._db_ensure_loaded(
                target_timestamp,
                self._db_timestamp_after(target_timestamp),
                center_timestamp=target_timestamp,
            )
            return self._db_record_index.get(target_timestamp, None)

        # Load the relevant range
        # (in the unbounded case this escalates to FULL)
        search_start, search_end = self._search_window(target_timestamp, time_window)
        self._db_ensure_loaded(search_start, search_end, center_timestamp=target_timestamp)

        # Exact match first (works for all three cases once loaded)
        record = self._db_record_index.get(target_timestamp, None)
        if record is not None:
            return record

        # Nearest-neighbour search
        idx = bisect.bisect_left(self._db_sorted_timestamps, target_timestamp)
        candidates = []
        if idx < len(self._db_sorted_timestamps):
            candidates.append(self.records[idx])
        if idx > 0:
            candidates.append(self.records[idx - 1])
        if not candidates:
            return None

        record = min(
            candidates,
            key=lambda r: abs(
                (r.date_time - DatabaseTimestamp.to_datetime(target_timestamp)).total_seconds()
            ),
        )

        # For bounded windows, enforce the distance constraint
        if not isinstance(time_window, _DatabaseTimeWindowUnbound):
            half_seconds = to_duration(time_window).total_seconds() / 2
            if (
                abs(
                    (
                        record.date_time - DatabaseTimestamp.to_datetime(target_timestamp)
                    ).total_seconds()
                )
                > half_seconds
            ):
                return None

        return record
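    # Strategy examples (illustrative):
    #
    #     ts = DatabaseTimestamp("20241027T120000Z")
    #     self.db_get_record(ts)                                 # exact match or None
    #     self.db_get_record(ts, time_window=to_duration(3600))  # nearest within +/- 30 min
    #     self.db_get_record(ts, time_window=UNBOUND_WINDOW)     # nearest overall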

    def db_insert_record(
        self,
        record: T_Record,
        *,
        mark_dirty: bool = True,
    ) -> None:
        # Defensive call - model_post_init() may not have initialized metadata
        self._db_ensure_initialized()

        # Ensure normalized to UTC
        db_record_date_time = DatabaseTimestamp.from_datetime(record.date_time)

        self._db_ensure_loaded(
            start_timestamp=db_record_date_time,
            end_timestamp=db_record_date_time,
        )

        # Memory only
        if db_record_date_time in self._db_record_index:
            # No duplicates allowed
            raise ValueError(f"Duplicate timestamp {record.date_time} -> {db_record_date_time}")

        if db_record_date_time in self._db_deleted_timestamps:
            # Clear tombstone - if we are re-inserting
            self._db_deleted_timestamps.discard(db_record_date_time)

        # Insert sorted
        index = bisect.bisect_left(self._db_sorted_timestamps, db_record_date_time)
        self._db_sorted_timestamps.insert(index, db_record_date_time)
        self.records.insert(index, record)
        self._db_record_index[db_record_date_time] = record

        if mark_dirty:
            self._db_dirty_timestamps.add(db_record_date_time)
            self._db_new_timestamps.add(db_record_date_time)
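    # Usage example (illustrative; assumes the derived class's record type
    # accepts a date_time keyword). New records are kept sorted in memory and
    # flagged dirty so a later db_save_records() can persist them:
    #
    #     rec = self.record_class()(date_time=to_datetime())
    #     self.db_insert_record(rec)  # raises ValueError on a duplicate timestamp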

    # -----------------------------------------------------
    # Load (range)
    # -----------------------------------------------------

    def db_load_records(
        self,
        start_timestamp: Optional[DatabaseTimestampType] = None,
        end_timestamp: Optional[DatabaseTimestampType] = None,
    ) -> int:
        """Load records from database into memory.

        Merges database records into in-memory records while preserving:
        - Memory-only records
        - Sorted order
        - No duplicates (memory wins over DB)

        The requested load range is extended to include the first record < start_timestamp
        and the first record >= end_timestamp, so nearest-neighbor searches do not require
        additional DB lookups.

        The `_db_loaded_range` is updated to reflect the total timestamp span
        currently present in memory after this method completes.

        Args:
            start_timestamp: Load records from this timestamp (inclusive)
            end_timestamp: Load records until this timestamp (exclusive)

        Returns:
            Number of records loaded from database

        Note:
            record.date_time shall be DateTime or None
        """
        # Defensive call - model_post_init() may not have initialized metadata
        self._db_ensure_initialized()

        if not self.db_enabled:
            return 0

        # Normalize boundaries immediately (strict DB layer rule)
        if start_timestamp is None:
            start_timestamp = UNBOUND_START
        if end_timestamp is None:
            end_timestamp = UNBOUND_END

        # Extend boundaries to include first record < start and first record >= end
        query_start, query_end = self._extend_boundaries(start_timestamp, end_timestamp)

        if isinstance(query_start, _DatabaseTimestampUnbound):
            start_key = None
        else:
            start_key = self._db_key_from_timestamp(query_start)
        if isinstance(query_end, _DatabaseTimestampUnbound):
            end_key = None
        else:
            end_key = self._db_key_from_timestamp(query_end)

        namespace = self.db_namespace()

        loaded_count = 0

        # Iterate DB records (already sorted by key)
        for db_key, value in self.database.iterate_records(
            start_key=start_key,
            end_key=end_key,
            namespace=namespace,
        ):
            if db_key == DATABASE_METADATA_KEY:
                continue

            record = self._db_deserialize_record(value)
            db_record_date_time = DatabaseTimestamp.from_datetime(record.date_time)

            # Do not resurrect explicitly deleted records
            if db_record_date_time in self._db_deleted_timestamps:
                continue

            # ---- Memory is authoritative: skip if already present
            if db_record_date_time in self._db_record_index:
                continue

            # Insert sorted
            # - do not call self.db_insert_record - may call db_load_records recursively
            # - see self.db_insert_record(record, mark_dirty=False)
            index = bisect.bisect_left(self._db_sorted_timestamps, db_record_date_time)
            self._db_sorted_timestamps.insert(index, db_record_date_time)
            self.records.insert(index, record)
            self._db_record_index[db_record_date_time] = record

            loaded_count += 1

        # Update range of timestamps that was already queried from database storage during load
        if self._db_loaded_range is None:
            # First load - initialize
            self._db_loaded_range = query_start, query_end
        else:
            current_start, current_end = self._db_loaded_range
            if query_start < current_start:
                current_start = query_start
            if query_end > current_end:
                current_end = query_end
            self._db_loaded_range = current_start, current_end

        return loaded_count

    # -----------------------------------------------------
    # Delete (range)
    # -----------------------------------------------------

    def db_delete_records(
        self,
        start_timestamp: Optional[DatabaseTimestampType] = None,
        end_timestamp: Optional[DatabaseTimestampType] = None,
    ) -> int:
        """Delete records in [start_timestamp, end_timestamp) from memory and storage.

        Records are removed from the in-memory structures immediately and
        marked for physical deletion; the physical delete from storage happens
        on the next db_save_records() call. A pending insert for a deleted
        timestamp is cancelled instead of being written.

        Args:
            start_timestamp: Delete records from this timestamp (inclusive)
            end_timestamp: Delete records until this timestamp (exclusive)

        Returns:
            Number of records deleted
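
        Example:
            Illustrative sketch (``provider`` stands for any concrete record
            provider; the timestamp is a placeholder):

            .. code-block:: python

                # Drop everything before 2024; the physical delete happens on save.
                n = provider.db_delete_records(
                    end_timestamp=to_datetime("2024-01-01 00:00:00"),
                )
                provider.db_save_records()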
        """
        # Defensive call - model_post_init() may not have initialized metadata
        self._db_ensure_initialized()

        # Deletion must see every record in the range - ensure it is loaded
        self._db_ensure_loaded(
            start_timestamp=start_timestamp,
            end_timestamp=end_timestamp,
        )

        to_delete: list[DatabaseTimestamp] = []

        for dt in list(self._db_sorted_timestamps):
            if start_timestamp is not None and dt < start_timestamp:
                continue
            if end_timestamp is not None and dt >= end_timestamp:
                continue
            to_delete.append(dt)

        for dt in to_delete:
            record = self._db_record_index.pop(dt, None)
            if record is not None:
                idx = bisect.bisect_left(self._db_sorted_timestamps, dt)
                if idx < len(self._db_sorted_timestamps) and self._db_sorted_timestamps[idx] == dt:
                    self._db_sorted_timestamps.pop(idx)
                try:
                    self.records.remove(record)
                except Exception as ex:
                    logger.debug(f"Failed to remove record: {ex}")

            # Mark for physical deletion
            self._db_deleted_timestamps.add(dt)

            # If it was dirty (new record), cancel the insert instead
            self._db_dirty_timestamps.discard(dt)
            self._db_new_timestamps.discard(dt)

        return len(to_delete)

    # -----------------------------------------------------
    # Iteration from DB (no duplicates)
    # -----------------------------------------------------

    def db_iterate_records(
        self,
        start_timestamp: Optional[DatabaseTimestampType] = None,
        end_timestamp: Optional[DatabaseTimestampType] = None,
    ) -> Iterator[T_Record]:
        """Iterate records in the requested range.

        Ensures storage is loaded into memory first, then iterates over
        in-memory records only, skipping explicitly deleted timestamps.

        Args:
            start_timestamp: Yield records from this timestamp (inclusive)
            end_timestamp: Yield records until this timestamp (exclusive)

        Yields:
            Records in ascending timestamp order
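
        Example:
            Illustrative sketch (``provider`` stands for any concrete record
            provider; the timestamps are placeholders):

            .. code-block:: python

                for record in provider.db_iterate_records(
                    start_timestamp=to_datetime("2024-01-01 00:00:00"),
                    end_timestamp=to_datetime("2024-01-02 00:00:00"),
                ):
                    print(record.date_time)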
        """
        # Defensive call - model_post_init() may not have initialized metadata
        self._db_ensure_initialized()

        # Ensure memory contains required range
        self._db_ensure_loaded(
            start_timestamp=start_timestamp,
            end_timestamp=end_timestamp,
        )

        for record in self.records:
            record_date_time_timestamp = DatabaseTimestamp.from_datetime(record.date_time)

            if start_timestamp is not None and record_date_time_timestamp < start_timestamp:
                continue

            if end_timestamp is not None and record_date_time_timestamp >= end_timestamp:
                break

            if record_date_time_timestamp in self._db_deleted_timestamps:
                continue

            yield record

    # -----------------------------------------------------
    # Dirty tracking
    # -----------------------------------------------------

    def db_mark_dirty_record(self, record: T_Record) -> None:
        """Mark a record as dirty so the next db_save_records() persists it."""
        # Defensive call - model_post_init() may not have initialized metadata
        self._db_ensure_initialized()

        record_date_time_timestamp = DatabaseTimestamp.from_datetime(record.date_time)
        self._db_dirty_timestamps.add(record_date_time_timestamp)

    # -----------------------------------------------------
    # Bulk save (flush dirty only)
    # -----------------------------------------------------

    def db_save_records(self) -> int:
        """Persist pending changes (dirty records and pending deletions) to storage.

        Returns:
            Number of records saved plus number of records deleted
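
        Example:
            Typical dirty-tracking flow (illustrative sketch; ``provider``
            stands for any concrete record provider and ``some_value`` for
            any writable record key):

            .. code-block:: python

                record.some_value = 42.0  # mutate an in-memory record
                provider.db_mark_dirty_record(record)
                n = provider.db_save_records()  # flushes the change to storage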
        """
        # Defensive call - model_post_init() may not have initialized metadata
        self._db_ensure_initialized()

        if not self.db_enabled:
            return 0

        if not self._db_dirty_timestamps and not self._db_deleted_timestamps:
            return 0

        namespace = self.db_namespace()

        # Safer order: saves first, deletes last

        # --- handle inserts/updates ---
        save_items = []
        for dt in self._db_dirty_timestamps:
            record = self._db_record_index.get(dt)
            if record:
                key = self._db_key_from_timestamp(dt)
                value = self._db_serialize_record(record)
                save_items.append((key, value))
        saved_count = len(save_items)
        if saved_count:
            self.database.save_records(save_items, namespace=namespace)
            self._db_dirty_timestamps.clear()
            self._db_new_timestamps.clear()

        # --- handle deletions ---
        # Initialize here so the return below works even when there are no deletions
        deleted_count = 0
        if self._db_deleted_timestamps:
            delete_keys = [self._db_key_from_timestamp(dt) for dt in self._db_deleted_timestamps]
            self.database.delete_records(delete_keys, namespace=namespace)
            deleted_count = len(self._db_deleted_timestamps)
            self._db_deleted_timestamps.clear()

        return saved_count + deleted_count

    def db_autosave(self) -> int:
        """Flush pending changes to storage (autosave entry point)."""
        return self.db_save_records()

    def db_vacuum(
        self,
        keep_hours: Optional[int] = None,
        keep_timestamp: Optional[DatabaseTimestampType] = None,
    ) -> int:
        """Remove old records from database to free space.

        Semantics:

        - keep_hours is relative to the DB's max timestamp: the cutoff is
          db_max - (keep_hours - 1) hours, so the newest keep_hours hours of
          data are retained and records with timestamp < cutoff are deleted.
        - keep_timestamp is an absolute cutoff; records with timestamp < cutoff are deleted (exclusive).

        Uses self.db_keep_duration() if both keep_hours and keep_timestamp are None.

        Args:
            keep_hours: Keep only records from the last N hours (relative to the data's max timestamp)
            keep_timestamp: Keep only records from this timestamp on (absolute cutoff)

        Returns:
            Number of records deleted
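
        Example:
            Illustrative sketch (``provider`` stands for any concrete record
            provider):

            .. code-block:: python

                # Keep only the most recent 48 hours of data.
                deleted = provider.db_vacuum(keep_hours=48)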
        """
        # Defensive call - model_post_init() may not have initialized metadata
        self._db_ensure_initialized()

        if keep_hours is None and keep_timestamp is None:
            keep_duration = self.db_keep_duration()
            if keep_duration is None:
                # No vacuum when no keep limit is configured
                logger.info(
                    f"Vacuum requested for database '{self.db_namespace()}' but keep limit is infinite."
                )
                return 0
            keep_hours = keep_duration.hours

        if keep_hours is not None:
            _, db_max = self.db_timestamp_range()
            if db_max is None or isinstance(db_max, _DatabaseTimestampUnbound):
                # No records
                return 0  # nothing to delete
            if keep_hours <= 0:
                db_cutoff_timestamp: DatabaseTimestampType = UNBOUND_END
            else:
                # cutoff = first record we want to keep; everything before is removed
                datetime_max: DateTime = DatabaseTimestamp.to_datetime(db_max)
                db_cutoff_timestamp = DatabaseTimestamp.from_datetime(
                    datetime_max.subtract(hours=keep_hours - 1)
                )
        elif keep_timestamp is not None:
            db_cutoff_timestamp = keep_timestamp
        else:
            raise ValueError("Must specify either keep_hours or keep_timestamp")

        # Delete records
        deleted_count = self.db_delete_records(end_timestamp=db_cutoff_timestamp)

        self.db_save_records()

        logger.info(
            f"Vacuumed {deleted_count} old records from database '{self.db_namespace()}' "
            f"(before {db_cutoff_timestamp})"
        )
        return deleted_count

    def db_count_records(self) -> int:
        """Return total logical number of records.

        Memory is authoritative. If DB is enabled but not fully loaded,
        we conservatively include storage-only records.
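
        Example:
            With 100 records in storage, 5 unsaved new records in memory and
            2 pending deletions, the partially loaded count is
            ``100 + 5 - 2 == 103``.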
        """
        # Defensive call - model_post_init() may not have initialized metadata
        self._db_ensure_initialized()

        if not self.db_enabled:
            return len(self.records)

        # If fully loaded, memory is complete view
        if self._db_load_phase is DatabaseRecordProtocolLoadPhase.FULL:
            return len(self.records)

        storage_count = self.database.count_records(namespace=self.db_namespace())
        pending_deletes = len(self._db_deleted_timestamps)
        new_count = len(self._db_new_timestamps)

        return storage_count + new_count - pending_deletes

    def db_get_stats(self) -> dict:
        """Get comprehensive statistics about database storage.

        Returns:
            Dictionary with statistics
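
        Example:
            Illustrative sketch (``provider`` stands for any concrete record
            provider; the keys shown are set by this method):

            .. code-block:: python

                stats = provider.db_get_stats()
                print(stats["total_records"], stats["timestamp_range"]["min"])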
        """
        if not self.db_enabled:
            return {"enabled": False}

        ns = self.db_namespace()

        stats = {
            "enabled": True,
            "backend": self.database.__class__.__name__,
            "path": str(self.database.storage_path),
            "memory_records": len(self.records),
            "compression_enabled": self.database.compression,
            "keep_duration_h": self.config.database.keep_duration_h,
            "autosave_interval_sec": self.config.database.autosave_interval_sec,
            "total_records": self.database.count_records(namespace=ns),
        }

        # Add backend-specific stats
        stats.update(self.database.get_backend_stats(namespace=ns))

        min_timestamp, max_timestamp = self.db_timestamp_range()
        stats["timestamp_range"] = {
            "min": str(min_timestamp),
            "max": str(max_timestamp),
        }

        return stats

    # ==================== Tiered Compaction ====================

    def db_compact_tiers(self) -> list[tuple[Duration, Duration]]:
        """Compaction tiers as (age_threshold, target_interval) pairs.

        Records older than age_threshold are downsampled to target_interval.
        Tiers must be ordered from shortest to longest age threshold.

        Default policy:

        - older than 2 hours → 15 min resolution
        - older than 14 days → 1 hour resolution

        Return an empty list to disable compaction entirely.
        Override in derived classes for domain-specific behaviour.

        Example override to disable:

        .. code-block:: python

            def db_compact_tiers(self):
                return []

        Example override for price data (already at 15 min, skip first tier):

        .. code-block:: python

            def db_compact_tiers(self):
                return [
                    (to_duration("2 weeks"), to_duration("1 hour")),
                ]
        """
        return [
            (to_duration("2 hours"), to_duration("15 minutes")),
            (to_duration("14 days"), to_duration("1 hour")),
        ]

    # ------------------------------------------------------------------
    # Compaction state helpers (stored in namespace metadata)
    # ------------------------------------------------------------------

    def _db_get_compact_state(
        self,
        tier_interval: Duration,
    ) -> Optional[DatabaseTimestamp]:
        """Load the last compaction cutoff timestamp for a given tier interval.

        Args:
            tier_interval: The target interval that identifies this tier.

        Returns:
            The last cutoff DatabaseTimestamp, or None if never compacted.
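
        Note:
            The metadata key encodes the tier interval in whole seconds, so a
            15-minute tier (900 s) is stored under
            ``last_compact_cutoff_900``:

            .. code-block:: python

                key = f"last_compact_cutoff_{int(to_duration('15 minutes').total_seconds())}"
                # key == "last_compact_cutoff_900"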
        """
        if self._db_metadata is None:
            return None
        key = f"last_compact_cutoff_{int(tier_interval.total_seconds())}"
        cutoff_str = self._db_metadata.get(key)
        return DatabaseTimestamp(cutoff_str) if cutoff_str else None

    def _db_set_compact_state(
        self,
        tier_interval: Duration,
        cutoff_ts: DatabaseTimestamp,
    ) -> None:
        """Persist the last compaction cutoff timestamp for a given tier interval.

        Args:
            tier_interval: The target interval that identifies this tier.
            cutoff_ts: The cutoff timestamp to store.
        """
        if self._db_metadata is None:
            self._db_metadata = {}
        key = f"last_compact_cutoff_{int(tier_interval.total_seconds())}"
        self._db_metadata[key] = str(cutoff_ts)
        self._db_save_metadata(self._db_metadata)

    # ------------------------------------------------------------------
    # Single-tier worker
    # ------------------------------------------------------------------

    def _db_compact_tier(
        self,
        age_threshold: Duration,
        target_interval: Duration,
    ) -> int:
        """Downsample records older than age_threshold to target_interval resolution.

        Only processes the window [last_compact_cutoff, new_cutoff) so repeated
        runs are cheap.

        The window boundaries are snapped to UTC epoch-aligned interval boundaries
        before processing:

        - ``window_start`` is floored to the nearest interval boundary at or before
          the raw start. This guarantees that the first resampled bucket always
          sits on a clock-round timestamp (e.g. :00/:15/:30/:45 for 15 min) and
          that consecutive runs produce gapless, non-overlapping coverage.
        - ``window_end`` (the new cutoff stored in metadata) is also floored, so
          the boundary stored in metadata is always interval-aligned. Records
          between the floored cutoff and the raw cutoff (``newest - age_threshold``)
          are left untouched and will be picked up on the next run once more data
          arrives and the floored cutoff advances.

        Skips resampling entirely when the existing record count is already at or
        below the number of buckets resampling would produce (sparse-data guard).
        When data is sparse but timestamps are misaligned the guard is bypassed and
        timestamps are snapped to interval boundaries without changing values.

        Args:
            age_threshold: Records older than (newest - age_threshold) are compacted.
            target_interval: Target resolution after compaction.

        Returns:
            Number of original records deleted (before re-insertion of downsampled
            records). Returns 0 if skipped.
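
        Example:
            Worked flooring example for a 15-minute tier (interval 900 s),
            with timestamps as UTC epoch seconds:

            .. code-block:: python

                interval_sec = 900
                raw_cutoff_epoch = 1_700_000_123  # an arbitrary instant
                floored = (raw_cutoff_epoch // interval_sec) * interval_sec
                # floored == 1_700_000_100, the :00/:15/:30/:45 boundary
                # at or before the raw cutoff.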
        """
        self._db_ensure_initialized()

        interval_sec = int(target_interval.total_seconds())
        if interval_sec <= 0:
            return 0

        # ---- Determine raw new cutoff ------------------------------------
        _, db_max = self.db_timestamp_range()
        if db_max is None or isinstance(db_max, _DatabaseTimestampUnbound):
            return 0

        newest_dt = DatabaseTimestamp.to_datetime(db_max)
        raw_cutoff_dt = newest_dt - age_threshold

        # Snap new_cutoff DOWN to the nearest interval boundary.
        # Records in [floored_cutoff, raw_cutoff) are left alone until the next
        # run - they are inside the age window but straddle an incomplete bucket.
        raw_cutoff_epoch = int(raw_cutoff_dt.timestamp())
        floored_cutoff_epoch = (raw_cutoff_epoch // interval_sec) * interval_sec
        new_cutoff_dt = DateTime.fromtimestamp(floored_cutoff_epoch, tz="UTC")
        new_cutoff_ts = DatabaseTimestamp.from_datetime(new_cutoff_dt)

        # ---- Determine window start (incremental) ------------------------
        last_cutoff_ts = self._db_get_compact_state(target_interval)

        if last_cutoff_ts is not None and last_cutoff_ts >= new_cutoff_ts:
            logger.debug(
                f"Namespace '{self.db_namespace()}' tier {target_interval} already "
                f"compacted up to {new_cutoff_ts}, skipping."
            )
            return 0

        db_min, _ = self.db_timestamp_range()
        if db_min is None or isinstance(db_min, _DatabaseTimestampUnbound):
            return 0

        # Raw window start: last cutoff or absolute db minimum
        raw_window_start_ts = last_cutoff_ts if last_cutoff_ts is not None else db_min
        if raw_window_start_ts >= new_cutoff_ts:
            return 0

        raw_window_start_dt = DatabaseTimestamp.to_datetime(raw_window_start_ts)

        # Snap window_start DOWN to the nearest interval boundary so the first
        # resampled bucket is clock-aligned. This may pull the window slightly
        # earlier than the last stored cutoff, which is safe: key_to_array
        # truncates its result to the window we pass (see the full resampling
        # path below) and the re-insert step is idempotent for
        # already-compacted records (they will simply be overwritten with the
        # same values).
        raw_start_epoch = int(raw_window_start_dt.timestamp())
        floored_start_epoch = (raw_start_epoch // interval_sec) * interval_sec
        window_start_dt = DateTime.fromtimestamp(floored_start_epoch, tz="UTC")
        window_start_ts = DatabaseTimestamp.from_datetime(window_start_dt)

        window_end_dt = new_cutoff_dt  # exclusive upper bound, already aligned
        window_end_ts = new_cutoff_ts

        # ---- Sparse-data guard -------------------------------------------
        existing_count = self.database.count_records(
            start_key=self._db_key_from_timestamp(window_start_ts),
            end_key=self._db_key_from_timestamp(window_end_ts),
            namespace=self.db_namespace(),
        )

        window_sec = int((window_end_dt - window_start_dt).total_seconds())
        # Maximum number of buckets resampling could produce (ceiling division)
        resampled_count = (window_sec + interval_sec - 1) // interval_sec
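        # Worked example of the ceiling division (assumption: 15-min tier over
        # a one-day window): window_sec = 86_400, interval_sec = 900, so
        # resampled_count = (86_400 + 899) // 900 = 96 buckets.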

        if existing_count == 0:
            # Nothing in window - just advance the cutoff
            self._db_set_compact_state(target_interval, new_cutoff_ts)
            return 0

        if existing_count <= resampled_count:
            # Data is already sparse - check whether timestamps are aligned.
            # If every record already sits on an interval boundary, nothing to do.
            # If any are misaligned, snap them in place without resampling.
            records_in_window = [
                r
                for r in self.records
                if r.date_time is not None and window_start_dt <= r.date_time < window_end_dt
            ]
            misaligned = [
                r for r in records_in_window if int(r.date_time.timestamp()) % interval_sec != 0
            ]
            if not misaligned:
                logger.debug(
                    f"Skipping tier {target_interval} compaction for "
                    f"namespace '{self.db_namespace()}': "
                    f"existing={existing_count} <= resampled={resampled_count} "
                    f"and all timestamps already aligned "
                    f"(window={window_start_dt}..{window_end_dt})"
                )
                self._db_set_compact_state(target_interval, new_cutoff_ts)
                return 0

            # ---- Sparse but misaligned: full window rewrite -----------------
            # Delete the entire window and reinsert floor-snapped records.
            # Deleting first guarantees no duplicate-timestamp ValueError on
            # reinsert, even when an already-aligned record sits at the same
            # epoch that a misaligned record floors to.
            logger.debug(
                f"Rewriting sparse window in namespace '{self.db_namespace()}' "
                f"tier {target_interval} (existing={existing_count}, "
                f"resampled={resampled_count})"
            )

            # Build snapped buckets from ALL records in window.
            # Process chronologically so the earliest record's values win when
            # multiple records floor to the same bucket.
            snapped_bucket: dict[int, dict[str, Any]] = {}
            for r in sorted(records_in_window, key=lambda x: x.date_time):
                ts_epoch = int(r.date_time.timestamp())
                snapped_epoch = (ts_epoch // interval_sec) * interval_sec
                bucket = snapped_bucket.setdefault(snapped_epoch, {})
                for key in self.record_keys_writable:
                    if key == "date_time":
                        continue
                    try:
                        val = r[key]
                    except KeyError:
                        continue
                    if val is not None and bucket.get(key) is None:
                        bucket[key] = val

            # Delete entire window (aligned + misaligned)
            deleted = self.db_delete_records(
                start_timestamp=window_start_ts,
                end_timestamp=window_end_ts,
            )

            # Reinsert one record per bucket
            for snapped_epoch, values in snapped_bucket.items():
                if not values:
                    continue
                snapped_dt = DateTime.fromtimestamp(snapped_epoch, tz="UTC")
                record = self.record_class()(date_time=snapped_dt, **values)
                self.db_insert_record(record, mark_dirty=True)

            self.db_save_records()
            self._db_set_compact_state(target_interval, new_cutoff_ts)
            logger.info(
                f"Rewrote sparse window in namespace '{self.db_namespace()}' "
                f"tier {target_interval}: deleted={deleted}, "
                f"reinserted={len(snapped_bucket)} buckets "
                f"(window={window_start_dt}..{window_end_dt})"
            )
            return deleted

        # ---- Full resampling path ----------------------------------------
        # boundary="context" is used here instead of "strict" so that key_to_array
        # can include one record on each side of the window for proper interpolation
        # at the edges. The truncation inside key_to_array then clips the result
        # back to [window_start_dt, window_end_dt) so no out-of-window values are
        # ever written back. align_to_interval=True ensures buckets land on
        # clock-round timestamps regardless of window_start_dt precision.
        compacted_data: dict[str, Any] = {}
        compacted_timestamps: list[DateTime] = []

        for key in self.record_keys_writable:
            if key == "date_time":
                continue
            try:
                array = self.key_to_array(
                    key,
                    start_datetime=window_start_dt,
                    end_datetime=window_end_dt,
                    interval=target_interval,
                    fill_method="time",
                    boundary="context",
                    align_to_interval=True,
                )
            except (KeyError, TypeError, ValueError):
                continue  # non-numeric or missing key - skip silently

            if len(array) == 0:
                continue

            # Build the shared timestamp spine once from the first successful key.
            # The spine is derived from the actual resampled index, not from
            # db_generate_timestamps, so it matches exactly what key_to_array
            # produced (epoch-aligned, truncated to window).
            if not compacted_timestamps:
                raw_start_epoch_aligned = (
                    int(window_start_dt.timestamp()) // interval_sec
                ) * interval_sec
                first_bucket_epoch = raw_start_epoch_aligned
                # Advance to first bucket >= window_start_dt (truncation in key_to_array
                # removes any bucket before window_start_dt)
                while first_bucket_epoch < int(window_start_dt.timestamp()):
                    first_bucket_epoch += interval_sec
                compacted_timestamps = [
                    DateTime.fromtimestamp(first_bucket_epoch + i * interval_sec, tz="UTC")
                    for i in range(len(array))
                ]

            # Guard against length mismatch between keys
            if len(array) == len(compacted_timestamps):
                compacted_data[key] = array

        if not compacted_data or not compacted_timestamps:
            # Nothing to write back - still advance cutoff
            self._db_set_compact_state(target_interval, new_cutoff_ts)
            return 0

        # ---- Delete originals, re-insert downsampled records -------------
        deleted = self.db_delete_records(
            start_timestamp=window_start_ts,
            end_timestamp=window_end_ts,
        )

        for i, dt in enumerate(compacted_timestamps):
            values = {
                key: arr[i]
                for key, arr in compacted_data.items()
                if i < len(arr) and arr[i] is not None
            }
            if values:
                record = self.record_class()(date_time=dt, **values)
                self.db_insert_record(record, mark_dirty=True)

        self.db_save_records()

        # Persist the aligned new cutoff for this tier
        self._db_set_compact_state(target_interval, new_cutoff_ts)

        logger.info(
            f"Compacted tier {target_interval}: deleted {deleted} records in "
            f"namespace '{self.db_namespace()}' "
            f"(window={window_start_dt}..{window_end_dt}, "
            f"reinserted={len(compacted_timestamps)})"
        )
        return deleted

    # ------------------------------------------------------------------
    # Public entry point
    # ------------------------------------------------------------------

    def db_compact(
        self,
        compact_tiers: Optional[list[tuple[Duration, Duration]]] = None,
    ) -> int:
        """Apply tiered compaction policy to all records in this namespace.

        Tiers are processed coarsest-first (longest age threshold first) to
        avoid compacting fine-grained data that an inner tier would immediately
        re-compact anyway.

        Args:
            compact_tiers: Override tiers for this call. If None, uses
                db_compact_tiers(). Each entry is (age_threshold, target_interval),
                ordered shortest to longest age threshold.

        Returns:
            Total number of original records deleted across all tiers.
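
        Example:
            Illustrative sketch (``provider`` stands for any concrete record
            provider; the tier values are placeholders):

            .. code-block:: python

                # Custom single-tier policy: anything older than one week
                # is downsampled to 1-hour resolution.
                deleted = provider.db_compact(
                    compact_tiers=[(to_duration("1 week"), to_duration("1 hour"))],
                )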
        """
        if compact_tiers is None:
            compact_tiers = self.db_compact_tiers()

        if not compact_tiers:
            return 0

        total_deleted = 0

        # Coarsest tier first (reversed) to avoid redundant work
        for age_threshold, target_interval in reversed(compact_tiers):
            total_deleted += self._db_compact_tier(age_threshold, target_interval)

        return total_deleted