"""
Implements the class :class:`DiskCache` which allows you to fast and easy
store / dump data such as transformed Pandas dataframes or numpy objects
on disk and to quickly restore them afterwards with a simple version
management.
"""
from __future__ import annotations
import hashlib
import os
from typing import Any
from scistag.common import StagLock
from scistag.filestag import FileStag, Bundle
from scistag.common.cache import Cache
BUNDLE_EXTENSION = ".stbun"
"File extension for a SciStag bundle"
[docs]class DiskCache:
"""
Helper class to persist data such as computation results on disk.
This class is usually not used directly, see :class:`Cache` which makes
use of the DiskCache. All elements you store with a beginning "./"
using the **Cache** class will automatically be stored on disk, all
entries without in memory.
"""
def __init__(self,
version: int = 1,
cache_dir: str | None = None):
"""
:param version: The cache version. 1 by default.
When ever you change this version all old cache values will be
removed and/or ignored from the cache.
:param cache_dir: The directory in which the data shall be cached
"""
if cache_dir is None:
os.path.abspath("./.stscache")
self.cache_dir = cache_dir
self._version = version
"""
The cache version.
It is stored along all cache values stored on disk and only elements
sharing the same version will be accepted.
"""
self._access_lock = StagLock()
self.valid = cache_dir is not None
"""
Defines if the cache is valid
"""
self.dir_created = False
@property
def version(self) -> int:
"""
Returns the cache version
"""
return self._version
[docs] @staticmethod
def encode_name(name):
"""
Encodes the name of the object to be cached to a unique hash
:param name: The name of the data
:return: The encoded name
"""
encoded_name = hashlib.md5(name.encode("utf-8")).hexdigest()
return encoded_name
[docs] def get_cache_name(self, name):
"""
Encodes the name of the object to be cached to a unique hash
:param name: The name of the data
:return: The encoded name
"""
if not self.valid:
raise AssertionError(
"Disk cache not configured. Please provide a valid cache_dir.")
encoded_name = f"{self.cache_dir}/{self.encode_name(name)}"
return encoded_name
[docs] def _ensure_cache_dir(self):
"""
Verifies the caching directory is present
"""
with self._access_lock:
if not self.dir_created:
os.makedirs(self.cache_dir, exist_ok=True)
[docs] def set(self, key: str,
value: Any,
params: dict,
version: int | str = 1,
hash_params: bool = False
):
"""
Persists a single value in the cache
:param key: The name of the object to cache or a combination of
key and version separated by an @ sign, e.g. "database@1"
:param value: The element's value
:param version: The cache version for this entry.
:param params: The creation parameters which were passed into
the loading function and should match upon a cache fetch try.
:param hash_params: Defines the parameters shall be hashed to
detect modifications.
"""
key, eff_version = Cache.get_key_and_version(key,
self._version,
version)
if params is None or not hash_params:
params = {}
with self._access_lock:
params = dict(params)
params["__version"] = eff_version
with self._access_lock:
self._ensure_cache_dir()
cache_name = self.get_cache_name(key)
bundle_fn = cache_name + BUNDLE_EXTENSION
FileStag.save(cache_name,
Bundle.bundle({"data": value,
"version": 1}))
FileStag.save(bundle_fn, Bundle.bundle(params))
[docs] def get(self,
key,
params: dict = None,
version: int | str = 1,
hash_params: bool = False,
default=None) -> Any | None:
"""
Tries to read an element from the disk cache.
:param key: The name of the object to load from cache or a combination
of key and version separated by an @ sign, e.g. "database@1"
:param params: The creation parameters which were passed into
the loading function and still should match.
:param version: The assumed version of this element we are searching
for. If the version does not match the old entry is ignored.
:param hash_params: Defines the parameters shall be hashed to
detect modifications.
:param default: The default value to return if no cache entry could
be found
:return: Either the cache data or the default value as fallback
"""
with self._access_lock:
key, eff_version = Cache.get_key_and_version(key,
self._version,
version)
cache_name = self.get_cache_name(key)
if params is None or not hash_params:
params = {}
params = dict(params)
with self._access_lock:
params["__version"] = eff_version
stored_params = {}
bundle_fn = cache_name + BUNDLE_EXTENSION
stream_data = FileStag.load(cache_name)
if stream_data is None:
return default
bundle_data = Bundle.unpack(stream_data)
assert bundle_data.get("version", 0) == 1
data = bundle_data["data"]
if FileStag.exists(bundle_fn):
stored_params = Bundle.unpack(FileStag.load(bundle_fn))
if stored_params != params:
return default
return data
[docs] def delete(self, key) -> bool:
"""
Deletes a single cache entry
:param key: The cache's key
:return: True if the element was found and deleted
"""
with self._access_lock:
cache_name = self.get_cache_name(key)
bundle_fn = cache_name + BUNDLE_EXTENSION
FileStag.delete(bundle_fn)
if FileStag.exists(cache_name):
return FileStag.delete(cache_name)
return False
def __contains__(self, key):
if not self.valid:
return False
with self._access_lock:
key, eff_version = Cache.get_key_and_version(key, self._version)
cache_name = self.get_cache_name(key)
return FileStag.exists(cache_name)