Source code for scistag.common.cache

"""
Implements the :class:`Cache` class, which allows easy caching of data on disk
and in memory to minimize repetitive downloads and re-computations.
"""
from __future__ import annotations

import hashlib
import time

from typing import Any, Callable

from scistag.common.mt.stag_lock import StagLock


class Cache:
    """
    The Cache class shall help caching computation results, downloaded data
    but also objects with large wind-up time (such as neural networks)
    between execution sessions.

    .. code-block:: python

        # caching in memory without version
        my_cache = Cache()

        def complex_computation():
            time.sleep(5.0)  # just as dummy
            return np.ones((8, 8), dtype=float)

        def rendering_function():
            global my_cache
            my_data = my_cache.cache("data", complex_computation)
            display(my_data)

        rendering_function()  # will take 5.0 seconds on the first run
        rendering_function()  # 0 seconds as "data" will be found in the cache

        # with version - all cache entries !=5 will be ignored
        my_cache = Cache(version=5)  # only cache entries with 5 will be valid
        ...

        # with per-element or "sub"-version.
        # the cache will always combine the cache object's version and the
        # version of the element. So in this case to "5.4"
        def rendering_function():
            global my_cache
            my_data = my_cache.cache("data@4", complex_computation)
            # or
            # my_data = my_cache.cache("data", complex_computation, version=4)
            display(my_data)

        # caching on disk between execution runs
        # if you run the cache "manually" you have to set a version number to
        # cache data between multiple execution sessions on disk.
        my_data = my_cache.cache("./data@5", complex_computation)

    If you pass a version of 0 the :meth:`get_app_session_id` will be used
    instead which will usually change on every restart except you set it
    yourself via :meth:`override_app_session_id` or are using a smart
    autoreloader such as VisualLog's auto-reload capabilities which ensure
    this for you.
    """

    _app_session_id: int = int(time.time() * 1000)
    """
    The current run's session ID. Is updated every re-start usually, may be
    stored and restored by helper classes such as the VisualLogAutoReloader
    between application restarts.
    """

    def __init__(self, version: int = 1, cache_dir: str | None = None):
        """
        :param version: The cache version. 1 by default.

            When ever you change this version all old cache values will be
            removed and/or ignored from the cache.
        :param cache_dir: The directory in which the cache data shall be stored
        """
        self._access_lock = StagLock()
        "Multithreading access lock"
        self._mem_cache = {}
        """
        A cache for temporary data storage and objects with a life time bound
        to this component's usage state.

        A cache whose content shall only live between a handle_load() and
        handle_unload(). This can be used to store data only while a
        component is being used, a Widget is visible etc.

        See :meth:`handle_load`.
        """
        self._mem_cache_versions = {}
        "Version numbers for the elements stored in the memory cache"
        self._mem_cache_params = {}
        "Parameters for the elements stored in the memory cache"
        # imported locally, as in the original code
        from scistag.common.disk_cache import DiskCache
        self._disk_cache = DiskCache(version=version, cache_dir=cache_dir)
        "Cache for persisting data between execution sessions"
        self._version = version
        """
        The cache version. It is stored along all cache values stored on disk
        and only elements sharing the same version will be accepted.
        """
        self.loaded = False
        "Defines if the component was correctly loaded"
        self._is_loading = False
        """
        Flag which tells if the component is currently being loaded and if
        new values added to the cache via ``self["objectName"]`` shall be
        flagged as volatile.

        During the execution of the :meth:`load` function and their event
        handlers such as :meth:`Widget.handle_load()` this value is set to
        true. When new Widgets are added during this time or new values are
        added to the cache, e.g. via my_component["data"] = load_data() they
        are flagged as volatile.

        After the execution of unload() all volatile entries will be deleted
        from the cache and all widgets added as child will be automatically
        removed again.
        """
        self._volatile_cache_entries = set()
        """
        Stores which cache entries shall be deleted upon the execution of
        :meth:`unload`.

        If a cache entry is added while ``_is_loading`` is set to True it
        will be added to this set. Upon the execution of ``unload()`` all
        elements named in this set will be removed from the memory cache.

        In addition you may add object member variables via
        :meth:`add_volatile_member` which do not get removed but
        automatically cleared upon execution of unload. Those are stored
        with a leading ``"."``.
        """

    @property
    def version(self) -> int:
        """
        Returns the cache version
        """
        return self._version

    @classmethod
    def get_app_session_id(cls) -> int:
        """
        Returns the app session id which is generated upon each application
        restart.
        """
        return cls._app_session_id

    @classmethod
    def override_app_session_id(cls, session_id: int):
        """
        Manually overrides the application's session ID.

        Should only be used by application session managers.

        :param session_id: The new session id
        """
        cls._app_session_id = session_id

    @classmethod
    def get_key_and_version(cls, key, major, minor=None) -> tuple[str, str]:
        """
        Returns the effective key, version combination to search for a key
        in combination with the cache's and the element's version.

        :param key: The key (potentially still containing @version at its
            end), either "key" or "key@version", e.g. "myDb@2". A version
            embedded in the key takes precedence over ``minor``.
        :param major: The major version (provided by the cache object)
        :param minor: The minor version, per element. Default 0 for memory
            cache elements and 1 for disk cache entries (keys containing
            a ``"/"``).
        :return: The effective key and version with which the element shall
            be persisted and which we assume upon restore.

            Following rules apply:

            - minor >= 1: The cache's version and the minor version are
              combined. So if you change either the cache's or the element
              version the element gets invalidated.
            - minor <= -1: The data should be handled as "constant" which
              basically never changes. Changing the cache's version has thus
              no effect, only if you directly change the element's version.
            - minor equals 0: The session's ID will be used as minor
              version. This invalidates all cache entries when you
              completely close the app or service (including auto-restart
              mechanisms, e.g. for live-editing).
        :raises ValueError: If the key contains more than one ``"@"``.
        """
        if minor is None:
            minor = 1 if "/" in key else 0
        if "@" in key:
            parts = key.split("@")
            # explicit check instead of assert so it survives ``python -O``
            if len(parts) != 2:
                raise ValueError(
                    f"Invalid key {key!r}: only one '@' is allowed")
            key, minor = parts
        minor = str(minor)
        if minor == "0":  # session id only as cache version
            return key, f"{major}.{cls._app_session_id}"
        if minor.startswith("-"):  # minor only
            return key, f"{minor}"
        return key, f"{major}.{minor}"

    def cache(self, key: str, generator: Callable, *args, **kwargs):
        """
        Tries to find the element **key** in the cache and returns its
        content.

        If no element with such a name and/or version number can be found
        **generator** will be called to generate the data, which is then
        stored in the cache for the next execution.

        Note: A cached value of ``None`` is treated like a cache miss, so a
        generator returning ``None`` is called again on every invocation.

        :param key: The key of the cache element
        :param generator: The function to call if the element is not stored
            in the cache yet.
        :keyword version: The version to assign to the cache entry. If
            either this or the cache's main version get modified all prior
            cache entries will be ignored until they were updated to this
            new version.

            Default version for memory cache entries is 0, for disk cache
            entries 1.
        :keyword hash_val: A single value, a list of values or a dict of
            values of which a hash value is computed and added to the
            version number to automatically invalidate it if any of the
            values changed.
        :param args: Argument parameters to be passed into the generator
        :param kwargs: Keyword parameter to be passed into the generator
        :return: The cached or newly created content
        """
        hash_val = kwargs.pop("hash_val", None)
        version = kwargs.pop("version", None)
        if hash_val is not None:
            version = str(version) if version else "0"
            from scistag.filestag import Bundle
            hash_data = Bundle.bundle(hash_val, compression=0)
            version += "." + hashlib.md5(hash_data).hexdigest()
        # the parameter set stored along the entry: all keyword arguments
        # plus the positional arguments as "_arg0", "_arg1", ...
        params = dict(kwargs)
        for index, element in enumerate(args):
            params[f"_arg{index}"] = element
        # try to fetch from cache
        with self._access_lock:
            old_value = self.get(key, version=version, params=params)
            if old_value is not None:  # cached? fine
                return old_value
        # the generator is deliberately executed outside of the lock
        new_data = generator(*args, **kwargs)
        # update cache otherwise
        with self._access_lock:
            self.set(key, new_data, params, version=version)
        return new_data

    def set(self, key: str, value, params: dict | None = None,
            version=None):
        """
        Adds an item to the cache or updates it.

        If a value is added (for the first time) during the execution of
        the component's ``load()`` or ``handle_load`` method it is flagged
        as volatile and will be automatically removed again upon the
        execution of ``unload()`` / ``handle_unload()``.

        Keys containing a ``"/"`` are forwarded to the disk cache.

        :param key: The item's name
        :param value: The value to assign
        :param params: The parameters associated with the creation of value
            and to be stored (and verified) in combination with the version.
        :param version: The version of the cache element. By default 0 for
            memory cache elements and 1 for disk cache elements.
        :raises ValueError: If the key is empty or does not start with a
            letter, an underscore or ``"./"``.
        """
        with self._access_lock:
            org_key = key
            key, eff_version = self.get_key_and_version(key, self._version,
                                                        minor=version)
            # explicit check instead of assert so it survives ``python -O``
            if len(key) == 0:
                raise ValueError("The key must not be empty")
            if params is None:
                params = {}
            if not (key[0].isalpha() or key.startswith("./")
                    or key.startswith("_")):
                raise ValueError("Keys has to start with a character")
            if "/" in key:
                # the disk cache receives the original key (incl. @version)
                self._disk_cache.set(org_key, value, params=params,
                                     version=version)
                return
            # flag as volatile if added during loading process
            if key not in self._mem_cache and self._is_loading:
                self._volatile_cache_entries.add(key)
            self._mem_cache[key] = value
            self._mem_cache_versions[key] = eff_version
            self._mem_cache_params[key] = params

    def get(self, key: str, default=None, version=None,
            params: dict | None = None, hash_params=False):
        """
        Returns a value from the cache.

        If the element does not exist or its version does not match,
        **default** is returned.

        :param key: The item's name.
        :param default: The value to return by default if no cache element
            could be found.
        :param version: The cache element's version. By default 0 for memory
            cache elements and 1 for disk cache elements.
        :param params: The parameters which were passed into the function
            and still need to match. For memory cache entries they are only
            compared if **hash_params** is set.
        :param hash_params: Defines if the parameters shall be verified to
            detect modifications.
        :return: The item's value. Returns default if no value can be found.
        """
        with self._access_lock:
            if params is None:
                params = {}
            org_key = key
            key, eff_version = self.get_key_and_version(key, self._version,
                                                        minor=version)
            if "/" in key:
                data = self._disk_cache.get(org_key, params=params,
                                            hash_params=hash_params,
                                            version=version)
                return default if data is None else data
            if key not in self._mem_cache:
                return default
            if self._mem_cache_versions[key] != eff_version:
                return default
            if hash_params and self._mem_cache_params[key] != params:
                return default
            return self._mem_cache[key]

    def load(self):
        """
        Call this before you start using a component for the first time.

        The ``scistag.slidestag.widget.Widget`` class does this
        automatically for all of its children when a Widget becomes visible.

        :raises RuntimeError: If the component is already loaded or if
            :meth:`handle_load` did not set the ``loaded`` flag.
        """
        with self._access_lock:
            if self.loaded:
                raise RuntimeError("Tried to load component twice")
            self._is_loading = True
            try:
                self.handle_load()
            finally:
                # always leave the loading state again, otherwise every
                # value added later on would be flagged as volatile
                self._is_loading = False
            if not self.loaded:
                raise RuntimeError(
                    "loaded flag of component not correctly set to True. "
                    "Did you forget to call super().handle_load()?")

    def unload(self):
        """
        Call this to unload all data from your component which was created
        during the handle_load execution and not flagged via a slash ("/")
        in its name as element to cache on disk.

        Volatile members registered via :meth:`add_volatile_member` are set
        to ``None`` and stay registered; volatile cache entries are removed
        from the memory cache.

        :raises RuntimeError: If the component is not loaded or if
            :meth:`handle_unload` did not clear the ``loaded`` flag.
        """
        with self._access_lock:
            if not self.loaded:
                raise RuntimeError(
                    "Tried to unload component which was not loaded before")
            self.handle_unload()
            if self.loaded:
                raise RuntimeError(
                    "loaded flag of component not correctly set to False. "
                    "Did you forget to call super().handle_unload()?")
            # iterate a copy as cache entries are removed from the set
            for element in list(self._volatile_cache_entries):
                if element.startswith("."):  # clear volatile members
                    member_name = element[1:]
                    if member_name in self.__dict__:
                        self.__dict__[member_name] = None
                    continue
                # delete volatile cache entries including their meta data
                self._mem_cache.pop(element, None)
                self._mem_cache_versions.pop(element, None)
                self._mem_cache_params.pop(element, None)
                # the entry is gone, so it is not volatile anymore. If it
                # gets re-added during the next load it is flagged again.
                self._volatile_cache_entries.discard(element)

    def get_is_loading(self) -> bool:
        """
        Returns if the component is currently being loaded

        :return: True if ``load`` is currently being executed for this
            component.
        """
        with self._access_lock:
            return self._is_loading

    def handle_load(self):
        """
        Event handling function for dynamically loading data on demand.

        SciStag's ``load`` and ``unload`` mechanism shall help minimizing
        the memory footprint of the application using it.

        If you have temporary data, for example a database which is just
        used while a component is used, a Slide or an ImageView while they
        are visible please overwrite this function, call its ancestor and
        then store your data in the "cache".

        You can do so by using the bracket operator like
        ``self['db'] = pd.read_csv(...)``, check if data is available via
        ``if 'db' in self: ...`` and access it via ``my_db = self['db']``
        accordingly.

        All data stored this way will automatically get cleared and removed
        from the cache when the `unload` function is called, e.g. when the
        Slide or Widget disappears or when you call it for your custom
        component.

        If you want to use member variables for storing your temporary data
        you can do so by calling :meth:`add_volatile_member` and passing
        their name. Upon unloading ``None`` will be assigned to all
        registered variables.

        **Note**: When overwriting this method call
        ``super().handle_load()`` at the beginning of yours.
        """
        with self._access_lock:
            self.loaded = True

    def handle_unload(self):
        """
        Event handler for unloading elements previously loaded in your
        handle_load function.

        **Note**: When overwriting this method call
        ``super().handle_unload()`` at the **end** of yours.
        """
        with self._access_lock:
            self.loaded = False

    def add_volatile_member(self, name: str) -> None:
        """
        Adds a member to the volatile cache entry variable list so it can
        automatically be cleared upon unloading of this component.

        When the component is unloaded (e.g. because a Widget or a Slide
        disappears) all members are automatically set to None to prevent
        that these objects are kept alive.

        :param name: The name of the member variable to be added to the
            volatile list.
        """
        with self._access_lock:
            # members are distinguished from cache keys by a leading dot
            self._volatile_cache_entries.add("." + name)

    def __setitem__(self, key: str, value):
        """
        Adds or updates a cache entry, see :meth:`set`.

        :param key: The item's name
        :param value: The value to assign
        """
        self.set(key, value)

    def __getitem__(self, key) -> Any:
        """
        Returns a value from the cache.

        :param key: The item's name
        :return: The item's value
        :raises KeyError: If the element is neither stored in the memory
            cache nor in the disk cache.
        """
        with self._access_lock:
            result = self.get(key)
            if result is None:
                # a None result may be a stored None value. The memory cache
                # is indexed by the key without its "@version" suffix.
                mem_key, _ = self.get_key_and_version(key, self._version)
                if (mem_key not in self._mem_cache
                        and key not in self._disk_cache):
                    raise KeyError(f"Key {key} not found")
            return result

    def __delitem__(self, key):
        """
        Deletes an element from the cache.

        :param key: The element's name
        :raises KeyError: If a memory cache element with this name does not
            exist.
        """
        with self._access_lock:
            key, eff_version = self.get_key_and_version(key, self._version)
            if "/" in key:
                self._disk_cache.delete(key)
                # disk entries are never stored in the memory cache
                return
            if key not in self._mem_cache:
                raise KeyError(f"The value {key} is not defined in the cache")
            del self._mem_cache[key]
            del self._mem_cache_versions[key]
            del self._mem_cache_params[key]
            self._volatile_cache_entries.discard(key)

    def __contains__(self, key) -> bool:
        """
        Returns if an element exists in the cache.

        :param key: The item's name
        :return: True if the item exists (and, for memory cache entries,
            its version matches).
        """
        with self._access_lock:
            key, eff_version = self.get_key_and_version(key, self._version)
            if "/" in key:
                return key in self._disk_cache
            return (key in self._mem_cache and
                    self._mem_cache_versions[key] == eff_version)