Source code for scistag.common.essential_data

import logging
import os
from threading import RLock

from scistag.common.configuration import (ESSENTIAL_DATA_URL,
                                          ESSENTIAL_DATA_ARCHIVE_NAME,
                                          ESSENTIAL_DATA_MD5,
                                          ESSENTIAL_DATA_SIZE)
import hashlib

# Lock held by _prepare_essential_data while it checks and updates the
# data_prepared flag below.
data_prepared_lock = RLock()
# Flag consulted by _prepare_essential_data to skip repeated preparation of the
# essential data archive.
data_prepared = False


def _prepare_essential_data() -> bool:
    """
    Downloads the essential data required for SciStag from the web (if the
    archive is not available locally yet) and registers the archive.

    The work is done at most once successfully per process; further calls
    return immediately.

    :return: True on success
    """
    # Only data_prepared is rebound here; the lock is merely read, so it
    # needs no global declaration.
    global data_prepared
    with data_prepared_lock:
        if data_prepared:
            return True
        if not os.path.exists(ESSENTIAL_DATA_ARCHIVE_NAME):
            verify_essential_data()
        # Imported lazily, as elsewhere in this module.
        from scistag.filestag import SharedArchive
        SharedArchive.register(ESSENTIAL_DATA_ARCHIVE_NAME, "scistagessential",
                               cache=False)
        success = os.path.exists(ESSENTIAL_DATA_ARCHIVE_NAME)
        # Remember only a successful preparation. Storing True unconditionally
        # would make every later call report success after a failed attempt.
        data_prepared = success
        return success
def verify_data(identifier, archive_name, source_url, exp_size, exp_md5,
                quick=False) -> bool:
    """
    Verifies the SciStag data is available, downloads it if not and verifies
    its integrity

    :param identifier: The zip archive's global identifier
    :param archive_name: The archive's name. Also used as the local file path.
    :param source_url: The url at where the current data set is provided
    :param exp_size: The expected file size
    :param exp_md5: The expected md5 value
    :param quick: Defines if a fast check shall be executed. If set, the md5
        verification is skipped.
    :return: True on success
    :raises Exception: If the md5 digest of the archive does not match exp_md5
    """
    from scistag.webstag.web_fetch import web_fetch
    path = archive_name
    # An existing file of unexpected size is treated as corrupt. It has to be
    # removed, otherwise the download step below is skipped and the broken
    # file would be kept forever.
    if os.path.exists(path) and os.path.getsize(path) != exp_size:
        logging.warning(
            f"Data file {archive_name} is invalid. Removing and re-downloading it.")
        os.remove(path)
    if not os.path.exists(path):
        logging.info(f"Downloading {source_url}...")
        # presumably stores the fetched data at `filename` - verify against
        # web_fetch
        web_fetch(source_url, filename=path)
    if not quick:
        # Hash in chunks so the archive is never held in memory as a whole
        # and the file handle is closed deterministically.
        md5 = hashlib.md5()
        with open(path, 'rb') as archive_file:
            for chunk in iter(lambda: archive_file.read(1024 * 1024), b""):
                md5.update(chunk)
        if md5.hexdigest() != exp_md5:
            raise Exception(
                f"MD5 checksum verification for {archive_name} failed")
    from scistag.filestag import SharedArchive
    SharedArchive.register(archive_name, identifier, cache=False)
    return os.path.exists(path)
def verify_essential_data(quick=False) -> bool:
    """
    Runs :func:`verify_data` for the essential data archive, using the
    archive name, url, size and md5 value from the SciStag configuration.

    :param quick: Defines if a fast check shall be executed
    :return: True on success
    """
    return verify_data(
        identifier="scistagessential",
        archive_name=ESSENTIAL_DATA_ARCHIVE_NAME,
        source_url=ESSENTIAL_DATA_URL,
        exp_size=ESSENTIAL_DATA_SIZE,
        exp_md5=ESSENTIAL_DATA_MD5,
        quick=quick,
    )
# Base path returned by get_edp. The "scistagessential" part matches the
# identifier under which this module registers the essential data archive.
_ESSENTIAL_DATA_PATH = "zip://@scistagessential/"
def get_edp() -> str:
    """
    Returns the FileStag access path of the essential data archive.

    The archive is prepared (see _prepare_essential_data) before the path is
    handed out.

    :return: The base path to access data within the essential data archive.
        See filestag.FileStag
    """
    # The result is intentionally ignored; the path is returned either way.
    _prepare_essential_data()
    return _ESSENTIAL_DATA_PATH