from __future__ import annotations
import json
import os
from typing import Union
from scistag.webstag import web_fetch
HTTP_PROTOCOL_URL_HEADER = "http://"
"""
Prefix of a plain http url. A path starting with it has to be fetched from
the web.
"""
HTTPS_PROTOCOL_URL_HEADER = "https://"
"""
Prefix of a https url. A path starting with it has to be fetched from
the web.
"""
FILE_PATH_PROTOCOL_URL_HEADER = "file://"
"""
Prefix of a url which points to a file on the local disk.
"""

FileSourceTypes = Union[str]
"""
Type of a file source path.

Either a classic local file name, the URL of a file or a path using any
other protocol supported by FileStag such as
zip://zipFilename/fileNameInZip.
"""
FileTargetTypes = Union[str]
"""
Type of a file target path.

Either a classic local file name, the URL of a file or a path using any
other protocol supported by FileStag such as
zip://zipFilename/fileNameInZip.
"""
class FileStag:
    """
    Helper class to load data from a variety of sources such as local files,
    registered archives or the web.

    All methods are class methods, so the class is used without creating an
    instance, e.g. ``FileStag.load("file.bin")``.
    """

    @classmethod
    def _strip_file_protocol(cls, path: str) -> str:
        """
        Removes a leading ``file://`` header so that the remaining path can
        be passed to the operating system's file functions.

        :param path: A file path or url.
        :return: The path without the ``file://`` header. Paths which do not
            start with the header are returned unchanged.
        """
        if path.startswith(FILE_PATH_PROTOCOL_URL_HEADER):
            return path[len(FILE_PATH_PROTOCOL_URL_HEADER):]
        return path

    @classmethod
    def is_simple(cls,
                  filename: FileSourceTypes | FileTargetTypes) -> bool:
        """
        Returns if the file path points to a simple file on disk which does
        not require loading it to memory.

        A path counts as simple when it does not contain a protocol
        separator (``://``).

        :param filename: The file's source such as a local filename or URL.
            See :data:`FileSourceTypes`.
        :return: True if it is a normal, local file.
        """
        return "://" not in filename

    @classmethod
    def load(cls,
             source: FileSourceTypes,
             **params) -> bytes | None:
        """
        Loads a file by filename from a local file, a registered web archive
        or the web.

        :param source: The file's source such as a local filename or URL.
            See :data:`FileSourceTypes`.
        :param params: Advanced loading params passed to the file loader such
            as ``timeout_s`` or ``max_cache_age`` for files from the web.
        :return: The data if the file could be found. None if a local file
            does not exist.
        """
        # imported lazily, exactly as in the original implementation
        from .shared_archive import SharedArchive
        from scistag.filestag import ZIP_SOURCE_PROTOCOL
        source = cls._strip_file_protocol(source)
        if source.startswith(ZIP_SOURCE_PROTOCOL):
            return SharedArchive.load_file(source)
        if source.startswith((HTTP_PROTOCOL_URL_HEADER,
                              HTTPS_PROTOCOL_URL_HEADER)):
            return web_fetch(source, **params)
        if not os.path.exists(source):
            return None
        # the context manager closes the handle deterministically instead of
        # leaving that to the garbage collector
        with open(source, "rb") as input_file:
            return input_file.read()

    @classmethod
    def save(cls,
             target: FileTargetTypes,
             data: bytes, **params) -> bool:
        """
        Saves data to a file.

        :param target: The file's target name, see :meth:`load`.
        :param data: The data to be stored.
        :param params: The advanced storage parameters, depending on the
            type of storage, such as timeout_s for files stored via network.
        :return: True on success, False if the file could not be written.
        :raises NotImplementedError: If the target is not a local file.
        """
        target = cls._strip_file_protocol(target)
        if not cls.is_simple(target):
            raise NotImplementedError("At the moment only local file storage "
                                      "is supported")
        try:
            with open(target, "wb") as output_file:
                output_file.write(data)
        except (OSError, TypeError, ValueError):
            # OSError: the file can not be created or written,
            # TypeError: data is not bytes-like,
            # ValueError: invalid path such as an embedded null byte.
            # KeyboardInterrupt and SystemExit are deliberately not caught.
            return False
        return True

    @classmethod
    def delete(cls,
               target: FileTargetTypes,
               **params) -> bool:
        """
        Deletes a file.

        :param target: The file's target name, see :meth:`load` for
            the supported protocols.
        :param params: The advanced storage parameters, depending on the
            type of storage, such as timeout_s for files stored via network.
        :return: True on success, False if the file did not exist. Other
            errors raised by :func:`os.remove` are passed on to the caller.
        :raises NotImplementedError: If the target is not a local file.
        """
        target = cls._strip_file_protocol(target)
        if not cls.is_simple(target):
            raise NotImplementedError("At the moment only local file deletion "
                                      "is supported")
        try:
            os.remove(target)
        except FileNotFoundError:
            return False
        return True

    @classmethod
    def load_text(cls,
                  source: FileSourceTypes,
                  encoding: str = "utf-8",
                  **params) -> str | None:
        """
        Loads a text file from a given file source.

        :param source: The file's source, see :meth:`load`.
        :param encoding: The text encoding. utf-8 by default.
        :param params: The advanced loading parameters, file source
            dependent, e.g. timeout_s for a timeout for files from the web.
        :return: The file's content, None if the file could not be loaded.
        """
        data = cls.load(source, **params)
        if data is None:
            return None
        return data.decode(encoding=encoding)

    @classmethod
    def save_text(cls,
                  target: FileTargetTypes,
                  text: str,
                  encoding: str = "utf-8",
                  **params) -> bool:
        """
        Saves text data to a file.

        :param target: The file's target, see :meth:`save`.
        :param text: The text to be stored.
        :param encoding: The encoding to use. utf-8 by default.
        :param params: The advanced storage parameters, depending on the
            type of storage, such as timeout_s for files stored via network.
        :return: True on success.
        """
        encoded_text = text.encode(encoding=encoding)
        return cls.save(target, data=encoded_text, **params)

    @classmethod
    def load_json(cls,
                  source: FileSourceTypes,
                  encoding: str = "utf-8",
                  **params) -> dict | None:
        """
        Loads a json dictionary from a given file source.

        :param source: The file's source, see :meth:`load`.
        :param encoding: The text encoding. utf-8 by default.
        :param params: The advanced loading parameters, file source
            dependent, e.g. timeout_s for a timeout for files from the web.
        :return: The parsed file content, None if the file could not be
            loaded.
        """
        text = cls.load_text(source, encoding=encoding, **params)
        if text is None:
            return None
        return json.loads(text)

    @classmethod
    def save_json(cls,
                  target: FileTargetTypes,
                  data: dict,
                  indent: int | None = None,
                  encoding: str = "utf-8",
                  **params) -> bool:
        """
        Saves json data to a file target.

        :param target: The file's target. See :data:`FileTargetTypes`.
        :param data: The dictionary to be stored.
        :param indent: The json indenting. None by default.
        :param encoding: The encoding to use. utf-8 by default.
        :param params: The advanced storage parameters, depending on the
            type of storage, such as timeout_s for files stored via network.
        :return: True on success.
        """
        # indent=None is json.dumps' own default, no special case required
        text = json.dumps(data, indent=indent)
        return cls.save_text(target, text, encoding=encoding, **params)

    @classmethod
    def copy(cls, source: str, target: str, create_dir: bool = False,
             **params) -> bool:
        """
        Copies a file from given source to given target location.

        :param source: The source location.
        :param target: The target location.
        :param create_dir: Defines if a directory of the target
            shall be created if needed.
        :param params: The parameters to be passed to the source loader, e.g.
            max_cache_age etc.
        :return: True on success. False if the target directory is missing
            and ``create_dir`` is not set, if the source could not be loaded
            or if the target could not be written.
        """
        # the directory check has to work on the real path, not on the url
        dirname = os.path.dirname(cls._strip_file_protocol(target))
        # an empty dirname means the target is a bare file name in the
        # current working directory, there is nothing to verify or create
        if dirname and not os.path.exists(dirname):
            if not create_dir:
                return False
            os.makedirs(dirname, exist_ok=True)
        data = cls.load(source, **params)
        if data is None:
            return False
        return cls.save(target, data)

    @classmethod
    def exists(cls,
               filename: FileSourceTypes, **params) -> bool:
        """
        Verifies if a file exists.

        For http and https sources the file is fetched via ``web_fetch`` and
        counts as existing if data was returned.

        :param filename: The file's source such as a local filename or URL.
            See :data:`FileSourceTypes`.
        :param params: Advanced parameters, protocol dependent.
        :return: True if the file exists.
        """
        # imported lazily, exactly as in the original implementation
        from .shared_archive import SharedArchive
        from scistag.filestag import ZIP_SOURCE_PROTOCOL
        filename = cls._strip_file_protocol(filename)
        if filename.startswith(ZIP_SOURCE_PROTOCOL):
            return SharedArchive.exists_at_source(filename)
        if filename.startswith((HTTP_PROTOCOL_URL_HEADER,
                                HTTPS_PROTOCOL_URL_HEADER)):
            return web_fetch(filename, **params) is not None
        return os.path.exists(filename)