"""
Defines the class :class:`EmojiDb` which is the heart of this package.
It loads and provides information about all known Emojis and their details
such as the unicode encoding or their category.
"""
from __future__ import annotations
import io
import json
from fnmatch import fnmatch
import scistag.addons
from scistag.common.mt.stag_lock import StagLock
from scistag.common.essential_data import get_edp
from scistag.filestag.file_stag import FileStag
from .emoji_definitions import (EmojiIdentifierTypes, EMOJI_SVG_ADDON,
EMOJI_NAMES, EMOJI_DB_NAME,
EMOJI_NAMES_MARKDOWN)
from .emoji_info import EmojiInfo
class EmojiDb:
    """
    The Emoji DB provides Emoji and country flag graphics.

    By default it uses the Noto Emoji dataset embedded into the SciStag module.
    All dictionaries are loaded lazily on first access and cached on class
    level, guarded by a shared lock.
    """

    _access_lock = StagLock()
    "Shared access lock"
    _initialized = False
    "Defines if the emoji db was initialized"
    _extensions = {}
    "Known emoji addon packages and their corresponding paths"
    _svg_emojis = False
    "Defines if SVG emojis are available"
    _markdown_names = {}
    "Markdown name conversion dictionary"
    _unicode_names = {}
    """
    The dictionary contains all official names of the emojis and their
    corresponding unicode sequence
    """
    _valid_sequences = set()
    "Set of valid unicode sequences"
    _main_dict = {}
    """
    Contains all details about every single known emoji such as name,
    category, subcategory and of course unicode sequence
    """

    @classmethod
    def _load_json_resource(cls, name: str):
        """
        Loads a JSON file from the essential data package and parses it.

        :param name: The file's name, relative to the path returned by
            ``get_edp()``.
        :return: The parsed JSON content
        """
        file_data = FileStag.load(get_edp() + name)
        return json.load(io.BytesIO(file_data))

    @classmethod
    def _get_markdown_dict(cls) -> dict:
        """
        Returns the markdown name dictionary. Contains all common markdown
        emoji names as key and their corresponding unique sequence as value.

        The dictionary is loaded on first access and cached afterwards.

        :return: The dictionary
        """
        with cls._access_lock:
            if not cls._markdown_names:
                cls._markdown_names = \
                    cls._load_json_resource(EMOJI_NAMES_MARKDOWN)
            return cls._markdown_names

    @classmethod
    def _get_unicode_dict(cls) -> dict:
        """
        Returns the unicode name dictionary. Contains all common emoji names as
        key and their corresponding unique sequence as value for more than
        3600 emojis. See unicode.org for more details.

        The dictionary is loaded on first access and cached afterwards.

        :return: The dictionary
        """
        with cls._access_lock:
            if not cls._unicode_names:
                cls._unicode_names = cls._load_json_resource(EMOJI_NAMES)
            return cls._unicode_names

    @classmethod
    def get_all_valid_sequences(cls) -> set:
        """
        Returns a set of all (known) valid emoji sequences

        :return: A set of valid emoji sequences (all uppercased and with
            an underscore separating the single elements)
        """
        with cls._access_lock:
            if not cls._valid_sequences:
                # the values of the unicode name dictionary are the sequences
                cls._valid_sequences = set(cls._get_unicode_dict().values())
            return cls._valid_sequences

    @classmethod
    def _get_main_dict(cls) -> dict:
        """
        Returns the main dictionary containing all details about an emoji.

        The keys are the emojis' unicode sequences, joined by underscores.
        The dictionary is loaded on first access and cached afterwards.

        :return: The dictionary
        """
        with cls._access_lock:
            if cls._main_dict:
                return cls._main_dict
            main_dict = cls._load_json_resource(EMOJI_DB_NAME)
            # hotfix for property renaming, will be fixed with next data
            # update
            for key, value in main_dict.items():
                value["sequence"] = key.split("_")
                value["category"] = value["group"]
                value["subcategory"] = value["subgroup"]
            # publish the dictionary only once it is completely patched
            cls._main_dict = main_dict
            return cls._main_dict

    @classmethod
    def get_sequence_for_name(cls, identifier: str) -> list:
        """
        Returns the unicode sequence for given unicode identifier

        :param identifier: Either the full qualified identifier as defined by
            unicode.org supporting all >3600 emojis as defined by unicode.org,
            the markdown shortcode enclosed by two colons such as ":deer:"
            as defined on GitHub, or the emoji character(s) themselves.
        :return: The unicode sequence if the emoji could be found,
            otherwise an empty list
        """
        if identifier.startswith(":") and identifier.endswith(":"):
            sequence = cls._get_markdown_dict().get(identifier[1:-1])
            # "".split("_") would yield [""], so handle a miss explicitly
            return sequence.split("_") if sequence else []
        sequence = cls._get_unicode_dict().get(identifier)
        if sequence:
            return sequence.split("_")
        # Interpret the identifier as the emoji character(s) themselves and
        # convert every single character to its hexadecimal code point
        # without leading zeros. This also covers sequences containing ASCII
        # or Latin-1 characters such as keycap emojis or the copyright sign.
        sequence = [f"{ord(character):x}" for character in identifier]
        if cls.validate_sequence(sequence):
            return sequence
        return []

    @classmethod
    def get_character_sequence(cls, identifier: EmojiIdentifierTypes) -> \
            list[str]:
        """
        Converts an emoji identifier to a unicode sequence, a list of
        the hexadecimal code points of the single characters.

        Does not alter the value if a unicode sequence was passed already.

        :param identifier: The emoji identifier, either its unicode name,
            markdown name surrounded by colons or a single emoji character.
        :return: The unicode sequence, an empty list if a name was passed
            which could not be resolved.
        """
        if isinstance(identifier, str):
            return cls.get_sequence_for_name(identifier)
        return identifier

    @classmethod
    def get_character(cls, identifier: EmojiIdentifierTypes) -> str:
        """
        Returns the printable emoji character(s) for given identifier

        :param identifier: The emoji's identifier
        :return: The emoji character (if valid), otherwise an empty string
        :raises ValueError: If a sequence element is not a valid hexadecimal
            unicode code point.
        """
        sequence = cls.get_character_sequence(identifier)
        # every element is the hexadecimal code point of a single character
        return "".join(chr(int(element, 16)) for element in sequence)

    @classmethod
    def validate_sequence(cls, sequence: list[str]) -> bool:
        """
        Returns if given sequence is known (in the current version) of our
        Emoji database.

        :param sequence: The sequence as unicode characters without leading
            zeros.
        :return: True if the sequence is known
        """
        return "_".join(sequence).upper() in cls.get_all_valid_sequences()

    @classmethod
    def get_extensions(cls) -> dict:
        """
        Returns all available emoji extensions

        On the first call the addon manager is queried for all addons matching
        ``"emojis.*"`` and it is detected if SVG emojis can be used.

        :return: Dictionary of extensions and their corresponding FileStag path
            to access their data
        """
        from scistag.imagestag import svg
        with cls._access_lock:
            if not cls._initialized:
                cls._extensions = \
                    scistag.addons.AddonManager.get_addons_paths("emojis.*")
                # SVG emojis require the SVG addon and a working renderer
                cls._svg_emojis = \
                    EMOJI_SVG_ADDON in cls._extensions and \
                    svg.SvgRenderer.available()
                cls._initialized = True
            return cls._extensions

    @classmethod
    def get_svg_support(cls) -> bool:
        """
        Returns if SVG rendering is supported and the SVG repo is installed

        :return: True if high quality rendering is possible
        """
        cls.get_extensions()  # ensures _svg_emojis was detected
        return cls._svg_emojis

    @classmethod
    def get_svg(cls, sequence: list[str]) -> bytes | None:
        """
        Tries to read the SVG of an emoji from the database

        :param sequence: The unicode sequence, e.g. ["1f98c"] for a stag
        :return: The SVG data on success, otherwise None
        """
        extensions = cls.get_extensions()
        if EMOJI_SVG_ADDON not in extensions:
            return None
        combined = "_".join(element.lower() for element in sequence)
        emoji_path = (extensions[EMOJI_SVG_ADDON] +
                      f"images/noto/emojis/svg/emoji_u{combined}.svg")
        return FileStag.load(emoji_path)

    @classmethod
    def get_png(cls,
                sequence: list[str]) -> bytes | None:
        """
        Tries to read the PNG of an emoji from the database

        :param sequence: The unicode sequence, e.g. ["1f98c"] for a stag
        :return: The PNG data on success, otherwise None
        """
        combined = "_".join(element.lower() for element in sequence)
        emoji_path = get_edp() + f"images/noto/cpngs/emoji_u{combined}.png"
        return FileStag.load(emoji_path)

    @classmethod
    def get_details(cls, sequence: list[str]) -> EmojiInfo | None:
        """
        Returns details about an emoji

        :param sequence: The unicode sequence without leading zeros.
        :return: The EmojiInfo object if available, otherwise None
        """
        main_db = cls._get_main_dict()
        # the main dictionary's keys are upper cased
        combined = "_".join(element.upper() for element in sequence)
        if combined in main_db:
            return EmojiInfo.parse_obj(main_db[combined])
        return None

    @classmethod
    def get_categories(cls) -> list[str]:
        """
        Returns a list of all emoji main categories

        :return: A sorted list of all known emoji categories
        """
        main_dict = cls._get_main_dict()
        return sorted({element["category"] for element in main_dict.values()})

    @classmethod
    def get_sub_categories(cls, category: str) -> list[str]:
        """
        Returns a list of all emoji sub categories of given category

        :param category: The category's name
        :return: A sorted list of subcategories in this category
        """
        main_dict = cls._get_main_dict()
        return sorted({element['subcategory']
                       for element in main_dict.values()
                       if element['category'] == category})

    @classmethod
    def get_emojis_in_category(cls, category: str, subcategory: str | None) -> \
            list[EmojiInfo]:
        """
        Returns all emojis in the defined category and subcategory

        :param category: The main category's name as obtained by
            :meth:`get_categories`.
        :param subcategory: The name of the subcategory. If no subcategory is
            provided all emojis in the category will be returned.
        :return: A list of all emojis in given category and subcategory,
            sorted by name
        """
        main_dict = cls._get_main_dict()
        filtered_list = [
            EmojiInfo.parse_obj(element) for element in main_dict.values()
            if element['category'] == category and
            (subcategory is None or element['subcategory'] == subcategory)]
        return sorted(filtered_list, key=lambda element: element.name)

    @classmethod
    def find_emojis_by_name(cls, name_mask: str,
                            md: bool = False) -> list[EmojiInfo]:
        """
        Returns all emojis which match the defined search pattern

        :param name_mask: The name mask to search for, e.g *sun*
        :param md: Defines if the GitHub markdown db name shall be used instead
            of the full unicode name list.
        :return: A list of all matching Emojis
        """
        main_dict = cls._get_main_dict()
        if md:
            # not every emoji has a markdown name, fall back to empty string
            return [EmojiInfo.parse_obj(element) for element in
                    main_dict.values() if
                    fnmatch(element.get('markdownName', ""), name_mask)]
        return [EmojiInfo.parse_obj(element) for element in
                main_dict.values() if
                fnmatch(element['name'], name_mask)]

    @classmethod
    def __getitem__(cls, key: str) -> EmojiInfo | None:
        """
        Returns the emoji details for a specific Emoji by its name.

        NOTE(review): Python looks up ``__getitem__`` on the type of the
        subscripted object, so this is reached via an instance
        (``EmojiDb()["deer"]``), not via ``EmojiDb["deer"]``.

        :param key: Either the full qualified name as defined in the unicode
            database or its GitHub markdown name surrounded by colons, eg
            "deer" or ":deer:"
        :return: The Emoji description, None if the name could be resolved
            but the main database holds no details for its sequence.
        :raises KeyError: If the name could not be resolved to a sequence
        """
        sequence = cls.get_sequence_for_name(key)
        if len(sequence) > 0:
            return cls.get_details(sequence)
        raise KeyError("Emoji not found")
def get_emoji_sequence(identifier: EmojiIdentifierTypes) -> list[str]:
    """
    Resolves an emoji identifier to its unicode sequence.

    Accepted are an emoji name such as ``"deer"`` (see
    :meth:`EmojiDb.get_sequence_for_name`), a markdown emoji identifier such
    as ``":deer:"`` (see GitHub) or a single unicode character.
    A value which is not a string, such as a unicode sequence, is returned
    unchanged.

    :param identifier: The emoji identifier, either its name such as "deer",
        its markdown name such as ":deer:", a single unicode character
        like 🦌 or a unicode sequence as list of strings.
    :return: The unicode sequence, e.g. ["1f98c"] for a deer
    """
    sequence = EmojiDb.get_character_sequence(identifier)
    return sequence
def get_emoji_sequence_valid(sequence: list[str]) -> bool:
    """
    Checks whether the given emoji sequence is known to our current database.

    :param sequence: The sequence, a list of unicode sequence components
        without leading zeros as returned by :meth:`get_emoji_sequence`.
    :return: True if the sequence is known
    """
    is_known = EmojiDb.validate_sequence(sequence)
    return is_known
def get_emoji_character(identifier: EmojiIdentifierTypes) -> str:
    """
    Looks up the emoji unicode character for an emoji identifier.

    :param identifier: The emoji identifier, either its name such as "deer",
        its markdown name such as ":deer:", a single unicode character
        like 🦌 or a unicode sequence as list of strings.
    :return: The unicode emoji character such as 🦌 if the emoji could be
        found, otherwise an empty string.
    """
    character = EmojiDb.get_character(identifier)
    return character
def get_emoji_details(identifier: EmojiIdentifierTypes) -> EmojiInfo | None:
    """
    Provides the details stored in our db about an emoji.

    :param identifier: The emoji identifier, either its name such as "deer",
        its markdown name such as ":deer:", a single unicode character
        like 🦌 or a combined character like 🇩🇪.
    :return: The Emoji info object if available, providing all information
        about the emoji stored in our db.
    """
    # first resolve the identifier to a sequence, then look the sequence up
    sequence = EmojiDb.get_character_sequence(identifier)
    return EmojiDb.get_details(sequence)
def find_emojis_by_name(name_mask: str, md: bool = False):
    """
    Searches for all emojis whose name matches the given search pattern.

    Forwards to :meth:`EmojiDb.find_emojis_by_name`.

    :param name_mask: The name mask to search for, e.g. ``*sun*``
    :param md: Defines if the GitHub markdown db name shall be used instead
        of the full unicode name list.
    :return: A list of all matching Emojis
    """
    matches = EmojiDb.find_emojis_by_name(name_mask, md)
    return matches