"""
Helper functions to handle Jupyter Notebooks
"""
import json
NOTEBOOK_CELL_DATA = "data"
"Data part of a cell"
NOTEBOOK_CELL_TYPE_CODE = "code"
"Cell type Code"
NOTEBOOK_CELL_TYPE_MARKDOWN = "markdown"
"Cell type Markdown"
NOTEBOOK_CELL_OUTPUTS = "outputs"
"Output part of a cell"
NOTEBOOK_CELL_TYPE = "cell_type"
"Cell's type"
NOTEBOOK_CELLS = "cells"
"The cell list"
[docs]class Notebook:
"""
Jupyter notebook helper class.
Is able to load a notebook, provide statistics about it and clean it,
for example to use it in a pre-commit hook.
"""
def __init__(self, source: str):
"""
:param source: The notebooks filename
"""
self.name = source
"The origin filename"
with open(source, "r", encoding="utf-8") as nb_file:
self.notebook = json.load(nb_file)
"The notebook data"
self.cell_count = 0
"Total cell count"
self.mark_down_cells = 0
"Count of markdown cells"
self.code_cells = 0
"Count of code cells"
self.output_data_size = 0
"The total size of the output data"
self.is_dirty = True
"Defines if the Notebook is dirty, e.g. has a counter of outputs"
self.parse()
[docs] def save(self, filename: str):
"""
Saves the notebook to disk
:param filename: The target filename
"""
with open(filename, "w", encoding="utf-8") as nb_file:
nb_file.write(json.dumps(self.notebook, indent=1))
[docs] def parse(self):
"""
Parses the notebook to receive some details about it
"""
for cell in self.notebook.get(NOTEBOOK_CELLS, []):
self.cell_count += 1
cell_type = cell.get(NOTEBOOK_CELL_TYPE, "")
if cell_type == NOTEBOOK_CELL_TYPE_MARKDOWN:
self.mark_down_cells += 1
elif cell.get(NOTEBOOK_CELL_TYPE, "") == NOTEBOOK_CELL_TYPE_CODE:
self.code_cells += 1
outputs = cell.get(NOTEBOOK_CELL_OUTPUTS, [])
for output in outputs:
output: dict
total_size = 0
if NOTEBOOK_CELL_DATA in output:
total_size += sum(
[len(value) for value in
output[NOTEBOOK_CELL_DATA].values()])
self.output_data_size += total_size
[docs] def clean(self,
clear_outputs=True,
clear_metadata=True,
clear_counters=True):
"""
Removes temporary data from the notebook such as outputs and execution
counters.
:param clear_outputs: Clear all outputs?
:param clear_metadata: Clear metadata?
:param clear_counters: Clear counters?
"""
for cell in self.notebook.get(NOTEBOOK_CELLS, []):
self.cell_count += 1
cell_type = cell.get(NOTEBOOK_CELL_TYPE, "")
if cell_type == NOTEBOOK_CELL_TYPE_MARKDOWN:
self.mark_down_cells += 1
elif cell.get(NOTEBOOK_CELL_TYPE, "") == NOTEBOOK_CELL_TYPE_CODE:
self.code_cells += 1
if NOTEBOOK_CELL_OUTPUTS in cell and clear_outputs:
cell[NOTEBOOK_CELL_OUTPUTS] = []
if "metadata" in cell and clear_metadata:
cell["metadata"] = {}
if "execution_count" in cell and clear_counters:
cell["execution_count"] = None
def __str__(self):
repr_str = "Notebook"
repr_str += f"\n* cells: {self.cell_count}"
repr_str += f"\n * code: {self.code_cells}"
repr_str += f"\n * markdown: {self.mark_down_cells}"
repr_str += f"\n* outputSize: {self.output_data_size}"
return repr_str