Uploaded Test files

This commit is contained in:
Batuhan Berk Başoğlu 2020-11-12 11:05:57 -05:00
parent f584ad9d97
commit 2e81cb7d99
16627 changed files with 2065359 additions and 102444 deletions

View file

@ -0,0 +1,70 @@
"""The main API for the v3 notebook format.
"""
# Copyright (c) IPython Development Team.
# Distributed under the terms of the Modified BSD License.
# Public API re-exported by the v3 package.
# NOTE: per the imports below, read_json/write_json and read_py/write_py are
# bound to the same string-based ``reads``/``writes`` functions as
# reads_json/writes_json and reads_py/writes_py.
__all__ = ['NotebookNode', 'new_code_cell', 'new_text_cell', 'new_notebook',
'new_output', 'new_worksheet', 'new_metadata', 'new_author',
'new_heading_cell', 'nbformat', 'nbformat_minor', 'nbformat_schema',
'reads_json', 'writes_json', 'read_json', 'write_json',
'to_notebook_json', 'reads_py', 'writes_py', 'read_py', 'write_py',
'to_notebook_py', 'downgrade', 'upgrade', 'parse_filename'
]
import os
from .nbbase import (
NotebookNode,
new_code_cell, new_text_cell, new_notebook, new_output, new_worksheet,
new_metadata, new_author, new_heading_cell, nbformat, nbformat_minor,
nbformat_schema
)
from .nbjson import reads as reads_json, writes as writes_json
from .nbjson import reads as read_json, writes as write_json
from .nbjson import to_notebook as to_notebook_json
from .nbpy import reads as reads_py, writes as writes_py
from .nbpy import reads as read_py, writes as write_py
from .nbpy import to_notebook as to_notebook_py
from .convert import downgrade, upgrade
def parse_filename(fname):
    """Split a notebook filename into (filename, notebook name, format).

    The extension decides the format; anything that is not a recognised
    notebook extension is treated as a bare notebook name and gets
    ``.ipynb`` appended:

    * notebook.ipynb -> (notebook.ipynb, notebook, json)
    * notebook.json  -> (notebook.json, notebook, json)
    * notebook.py    -> (notebook.py, notebook, py)
    * notebook       -> (notebook.ipynb, notebook, json)

    Parameters
    ----------
    fname : unicode
        The notebook filename, with one of the extensions ``.ipynb``,
        ``.json``, ``.py`` or with no recognised extension at all.

    Returns
    -------
    (fname, name, format) : (unicode, unicode, unicode)
        The (possibly extended) filename, the notebook name and the
        format, which is either ``json`` or ``py``.
    """
    formats_by_extension = {
        u'.ipynb': u'json',
        u'.json': u'json',
        u'.py': u'py',
    }
    stem, extension = os.path.splitext(fname)
    fmt = formats_by_extension.get(extension)
    if fmt is None:
        # Unrecognised (or missing) extension: the whole input is the name.
        return fname + u'.ipynb', fname, u'json'
    return fname, stem, fmt

View file

@ -0,0 +1,91 @@
"""Code for converting notebooks to and from the v2 format."""
# Copyright (c) IPython Development Team.
# Distributed under the terms of the Modified BSD License.
from .nbbase import (
new_code_cell, new_text_cell, new_worksheet, new_notebook, new_output,
nbformat, nbformat_minor
)
from nbformat import v2
def _unbytes(obj):
"""There should be no bytes objects in a notebook
v2 stores png/jpeg as b64 ascii bytes
"""
if isinstance(obj, dict):
for k,v in obj.items():
obj[k] = _unbytes(v)
elif isinstance(obj, list):
for i,v in enumerate(obj):
obj[i] = _unbytes(v)
elif isinstance(obj, bytes):
# only valid bytes are b64-encoded ascii
obj = obj.decode('ascii')
return obj
def upgrade(nb, from_version=2, from_minor=0):
    """Convert a notebook to v3.

    The notebook is modified in place and also returned.

    Parameters
    ----------
    nb : NotebookNode
        The Python representation of the notebook to convert.
    from_version : int
        The original version of the notebook to convert.
    from_minor : int
        The original minor version of the notebook to convert (only relevant for v >= 3).

    Raises
    ------
    ValueError
        If ``from_version`` is neither 2 nor 3.
    """
    if from_version == 2:
        # Mark the original nbformat so consumers know it has been converted.
        nb.nbformat = nbformat
        nb.nbformat_minor = nbformat_minor
        nb.orig_nbformat = 2
        nb = _unbytes(nb)
        for worksheet in nb['worksheets']:
            for cell in worksheet['cells']:
                # every v3 cell carries a metadata mapping
                cell.setdefault('metadata', {})
        return nb

    if from_version == 3:
        minor_changed = from_minor != nbformat_minor
        if minor_changed:
            nb.orig_nbformat_minor = from_minor
        nb.nbformat_minor = nbformat_minor
        return nb

    raise ValueError('Cannot convert a notebook directly from v%s to v3. ' \
                     'Try using the nbformat.convert module.' % from_version)
def heading_to_md(cell):
    """Rewrite a heading cell, in place, as the equivalent markdown cell.

    The cell's ``level`` (default 1) is removed and turned into that many
    leading ``#`` characters in front of the source text.
    """
    depth = cell.pop('level', 1)
    cell.cell_type = "markdown"
    hashes = '#' * depth
    cell.source = hashes + ' ' + cell.source
def raw_to_md(cell):
    """Relabel a raw cell, in place, as a markdown cell.

    Only ``cell_type`` changes; the source is passed through untouched.
    """
    setattr(cell, 'cell_type', "markdown")
def downgrade(nb):
    """Convert a v3 notebook to v2.

    A notebook whose ``nbformat`` is not 3 is returned unchanged. Otherwise
    the notebook is modified in place: ``nbformat`` becomes 2 and heading
    and raw cells are turned into markdown cells.

    Parameters
    ----------
    nb : NotebookNode
        The Python representation of the notebook to convert.
    """
    if nb.nbformat != 3:
        return nb

    nb.nbformat = 2
    every_cell = (cell for worksheet in nb.worksheets for cell in worksheet.cells)
    for cell in every_cell:
        kind = cell.cell_type
        if kind == 'heading':
            heading_to_md(cell)
        elif kind == 'raw':
            raw_to_md(cell)
    return nb

View file

@ -0,0 +1,204 @@
"""The basic dict based notebook format.
The Python representation of a notebook is a nested structure of
dictionary subclasses that support attribute access
(ipython_genutils.ipstruct.Struct). The functions in this module are merely
helpers to build the structs in the right form.
"""
# Copyright (c) IPython Development Team.
# Distributed under the terms of the Modified BSD License.
import pprint
import uuid
from ipython_genutils.ipstruct import Struct
from ipython_genutils.py3compat import cast_unicode
#-----------------------------------------------------------------------------
# Code
#-----------------------------------------------------------------------------
# Change this when incrementing the nbformat version
nbformat = 3
nbformat_minor = 0
# Maps a (major, minor) format version to the file name of its JSON schema.
nbformat_schema = {
(3, 0): 'nbformat.v3.schema.json'
}
class NotebookNode(Struct):
    """Node type used for every level of the notebook structure.

    Adds nothing to ``Struct``; it exists so notebook nodes have their own
    type.
    """
def from_dict(d):
    """Recursively convert plain containers into notebook nodes.

    Dicts become ``NotebookNode`` instances, tuples and lists become lists
    (with their items converted), and any other value is returned as is.
    """
    if isinstance(d, dict):
        node = NotebookNode()
        for key, value in d.items():
            node[key] = from_dict(value)
        return node
    if isinstance(d, (tuple, list)):
        return [from_dict(item) for item in d]
    return d
def new_output(output_type, output_text=None, output_png=None,
    output_html=None, output_svg=None, output_latex=None, output_json=None,
    output_javascript=None, output_jpeg=None, prompt_number=None,
    ename=None, evalue=None, traceback=None, stream=None, metadata=None):
    """Create a new output, to go in the ``cell.outputs`` list of a code cell.

    Only the fields relevant to ``output_type`` are stored, and only when
    the corresponding argument is not None:

    * every type except ``pyerr`` may carry the representation fields
      (text, png, jpeg, html, svg, latex, json, javascript);
    * ``pyout`` and ``display_data`` carry ``metadata``;
    * ``pyout`` may carry ``prompt_number``;
    * ``pyerr`` may carry ``ename``, ``evalue`` and ``traceback``;
    * ``stream`` always carries ``stream`` (``'stdout'`` when not given).

    Raises
    ------
    TypeError
        If ``metadata`` is given and is not a dict.
    """
    output = NotebookNode()
    output.output_type = str(output_type)

    metadata = {} if metadata is None else metadata
    if not isinstance(metadata, dict):
        raise TypeError("metadata must be dict")
    if output_type in {u'pyout', 'display_data'}:
        output.metadata = metadata

    if output_type != 'pyerr':
        representations = (
            ('text', output_text),
            ('png', output_png),
            ('jpeg', output_jpeg),
            ('html', output_html),
            ('svg', output_svg),
            ('latex', output_latex),
            ('json', output_json),
            ('javascript', output_javascript),
        )
        for field, value in representations:
            if value is not None:
                setattr(output, field, cast_unicode(value))

    if output_type == u'pyout' and prompt_number is not None:
        output.prompt_number = int(prompt_number)

    if output_type == u'pyerr':
        if ename is not None:
            output.ename = cast_unicode(ename)
        if evalue is not None:
            output.evalue = cast_unicode(evalue)
        if traceback is not None:
            output.traceback = [cast_unicode(tb_line) for tb_line in list(traceback)]

    if output_type == u'stream':
        if stream is None:
            output.stream = 'stdout'
        else:
            output.stream = cast_unicode(stream)

    return output
def new_code_cell(input=None, prompt_number=None, outputs=None,
    language=u'python', collapsed=False, metadata=None):
    """Create a new code cell with input and output.

    ``language``, ``input``, ``prompt_number`` and ``collapsed`` are only
    stored when they are not None. ``outputs`` defaults to a new empty list
    and ``metadata`` to an empty node.
    """
    cell = NotebookNode()
    cell.cell_type = u'code'
    if language is not None:
        cell.language = cast_unicode(language)
    if input is not None:
        cell.input = cast_unicode(input)
    if prompt_number is not None:
        cell.prompt_number = int(prompt_number)
    cell.outputs = [] if outputs is None else outputs
    if collapsed is not None:
        cell.collapsed = bool(collapsed)
    cell.metadata = NotebookNode(metadata or {})
    return cell
def new_text_cell(cell_type, source=None, rendered=None, metadata=None):
    """Create a new text cell of the given type.

    ``source`` is stored only when it is not None. ``rendered`` is accepted
    but not stored.
    """
    # VERSIONHACK: plaintext -> raw
    # handle never-released plaintext name for raw cells
    effective_type = 'raw' if cell_type == 'plaintext' else cell_type

    cell = NotebookNode()
    if source is not None:
        cell.source = cast_unicode(source)
    cell.metadata = NotebookNode(metadata or {})
    cell.cell_type = effective_type
    return cell
def new_heading_cell(source=None, level=1, rendered=None, metadata=None):
    """Create a new heading cell with a given integer level.

    ``source`` is stored only when it is not None. ``rendered`` is accepted
    but not stored.
    """
    heading = NotebookNode()
    heading.cell_type = u'heading'
    if source is not None:
        heading.source = cast_unicode(source)
    heading.level = int(level)
    heading.metadata = NotebookNode(metadata or {})
    return heading
def new_worksheet(name=None, cells=None, metadata=None):
    """Create a worksheet holding a list of cells.

    ``cells`` is copied into a new list (empty when not given). ``name`` is
    accepted but not stored.
    """
    worksheet = NotebookNode()
    worksheet.cells = [] if cells is None else list(cells)
    worksheet.metadata = NotebookNode(metadata or {})
    return worksheet
def new_notebook(name=None, metadata=None, worksheets=None):
    """Create a notebook from a name, metadata and a list of worksheets.

    The notebook is stamped with the module's ``nbformat`` and
    ``nbformat_minor``. ``worksheets`` is copied into a new list (empty when
    not given). When ``name`` is not None it is stored in the notebook
    metadata.
    """
    notebook = NotebookNode()
    notebook.nbformat = nbformat
    notebook.nbformat_minor = nbformat_minor
    notebook.worksheets = [] if worksheets is None else list(worksheets)
    if metadata is None:
        notebook.metadata = new_metadata()
    else:
        notebook.metadata = NotebookNode(metadata)
    if name is not None:
        notebook.metadata.name = cast_unicode(name)
    return notebook
def new_metadata(name=None, authors=None, license=None, created=None,
    modified=None, gistid=None):
    """Create a new metadata node.

    Only the arguments that are not None are stored. ``authors`` is copied
    into a list; every other value is converted to text.
    """
    metadata = NotebookNode()
    if name is not None:
        metadata.name = cast_unicode(name)
    if authors is not None:
        metadata.authors = list(authors)
    text_fields = (
        ('created', created),
        ('modified', modified),
        ('license', license),
        ('gistid', gistid),
    )
    for field, value in text_fields:
        if value is not None:
            setattr(metadata, field, cast_unicode(value))
    return metadata
def new_author(name=None, email=None, affiliation=None, url=None):
    """Create a new author node.

    Only the arguments that are not None are stored, converted to text.
    """
    author = NotebookNode()
    details = (
        ('name', name),
        ('email', email),
        ('affiliation', affiliation),
        ('url', url),
    )
    for field, value in details:
        if value is not None:
            setattr(author, field, cast_unicode(value))
    return author

View file

@ -0,0 +1,367 @@
{
"$schema": "http://json-schema.org/draft-04/schema#",
"description": "IPython Notebook v3.0 JSON schema.",
"type": "object",
"additionalProperties": false,
"required": ["metadata", "nbformat_minor", "nbformat", "worksheets"],
"properties": {
"metadata": {
"description": "Notebook root-level metadata.",
"type": "object",
"additionalProperties": true,
"properties": {
"kernel_info": {
"description": "Kernel information.",
"type": "object",
"required": ["name", "language"],
"properties": {
"name": {
"description": "Name of the kernel specification.",
"type": "string"
},
"language": {
"description": "The programming language which this kernel runs.",
"type": "string"
},
"codemirror_mode": {
"description": "The codemirror mode to use for code in this language.",
"type": "string"
}
}
},
"signature": {
"description": "Hash of the notebook.",
"type": "string"
}
}
},
"nbformat_minor": {
"description": "Notebook format (minor number). Incremented for backward compatible changes to the notebook format.",
"type": "integer",
"minimum": 0
},
"nbformat": {
"description": "Notebook format (major number). Incremented between backwards incompatible changes to the notebook format.",
"type": "integer",
"minimum": 3,
"maximum": 3
},
"orig_nbformat": {
"description": "Original notebook format (major number) before converting the notebook between versions.",
"type": "integer",
"minimum": 1
},
"orig_nbformat_minor": {
"description": "Original notebook format (minor number) before converting the notebook between versions.",
"type": "integer",
"minimum": 0
},
"worksheets" : {
"description": "Array of worksheets",
"type": "array",
"items": {"$ref": "#/definitions/worksheet"}
}
},
"definitions": {
"worksheet": {
"additionalProperties": false,
"required" : ["cells"],
"properties":{
"cells": {
"description": "Array of cells of the current notebook.",
"type": "array",
"items": {
"type": "object",
"oneOf": [
{"$ref": "#/definitions/raw_cell"},
{"$ref": "#/definitions/markdown_cell"},
{"$ref": "#/definitions/heading_cell"},
{"$ref": "#/definitions/code_cell"}
]
}
},
"metadata": {
"type": "object",
"description": "metadata of the current worksheet"
}
}
},
"raw_cell": {
"description": "Notebook raw nbconvert cell.",
"type": "object",
"additionalProperties": false,
"required": ["cell_type", "source"],
"properties": {
"cell_type": {
"description": "String identifying the type of cell.",
"enum": ["raw"]
},
"metadata": {
"description": "Cell-level metadata.",
"type": "object",
"additionalProperties": true,
"properties": {
"format": {
"description": "Raw cell metadata format for nbconvert.",
"type": "string"
},
"name": {"$ref": "#/definitions/misc/metadata_name"},
"tags": {"$ref": "#/definitions/misc/metadata_tags"}
}
},
"source": {"$ref": "#/definitions/misc/source"}
}
},
"markdown_cell": {
"description": "Notebook markdown cell.",
"type": "object",
"additionalProperties": false,
"required": ["cell_type", "source"],
"properties": {
"cell_type": {
"description": "String identifying the type of cell.",
"enum": ["markdown", "html"]
},
"metadata": {
"description": "Cell-level metadata.",
"type": "object",
"properties": {
"name": {"$ref": "#/definitions/misc/metadata_name"},
"tags": {"$ref": "#/definitions/misc/metadata_tags"}
},
"additionalProperties": true
},
"source": {"$ref": "#/definitions/misc/source"}
}
},
"heading_cell": {
"description": "Notebook heading cell.",
"type": "object",
"additionalProperties": false,
"required": ["cell_type", "source", "level"],
"properties": {
"cell_type": {
"description": "String identifying the type of cell.",
"enum": ["heading"]
},
"metadata": {
"description": "Cell-level metadata.",
"type": "object",
"additionalProperties": true
},
"source": {"$ref": "#/definitions/misc/source"},
"level": {
"description": "Level of heading cells.",
"type": "integer",
"minimum": 1
}
}
},
"code_cell": {
"description": "Notebook code cell.",
"type": "object",
"additionalProperties": false,
"required": ["cell_type", "input", "outputs", "language"],
"properties": {
"cell_type": {
"description": "String identifying the type of cell.",
"enum": ["code"]
},
"language": {
"description": "The cell's language (always Python)",
"type": "string"
},
"collapsed": {
"description": "Whether the cell is collapsed/expanded.",
"type": "boolean"
},
"metadata": {
"description": "Cell-level metadata.",
"type": "object",
"additionalProperties": true
},
"input": {"$ref": "#/definitions/misc/source"},
"outputs": {
"description": "Execution, display, or stream outputs.",
"type": "array",
"items": {"$ref": "#/definitions/output"}
},
"prompt_number": {
"description": "The code cell's prompt number. Will be null if the cell has not been run.",
"type": ["integer", "null"],
"minimum": 0
}
}
},
"output": {
"type": "object",
"oneOf": [
{"$ref": "#/definitions/pyout"},
{"$ref": "#/definitions/display_data"},
{"$ref": "#/definitions/stream"},
{"$ref": "#/definitions/pyerr"}
]
},
"pyout": {
"description": "Result of executing a code cell.",
"type": "object",
"additionalProperties": false,
"required": ["output_type", "prompt_number"],
"properties": {
"output_type": {
"description": "Type of cell output.",
"enum": ["pyout"]
},
"prompt_number": {
"description": "A result's prompt number.",
"type": ["integer"],
"minimum": 0
},
"text": {"$ref": "#/definitions/misc/multiline_string"},
"latex": {"$ref": "#/definitions/misc/multiline_string"},
"png": {"$ref": "#/definitions/misc/multiline_string"},
"jpeg": {"$ref": "#/definitions/misc/multiline_string"},
"svg": {"$ref": "#/definitions/misc/multiline_string"},
"html": {"$ref": "#/definitions/misc/multiline_string"},
"javascript": {"$ref": "#/definitions/misc/multiline_string"},
"json": {"$ref": "#/definitions/misc/multiline_string"},
"pdf": {"$ref": "#/definitions/misc/multiline_string"},
"metadata": {"$ref": "#/definitions/misc/output_metadata"}
},
"patternProperties": {
"^[a-zA-Z0-9]+/[a-zA-Z0-9\\-\\+\\.]+$": {
"description": "mimetype output (e.g. text/plain), represented as either an array of strings or a string.",
"$ref": "#/definitions/misc/multiline_string"
}
}
},
"display_data": {
"description": "Data displayed as a result of code cell execution.",
"type": "object",
"additionalProperties": false,
"required": ["output_type"],
"properties": {
"output_type": {
"description": "Type of cell output.",
"enum": ["display_data"]
},
"text": {"$ref": "#/definitions/misc/multiline_string"},
"latex": {"$ref": "#/definitions/misc/multiline_string"},
"png": {"$ref": "#/definitions/misc/multiline_string"},
"jpeg": {"$ref": "#/definitions/misc/multiline_string"},
"svg": {"$ref": "#/definitions/misc/multiline_string"},
"html": {"$ref": "#/definitions/misc/multiline_string"},
"javascript": {"$ref": "#/definitions/misc/multiline_string"},
"json": {"$ref": "#/definitions/misc/multiline_string"},
"pdf": {"$ref": "#/definitions/misc/multiline_string"},
"metadata": {"$ref": "#/definitions/misc/output_metadata"}
},
"patternProperties": {
"[a-zA-Z0-9]+/[a-zA-Z0-9\\-\\+\\.]+$": {
"description": "mimetype output (e.g. text/plain), represented as either an array of strings or a string.",
"$ref": "#/definitions/misc/multiline_string"
}
}
},
"stream": {
"description": "Stream output from a code cell.",
"type": "object",
"additionalProperties": false,
"required": ["output_type", "stream", "text"],
"properties": {
"output_type": {
"description": "Type of cell output.",
"enum": ["stream"]
},
"stream": {
"description": "The stream type/destination.",
"type": "string"
},
"text": {
"description": "The stream's text output, represented as either a string or an array of strings.",
"$ref": "#/definitions/misc/multiline_string"
}
}
},
"pyerr": {
"description": "Output of an error that occurred during code cell execution.",
"type": "object",
"additionalProperties": false,
"required": ["output_type", "ename", "evalue", "traceback"],
"properties": {
"output_type": {
"description": "Type of cell output.",
"enum": ["pyerr"]
},
"ename": {
"description": "The name of the error.",
"type": "string"
},
"evalue": {
"description": "The value, or message, of the error.",
"type": "string"
},
"traceback": {
"description": "The error's traceback, represented as an array of strings.",
"type": "array",
"items": {"type": "string"}
}
}
},
"misc": {
"metadata_name": {
"description": "The cell's name. If present, must be a non-empty string.",
"type": "string",
"pattern": "^.+$"
},
"metadata_tags": {
"description": "The cell's tags. Tags must be unique, and must not contain commas.",
"type": "array",
"uniqueItems": true,
"items": {
"type": "string",
"pattern": "^[^,]+$"
}
},
"source": {
"description": "Contents of the cell, represented as either a single string or an array of lines.",
"$ref": "#/definitions/misc/multiline_string"
},
"prompt_number": {
"description": "The code cell's prompt number. Will be null if the cell has not been run.",
"type": ["integer", "null"],
"minimum": 0
},
"mimetype": {
"patternProperties": {
"^[a-zA-Z0-9\\-\\+]+/[a-zA-Z0-9\\-\\+]+": {
"description": "The cell's mimetype output (e.g. text/plain), represented as either an array of strings or a string.",
"$ref": "#/definitions/misc/multiline_string"
}
}
},
"output_metadata": {
"description": "Cell output metadata.",
"type": "object",
"additionalProperties": true
},
"multiline_string": {
"oneOf" : [
{"type": "string"},
{
"type": "array",
"items": {"type": "string"}
}
]
}
}
}
}

View file

@ -0,0 +1,58 @@
"""Read and write notebooks in JSON format."""
# Copyright (c) IPython Development Team.
# Distributed under the terms of the Modified BSD License.
import copy
import json
from .nbbase import from_dict
from .rwbase import (
NotebookReader, NotebookWriter, restore_bytes, rejoin_lines, split_lines,
strip_transient,
)
class BytesEncoder(json.JSONEncoder):
    """A JSON encoder that accepts b64 (and other *ascii*) bytestrings."""

    def default(self, obj):
        """Encode bytes as ASCII text; defer everything else to the base class."""
        if not isinstance(obj, bytes):
            return super().default(obj)
        return obj.decode('ascii')
class JSONReader(NotebookReader):
    """Reader for notebooks stored as JSON."""

    def reads(self, s, **kwargs):
        """Parse the JSON string ``s`` into a notebook.

        ``kwargs`` are passed to ``json.loads`` and to ``to_notebook``.
        Transient values are stripped from the result.
        """
        parsed = json.loads(s, **kwargs)
        notebook = self.to_notebook(parsed, **kwargs)
        return strip_transient(notebook)

    def to_notebook(self, d, **kwargs):
        """Convert the decoded JSON structure ``d`` into a notebook.

        Multi-line fields that were stored as lists of lines are rejoined.
        ``kwargs`` are accepted and ignored.
        """
        nodes = from_dict(d)
        return rejoin_lines(nodes)
class JSONWriter(NotebookWriter):
    """Writer for notebooks stored as JSON."""

    def writes(self, nb, **kwargs):
        """Serialise ``nb`` to a JSON string.

        The notebook is deep-copied first, so the caller's object is not
        modified. ``split_lines`` (default True) controls whether
        multi-line text is written as lists of lines; every other keyword is
        passed to ``json.dumps``, with ``cls``, ``indent``, ``sort_keys``
        and ``separators`` always overridden here.
        """
        kwargs.update(
            cls=BytesEncoder,
            indent=1,
            sort_keys=True,
            separators=(',', ': '),
        )
        want_split = kwargs.pop('split_lines', True)
        snapshot = strip_transient(copy.deepcopy(nb))
        if want_split:
            snapshot = split_lines(snapshot)
        return json.dumps(snapshot, **kwargs)
# Module-level reader/writer instances; their bound methods are exposed below
# as this module's function-style API.
_reader = JSONReader()
_writer = JSONWriter()
reads = _reader.reads
read = _reader.read
to_notebook = _reader.to_notebook
write = _writer.write
writes = _writer.writes

View file

@ -0,0 +1,204 @@
"""Read and write notebooks as regular .py files.
Authors:
* Brian Granger
"""
#-----------------------------------------------------------------------------
# Copyright (C) 2008-2011 The IPython Development Team
#
# Distributed under the terms of the BSD License. The full license is in
# the file COPYING, distributed as part of this software.
#-----------------------------------------------------------------------------
#-----------------------------------------------------------------------------
# Imports
#-----------------------------------------------------------------------------
import re
from .rwbase import NotebookReader, NotebookWriter
from .nbbase import (
new_code_cell, new_text_cell, new_worksheet,
new_notebook, new_heading_cell, nbformat, nbformat_minor,
)
#-----------------------------------------------------------------------------
# Code
#-----------------------------------------------------------------------------
# Matches a comment line that carries a source-encoding declaration such as
# ``# -*- coding: utf-8 -*-``; PyReader skips lines that match it.
_encoding_declaration_re = re.compile(r"^#.*coding[:=]\s*([-\w.]+)")
class PyReaderError(Exception):
    """Exception type declared for errors while reading a ``.py`` notebook."""
class PyReader(NotebookReader):
    """Reader that parses the ``.py`` notebook format into a v3 notebook.

    Cells are delimited by marker comments (``# <codecell>``,
    ``# <markdowncell>``, ...). Lines before the first marker are treated as
    a code cell. The bodies of non-code cells are stored as comment lines.
    """

    # Marker prefix -> parser state, for the markers that take no arguments.
    # VERSIONHACK: plaintext -> raw
    _simple_markers = (
        (u'# <codecell>', u'codecell'),
        (u'# <htmlcell>', u'htmlcell'),
        (u'# <markdowncell>', u'markdowncell'),
        (u'# <rawcell>', u'rawcell'),
        (u'# <plaintextcell>', u'rawcell'),
    )

    # Parser state -> cell type handed to new_text_cell.
    _text_cell_types = {
        u'htmlcell': u'html',
        u'markdowncell': u'markdown',
        u'rawcell': u'raw',
    }

    def reads(self, s, **kwargs):
        """Read a notebook from the string ``s``."""
        return self.to_notebook(s, **kwargs)

    def to_notebook(self, s, **kwargs):
        """Parse the ``.py`` source ``s`` into a notebook with one worksheet.

        ``kwargs`` are accepted for interface compatibility and ignored.
        """
        cells = []
        cell_lines = []
        cell_kwargs = {}
        state = u'codecell'
        for line in s.splitlines():
            if line.startswith(u'# <nbformat>') or _encoding_declaration_re.match(line):
                # file header lines carry no cell content
                continue
            next_state, next_kwargs = self._parse_marker(line)
            if next_state is None:
                cell_lines.append(line)
                continue
            # A marker closes the cell collected so far and opens a new one.
            self._flush(cells, state, cell_lines, cell_kwargs)
            state, cell_kwargs, cell_lines = next_state, next_kwargs, []
        if cell_lines:
            # Flush the final cell whatever its type; flushing only code
            # cells here silently dropped a trailing text or heading cell.
            self._flush(cells, state, cell_lines, cell_kwargs)
        ws = new_worksheet(cells=cells)
        nb = new_notebook(worksheets=[ws])
        return nb

    def _parse_marker(self, line):
        """Return ``(state, kwargs)`` for a cell marker line, else ``(None, {})``."""
        for prefix, marker_state in self._simple_markers:
            if line.startswith(prefix):
                return marker_state, {}
        if line.startswith(u'# <headingcell'):
            m = re.match(r'# <headingcell level=(?P<level>\d)>', line)
            if m is not None:
                return u'headingcell', {'level': int(m.group('level'))}
            # Heading marker without a parsable level: fall back to code.
            return u'codecell', {}
        return None, {}

    def _flush(self, cells, state, lines, kwargs):
        """Build a cell from the collected lines and append it if non-empty."""
        cell = self.new_cell(state, lines, **kwargs)
        if cell is not None:
            cells.append(cell)

    def new_cell(self, state, lines, **kwargs):
        """Create the cell for ``state`` from ``lines``.

        Returns None when the resulting text is empty or ``state`` is not a
        known cell state. ``level`` in ``kwargs`` (default 1) is used for
        heading cells.
        """
        if state == u'codecell':
            source = u'\n'.join(lines).strip(u'\n')
            return new_code_cell(input=source) if source else None
        if state == u'headingcell':
            text = self._remove_comments(lines)
            level = kwargs.get('level', 1)
            return new_heading_cell(source=text, level=level) if text else None
        cell_type = self._text_cell_types.get(state)
        if cell_type is None:
            return None
        text = self._remove_comments(lines)
        return new_text_cell(cell_type, source=text) if text else None

    def _remove_comments(self, lines):
        """Strip the comment prefix from each line and join the lines.

        A leading ``'# '`` is removed as a unit. A ``'#'`` that is not
        followed by a space is removed on its own, so the first character
        of the text is no longer lost. Leading and trailing newlines of the
        joined text are stripped.
        """
        new_lines = []
        for line in lines:
            if line.startswith(u'# '):
                new_lines.append(line[2:])
            elif line.startswith(u'#'):
                new_lines.append(line[1:])
            else:
                new_lines.append(line)
        text = u'\n'.join(new_lines)
        return text.strip(u'\n')

    def split_lines_into_blocks(self, lines):
        """Yield the top-level statements found in ``lines``, one per block.

        A single line is yielded unchanged. Otherwise the joined source is
        parsed with ``ast`` and split at the starting line of each top-level
        statement. Nothing is yielded when there are no statements.
        """
        if len(lines) == 1:
            yield lines[0]
            # A plain return ends the generator; raising StopIteration
            # inside a generator is turned into RuntimeError (PEP 479).
            return
        import ast
        source = '\n'.join(lines)
        code = ast.parse(source)
        starts = [x.lineno-1 for x in code.body]
        if not starts:
            return
        for begin, end in zip(starts, starts[1:]):
            yield '\n'.join(lines[begin:end]).strip('\n')
        yield '\n'.join(lines[starts[-1]:]).strip('\n')
class PyWriter(NotebookWriter):
    """Writer that serialises a notebook as a ``.py`` file with cell markers."""

    def writes(self, nb, **kwargs):
        """Return the ``.py`` representation of ``nb`` as a string.

        Code cells are written verbatim after a ``# <codecell>`` marker;
        html, markdown, raw and heading cells are written as comment lines
        after their marker. Cells whose content is None are skipped.
        ``kwargs`` are accepted and ignored.
        """
        def commented(marker, text):
            # marker, blank line, the text as '# ' comments, blank line
            return [marker, u''] + [u'# ' + row for row in text.splitlines()] + [u'']

        out = [
            u'# -*- coding: utf-8 -*-',
            u'# <nbformat>%i.%i</nbformat>' % (nbformat, nbformat_minor),
            u'',
        ]
        for ws in nb.worksheets:
            for cell in ws.cells:
                kind = cell.cell_type
                if kind == u'code':
                    body = cell.get(u'input')
                    if body is not None:
                        out += [u'# <codecell>', u'']
                        out += body.splitlines()
                        out.append(u'')
                elif kind == u'html':
                    body = cell.get(u'source')
                    if body is not None:
                        out += commented(u'# <htmlcell>', body)
                elif kind == u'markdown':
                    body = cell.get(u'source')
                    if body is not None:
                        out += commented(u'# <markdowncell>', body)
                elif kind == u'raw':
                    body = cell.get(u'source')
                    if body is not None:
                        out += commented(u'# <rawcell>', body)
                elif kind == u'heading':
                    body = cell.get(u'source')
                    level = cell.get(u'level', 1)
                    if body is not None:
                        out += commented(u'# <headingcell level=%s>' % level, body)
        out.append('')
        return u'\n'.join(out)
# Module-level reader/writer instances; their bound methods are exposed below
# as this module's function-style API.
_reader = PyReader()
_writer = PyWriter()
reads = _reader.reads
read = _reader.read
to_notebook = _reader.to_notebook
write = _writer.write
writes = _writer.writes

View file

@ -0,0 +1,184 @@
"""Base classes and utilities for readers and writers."""
# Copyright (c) IPython Development Team.
# Distributed under the terms of the Modified BSD License.
from ipython_genutils.py3compat import str_to_bytes
from .._compat import encodebytes, decodebytes
def restore_bytes(nb):
    """Restore bytes of image data from unicode-only formats.

    The ``png`` and ``jpeg`` entries of every code-cell output are
    converted to ASCII bytes, in place. Base64 encoding is handled
    elsewhere: bytes objects in the notebook are always b64-encoded, and no
    encoding/decoding is done around file formats.

    Note: this is never used
    """
    for worksheet in nb.worksheets:
        for cell in worksheet.cells:
            if cell.cell_type != 'code':
                continue
            for output in cell.outputs:
                for image_key in ('png', 'jpeg'):
                    if image_key in output:
                        as_text = getattr(output, image_key)
                        setattr(output, image_key, str_to_bytes(as_text, 'ascii'))
    return nb
# output keys that are likely to have multiline values; rejoin_lines() and
# split_lines() look at exactly these keys on every code-cell output
_multiline_outputs = ['text', 'html', 'svg', 'latex', 'javascript', 'json']
# FIXME: workaround for old splitlines()
def _join_lines(lines):
"""join lines that have been written by splitlines()
Has logic to protect against `splitlines()`, which
should have been `splitlines(True)`
"""
if lines and lines[0].endswith(('\n', '\r')):
# created by splitlines(True)
return u''.join(lines)
else:
# created by splitlines()
return u'\n'.join(lines)
def rejoin_lines(nb):
    """Rejoin multiline text into strings, in place.

    Reverses the effects of ``split_lines(nb)``. Only values that are lists
    are rejoined, so text that was never split passes through unchanged.

    Used when reading JSON files that may have been passed through split_lines.
    """
    for worksheet in nb.worksheets:
        for cell in worksheet.cells:
            if cell.cell_type != 'code':
                # text, heading cell
                for field in ('source', 'rendered'):
                    value = cell.get(field, None)
                    if isinstance(value, list):
                        cell[field] = _join_lines(value)
                continue
            if 'input' in cell and isinstance(cell.input, list):
                cell.input = _join_lines(cell.input)
            for output in cell.outputs:
                for field in _multiline_outputs:
                    value = output.get(field, None)
                    if isinstance(value, list):
                        output[field] = _join_lines(value)
    return nb
def split_lines(nb):
    """Split likely multiline text into lists of strings, in place.

    Makes file output friendlier to line-based VCS. Line endings are kept
    on each piece, and ``rejoin_lines(nb)`` reverses the effect. Only
    values that are ``str`` are split.

    Used when writing JSON files.
    """
    for worksheet in nb.worksheets:
        for cell in worksheet.cells:
            if cell.cell_type != 'code':
                # text, heading cell
                for field in ('source', 'rendered'):
                    value = cell.get(field, None)
                    if isinstance(value, str):
                        cell[field] = value.splitlines(True)
                continue
            if 'input' in cell and isinstance(cell.input, str):
                cell.input = cell.input.splitlines(True)
            for output in cell.outputs:
                for field in _multiline_outputs:
                    value = output.get(field, None)
                    if isinstance(value, str):
                        output[field] = value.splitlines(True)
    return nb
# b64 encode/decode are never actually used, because all bytes objects in
# the notebook are already b64-encoded, and we don't need/want to double-encode
def base64_decode(nb):
    """Restore all bytes objects in the notebook from base64-encoded strings.

    The ``png`` and ``jpeg`` entries of every code-cell output are decoded
    in place; text values are first encoded to ASCII bytes.

    Note: This is never used
    """
    for worksheet in nb.worksheets:
        for cell in worksheet.cells:
            if cell.cell_type != 'code':
                continue
            for output in cell.outputs:
                for image_key in ('png', 'jpeg'):
                    if image_key not in output:
                        continue
                    if isinstance(getattr(output, image_key), str):
                        setattr(output, image_key,
                                getattr(output, image_key).encode('ascii'))
                    setattr(output, image_key,
                            decodebytes(getattr(output, image_key)))
    return nb
def base64_encode(nb):
    """Base64 encode all bytes objects in the notebook.

    The ``png`` and ``jpeg`` entries of every code-cell output are replaced,
    in place, by b64-encoded unicode strings.

    Note: This is never used
    """
    for worksheet in nb.worksheets:
        for cell in worksheet.cells:
            if cell.cell_type != 'code':
                continue
            for output in cell.outputs:
                for image_key in ('png', 'jpeg'):
                    if image_key in output:
                        encoded = encodebytes(getattr(output, image_key))
                        setattr(output, image_key, encoded.decode('ascii'))
    return nb
def strip_transient(nb):
    """Strip transient values that shouldn't be stored in files.

    Removes, in place, the notebook's ``orig_nbformat`` and
    ``orig_nbformat_minor`` and every cell's ``trusted`` flag.

    This should be called in *both* read and write.
    """
    for transient_key in ('orig_nbformat', 'orig_nbformat_minor'):
        nb.pop(transient_key, None)
    for worksheet in nb['worksheets']:
        for cell in worksheet['cells']:
            cell_metadata = cell.get('metadata', {})
            cell_metadata.pop('trusted', None)
            # strip cell.trusted even though it shouldn't be used,
            # since it's where the transient value used to be stored.
            cell.pop('trusted', None)
    return nb
class NotebookReader(object):
    """A class for reading notebooks.

    Subclasses implement ``reads``; ``read`` is built on top of it.
    """

    def reads(self, s, **kwargs):
        """Read a notebook from a string.

        Raises
        ------
        NotImplementedError
            Always, in this base class.
        """
        # The message names the method a subclass must actually provide.
        raise NotImplementedError("reads must be implemented in a subclass")

    def read(self, fp, **kwargs):
        """Read a notebook from a file like object.

        The whole content of ``fp`` is read and handed to ``reads``
        together with ``kwargs``.
        """
        nbs = fp.read()
        return self.reads(nbs, **kwargs)
class NotebookWriter(object):
    """A class for writing notebooks.

    Subclasses implement ``writes``; ``write`` is built on top of it.
    """

    def writes(self, nb, **kwargs):
        """Write a notebook to a string.

        Raises
        ------
        NotImplementedError
            Always, in this base class.
        """
        # The message names the method a subclass must actually provide.
        raise NotImplementedError("writes must be implemented in a subclass")

    def write(self, nb, fp, **kwargs):
        """Write a notebook to a file like object.

        Returns whatever ``fp.write`` returns for the serialised notebook.
        """
        nbs = self.writes(nb, **kwargs)
        return fp.write(nbs)

View file

@ -0,0 +1,63 @@
# -*- coding: utf8 -*-
import io
import os
import shutil
import tempfile
# Short alias for os.path.join; the tests below use it to build file paths
# inside the per-test temporary directory.
pjoin = os.path.join
from ..nbbase import (
NotebookNode,
new_code_cell, new_text_cell, new_worksheet, new_notebook
)
from ..nbpy import reads, writes, read, write
from .nbexamples import nb0, nb0_py
def open_utf8(fname, mode):
    """Open ``fname`` in the given mode as UTF-8 encoded text."""
    handle = io.open(fname, mode=mode, encoding='utf-8')
    return handle
class NBFormatTest:
    """Mixin for writing notebook format tests.

    Each test runs against ``self.mod`` using the shared ``nb0`` example
    notebook. The mixin calls ``self.assertEqual`` without defining it, so
    it is presumably meant to be combined with ``unittest.TestCase``.
    """

    # override with appropriate values in subclasses
    nb0_ref = None  # expected serialised form of nb0; not checked when falsy
    ext = None      # file extension used for the temporary notebook files
    mod = None      # object providing reads/writes/read/write

    def setUp(self):
        """Create a fresh temporary working directory."""
        self.wd = tempfile.mkdtemp()

    def tearDown(self):
        """Remove the temporary working directory."""
        shutil.rmtree(self.wd)

    def assertNBEquals(self, nba, nbb):
        """Compare two notebooks; subclasses can override this."""
        self.assertEqual(nba, nbb)

    def test_writes(self):
        serialised = self.mod.writes(nb0)
        if self.nb0_ref:
            self.assertEqual(serialised, self.nb0_ref)

    def test_reads(self):
        # only checks that reading back what was written does not raise
        self.mod.reads(self.mod.writes(nb0))

    def test_roundtrip(self):
        serialised = self.mod.writes(nb0)
        self.assertNBEquals(self.mod.reads(serialised), nb0)

    def test_write_file(self):
        path = pjoin(self.wd, "nb0.%s" % self.ext)
        with open_utf8(path, 'w') as f:
            self.mod.write(nb0, f)

    def test_read_file(self):
        # write first, so there is a file to read back
        with open_utf8(pjoin(self.wd, "nb0.%s" % self.ext), 'w') as f:
            self.mod.write(nb0, f)
        with open_utf8(pjoin(self.wd, "nb0.%s" % self.ext), 'r') as f:
            self.mod.read(f)

View file

@ -0,0 +1,152 @@
# -*- coding: utf-8 -*-
import os
from ..._compat import encodebytes
from ..nbbase import (
NotebookNode,
new_code_cell, new_text_cell, new_worksheet, new_notebook, new_output,
new_metadata, new_author, new_heading_cell, nbformat, nbformat_minor
)
# some random base64-encoded *text*
# These are placeholder image payloads: a few bytes from os.urandom, encoded
# and then decoded to an ASCII str. They are not real PNG/JPEG images, and the
# values differ on every import of this module.
png = encodebytes(os.urandom(5)).decode('ascii')
jpeg = encodebytes(os.urandom(6)).decode('ascii')
# Example worksheet: one cell of every kind, appended in a fixed order.
ws = new_worksheet()
# html text cell
ws.cells.append(new_text_cell(
u'html',
source='Some NumPy Examples',
))
# code cell, explicitly not collapsed
ws.cells.append(new_code_cell(
input='import numpy',
prompt_number=1,
collapsed=False
))
# markdown text cell
ws.cells.append(new_text_cell(
u'markdown',
source='A random array',
))
# raw text cell (same source text as the markdown cell above)
ws.cells.append(new_text_cell(
u'raw',
source='A random array',
))
# level-2 heading cell
ws.cells.append(new_heading_cell(
u'My Heading',
level=2
))
# collapsed code cell
ws.cells.append(new_code_cell(
input='a = numpy.random.rand(100)',
prompt_number=2,
collapsed=True
))
# two code cells with the same input, with and without a trailing newline
ws.cells.append(new_code_cell(
input='a = 10\nb = 5\n',
prompt_number=3,
))
ws.cells.append(new_code_cell(
input='a = 10\nb = 5',
prompt_number=4,
))
# code cell with non-ASCII input and one output of each output_type
# (pyout, display_data, pyerr, and two streams containing carriage returns).
# NOTE(review): prompt_number=3 repeats the value used two cells above --
# confirm this is intentional.
ws.cells.append(new_code_cell(
input=u'print "ünîcødé"',
prompt_number=3,
collapsed=False,
outputs=[new_output(
output_type=u'pyout',
output_text=u'<array a>',
output_html=u'The HTML rep',
output_latex=u'$a$',
output_png=png,
output_jpeg=jpeg,
output_svg=u'<svg>',
output_json=u'{"json": "data"}',
output_javascript=u'var i=0;',
prompt_number=3
),new_output(
output_type=u'display_data',
output_text=u'<array a>',
output_html=u'The HTML rep',
output_latex=u'$a$',
output_png=png,
output_jpeg=jpeg,
output_svg=u'<svg>',
output_json=u'{"json": "data"}',
output_javascript=u'var i=0;'
),new_output(
output_type=u'pyerr',
ename=u'NameError',
evalue=u'NameError was here',
traceback=[u'frame 0', u'frame 1', u'frame 2']
),new_output(
output_type=u'stream',
output_text='foo\rbar\r\n'
),new_output(
output_type=u'stream',
stream='stderr',
output_text='\rfoo\rbar\n'
)]
))
# Notebook-level metadata with a single author; the created/modified values
# are placeholder strings, not real timestamps.
authors = [new_author(name='Bart Simpson',email='bsimpson@fox.com',
affiliation=u'Fox',url=u'http://www.fox.com')]
md = new_metadata(name=u'My Notebook',license=u'BSD',created=u'8601_goes_here',
modified=u'8601_goes_here',gistid=u'21341231',authors=authors)
# The reference notebook: the populated worksheet followed by an empty one.
nb0 = new_notebook(
worksheets=[ws, new_worksheet()],
metadata=md
)
# .py-format text for the cells of the worksheet above, in the same order.
# It contains only the cell sources (no outputs, prompt numbers or metadata);
# the two %i placeholders are filled with nbformat and nbformat_minor.
nb0_py = u"""# -*- coding: utf-8 -*-
# <nbformat>%i.%i</nbformat>
# <htmlcell>
# Some NumPy Examples
# <codecell>
import numpy
# <markdowncell>
# A random array
# <rawcell>
# A random array
# <headingcell level=2>
# My Heading
# <codecell>
a = numpy.random.rand(100)
# <codecell>
a = 10
b = 5
# <codecell>
a = 10
b = 5
# <codecell>
print "ünîcødé"
""" % (nbformat, nbformat_minor)

View file

@ -0,0 +1,102 @@
import copy
import json
from unittest import TestCase
from ..._compat import decodebytes
from ..nbjson import reads, writes
from ..nbbase import from_dict
from .. import nbjson
from .nbexamples import nb0
from . import formattest
from .nbexamples import nb0
class TestJSON(formattest.NBFormatTest, TestCase):
    """JSON (.ipynb) tests: the generic format tests plus JSON-specific checks."""

    nb0_ref = None
    ext = 'ipynb'
    mod = nbjson

    def _transient_notebook(self):
        """Return a deep copy of nb0 carrying the transient values under test."""
        nb = copy.deepcopy(nb0)
        nb.orig_nbformat = 2
        nb.orig_nbformat_minor = 3
        nb.worksheets[0].cells[0].metadata.trusted = False
        return nb

    def _assert_b64_output(self, key):
        """Round-trip nb0 through JSON and check every ``key`` output.

        Each output of the first worksheet that has ``key`` must hold a
        ``str`` that can be ASCII-encoded and passed to ``decodebytes``;
        at least one such output must exist.
        """
        nb1 = nbjson.reads(writes(nb0))
        found = False
        for cell in nb1.worksheets[0].cells:
            if 'outputs' not in cell:
                continue
            for output in cell.outputs:
                if key not in output:
                    continue
                found = True
                data = output[key]
                self.assertIsInstance(data, str)
                # test that it is valid b64 data
                decodebytes(data.encode('ascii'))
        # a TestCase assertion instead of a bare ``assert``, so the check
        # still runs when Python is started with -O
        self.assertTrue(found, "never found %s output" % key)

    def test_roundtrip_nosplit(self):
        """Ensure that multiline blobs are still readable"""
        # ensures that notebooks written prior to splitlines change
        # are still readable.
        s = writes(nb0, split_lines=False)
        self.assertEqual(nbjson.reads(s), nb0)

    def test_roundtrip_split(self):
        """Ensure that splitting multiline blocks is safe"""
        # This won't differ from test_roundtrip unless the default changes
        s = writes(nb0, split_lines=True)
        self.assertEqual(nbjson.reads(s), nb0)

    def test_strip_transient(self):
        """transient values aren't written to files"""
        nb = self._transient_notebook()
        nbs = nbjson.writes(nb)
        nb2 = from_dict(json.loads(nbs))
        self.assertNotIn('orig_nbformat', nb2)
        self.assertNotIn('orig_nbformat_minor', nb2)
        for cell in nb2.worksheets[0].cells:
            self.assertNotIn('trusted', cell.metadata)

    def test_to_json(self):
        """to_notebook_json doesn't strip transient"""
        nb = self._transient_notebook()
        nbs = json.dumps(nb)
        # Assert on the value returned by to_notebook itself. Previously the
        # result was immediately overwritten with from_dict(...), so the
        # function named in the docstring was never actually checked.
        nb2 = nbjson.to_notebook(json.loads(nbs))
        self.assertIn('orig_nbformat', nb2)
        self.assertIn('orig_nbformat_minor', nb2)
        cell = nb2.worksheets[0].cells[0]
        self.assertIn('trusted', cell.metadata)

    def test_read_png(self):
        """PNG output data is b64 unicode"""
        self._assert_b64_output('png')

    def test_read_jpeg(self):
        """JPEG output data is b64 unicode"""
        self._assert_b64_output('jpeg')

View file

@ -0,0 +1,32 @@
import os
from unittest import TestCase
from .. import parse_filename
class MiscTests(TestCase):
    """Tests for ``parse_filename``."""

    def check_filename(self, path, exp_fname, exp_bname, exp_format):
        """Parse ``path`` and compare each returned field with its expectation."""
        parsed_fname, parsed_bname, parsed_format = parse_filename(path)
        self.assertEqual(parsed_fname, exp_fname)
        self.assertEqual(parsed_bname, exp_bname)
        self.assertEqual(parsed_format, exp_format)

    def test_parse_filename(self):
        abs_path = os.path.abspath("test.ipynb")
        abs_base = os.path.splitext(abs_path)[0]
        cases = [
            # check format detection
            ("test.ipynb", "test.ipynb", "test", "json"),
            ("test.json", "test.json", "test", "json"),
            ("test.py", "test.py", "test", "py"),
            # check parsing an unknown format
            ("test.nb", "test.nb.ipynb", "test.nb", "json"),
            # check parsing a full file path
            (abs_path, abs_path, abs_base, "json"),
            # check parsing a file name containing dots
            ("test.nb.ipynb", "test.nb.ipynb", "test.nb", "json"),
        ]
        for path, exp_fname, exp_bname, exp_format in cases:
            self.check_filename(path, exp_fname, exp_bname, exp_format)

View file

@ -0,0 +1,148 @@
from unittest import TestCase
from ..nbbase import (
NotebookNode,
new_code_cell, new_text_cell, new_worksheet, new_notebook, new_output,
new_author, new_metadata, new_heading_cell, nbformat
)
class TestCell(TestCase):
    """Tests for the cell constructors and for pyout/pyerr outputs."""

    def test_empty_code_cell(self):
        cc = new_code_cell()
        self.assertEqual(cc.cell_type, u'code')
        # assertNotIn reports the offending key and container on failure,
        # unlike comparing a boolean expression with True
        self.assertNotIn(u'input', cc)
        self.assertNotIn(u'prompt_number', cc)
        self.assertEqual(cc.outputs, [])
        self.assertEqual(cc.collapsed, False)

    def test_code_cell(self):
        cc = new_code_cell(input='a=10', prompt_number=0, collapsed=True)
        cc.outputs = [new_output(output_type=u'pyout',
            output_svg=u'foo', output_text=u'10', prompt_number=0)]
        self.assertEqual(cc.input, u'a=10')
        self.assertEqual(cc.prompt_number, 0)
        self.assertEqual(cc.language, u'python')
        self.assertEqual(cc.outputs[0].svg, u'foo')
        self.assertEqual(cc.outputs[0].text, u'10')
        self.assertEqual(cc.outputs[0].prompt_number, 0)
        self.assertEqual(cc.collapsed, True)

    def test_pyerr(self):
        o = new_output(output_type=u'pyerr', ename=u'NameError',
            evalue=u'Name not found', traceback=[u'frame 0', u'frame 1', u'frame 2']
        )
        self.assertEqual(o.output_type, u'pyerr')
        self.assertEqual(o.ename, u'NameError')
        self.assertEqual(o.evalue, u'Name not found')
        self.assertEqual(o.traceback, [u'frame 0', u'frame 1', u'frame 2'])

    def test_empty_html_cell(self):
        tc = new_text_cell(u'html')
        self.assertEqual(tc.cell_type, u'html')
        self.assertNotIn(u'source', tc)

    def test_html_cell(self):
        tc = new_text_cell(u'html', 'hi')
        self.assertEqual(tc.source, u'hi')

    def test_empty_markdown_cell(self):
        tc = new_text_cell(u'markdown')
        self.assertEqual(tc.cell_type, u'markdown')
        self.assertNotIn(u'source', tc)

    def test_markdown_cell(self):
        tc = new_text_cell(u'markdown', 'hi')
        self.assertEqual(tc.source, u'hi')

    def test_empty_raw_cell(self):
        tc = new_text_cell(u'raw')
        self.assertEqual(tc.cell_type, u'raw')
        self.assertNotIn(u'source', tc)

    def test_raw_cell(self):
        tc = new_text_cell(u'raw', 'hi')
        self.assertEqual(tc.source, u'hi')

    def test_empty_heading_cell(self):
        tc = new_heading_cell()
        self.assertEqual(tc.cell_type, u'heading')
        self.assertNotIn(u'source', tc)

    def test_heading_cell(self):
        tc = new_heading_cell(u'hi', level=2)
        self.assertEqual(tc.source, u'hi')
        self.assertEqual(tc.level, 2)
class TestWorksheet(TestCase):
    """Tests for ``new_worksheet``."""

    def test_empty_worksheet(self):
        ws = new_worksheet()
        self.assertEqual(ws.cells, [])
        # assertNotIn reports the key and container on failure, unlike
        # comparing a boolean expression with True
        self.assertNotIn(u'name', ws)

    def test_worksheet(self):
        cells = [new_code_cell(), new_text_cell(u'html')]
        ws = new_worksheet(cells=cells)
        self.assertEqual(ws.cells, cells)
class TestNotebook(TestCase):
    """Tests for ``new_notebook``."""

    def test_empty_notebook(self):
        notebook = new_notebook()
        self.assertEqual(notebook.worksheets, [])
        self.assertEqual(notebook.metadata, NotebookNode())
        self.assertEqual(notebook.nbformat, nbformat)

    def test_notebook(self):
        sheets = [new_worksheet(), new_worksheet()]
        meta = new_metadata(name=u'foo')
        notebook = new_notebook(metadata=meta, worksheets=sheets)
        self.assertEqual(notebook.metadata.name, u'foo')
        self.assertEqual(notebook.worksheets, sheets)
        self.assertEqual(notebook.nbformat, nbformat)

    def test_notebook_name(self):
        # the name is passed directly to new_notebook and read back from
        # the notebook's metadata
        sheets = [new_worksheet(), new_worksheet()]
        notebook = new_notebook(name='foo', worksheets=sheets)
        self.assertEqual(notebook.metadata.name, u'foo')
        self.assertEqual(notebook.worksheets, sheets)
        self.assertEqual(notebook.nbformat, nbformat)
class TestMetadata(TestCase):
    """Tests for ``new_metadata``."""

    def test_empty_metadata(self):
        md = new_metadata()
        # NOTE(review): u'saved' is checked here, while test_metadata passes
        # ``created`` -- confirm which key was meant. Kept as in the original.
        absent_keys = (u'name', u'authors', u'license', u'saved',
                       u'modified', u'gistid')
        for key in absent_keys:
            # assertNotIn names the unexpected key on failure, unlike
            # comparing a boolean expression with True
            self.assertNotIn(key, md)

    def test_metadata(self):
        authors = [new_author(name='Bart Simpson', email='bsimpson@fox.com')]
        md = new_metadata(name=u'foo', license=u'BSD', created=u'today',
            modified=u'now', gistid=u'21341231', authors=authors)
        self.assertEqual(md.name, u'foo')
        self.assertEqual(md.license, u'BSD')
        self.assertEqual(md.created, u'today')
        self.assertEqual(md.modified, u'now')
        self.assertEqual(md.gistid, u'21341231')
        self.assertEqual(md.authors, authors)
class TestOutputs(TestCase):
    """Smoke tests: ``new_output`` is called with bytes image data.

    Nothing is asserted about the result; each test passes as long as the
    call does not raise.
    """

    def test_binary_png(self):
        # raw bytes starting with the PNG signature
        new_output(output_type='display_data', output_png=b'\x89PNG\r\n\x1a\n')

    def test_b64b6tes_png(self):
        # ASCII bytes that look like base64 text
        new_output(output_type='display_data', output_png=b'iVBORw0KG')

    def test_binary_jpeg(self):
        # raw bytes starting with the JPEG start-of-image marker
        new_output(output_type='display_data', output_jpeg=b'\xff\xd8')

    def test_b64b6tes_jpeg(self):
        # ASCII bytes that look like base64 text
        new_output(output_type='display_data', output_jpeg=b'/9')

View file

@ -0,0 +1,48 @@
# -*- coding: utf8 -*-
from unittest import TestCase
from ipython_genutils.py3compat import string_types, iteritems
from . import formattest
from .. import nbpy
from .nbexamples import nb0, nb0_py
class TestPy(formattest.NBFormatTest, TestCase):
    """Run the generic format tests against the .py notebook format."""

    nb0_ref = nb0_py
    ext = 'py'
    mod = nbpy
    # dict keys skipped by assertSubset
    ignored_keys = ['collapsed', 'outputs', 'prompt_number', 'metadata']

    def assertSubset(self, da, db):
        """Assert that da is a subset of db, ignoring self.ignored_keys.

        Dicts and lists are walked recursively; anything else is compared
        for equality. Always returns True (failures raise).
        """
        if isinstance(da, dict):
            for key, value in iteritems(da):
                if key not in self.ignored_keys:
                    self.assertTrue(key in db)
                    self.assertSubset(value, db[key])
            return True

        if isinstance(da, list):
            # zip() stops at the shorter list, so surplus trailing items on
            # either side are not compared
            for left, right in zip(da, db):
                self.assertSubset(left, right)
            return True

        if isinstance(da, string_types) and isinstance(db, string_types):
            # pyfile is not sensitive to preserving leading/trailing
            # newlines in blocks through roundtrip
            da, db = da.strip('\n'), db.strip('\n')
        self.assertEqual(da, db)
        return True

    def assertNBEquals(self, nba, nbb):
        # since roundtrip is lossy, only compare keys that are preserved
        # assumes nba is read from my file format
        return self.assertSubset(nba, nbb)