231 lines
7.6 KiB
Python
231 lines
7.6 KiB
Python
|
"""Set of common tools to aid bundler implementations."""
|
||
|
|
||
|
# Copyright (c) Jupyter Development Team.
|
||
|
# Distributed under the terms of the Modified BSD License.
|
||
|
import os
|
||
|
import shutil
|
||
|
import errno
|
||
|
import nbformat
|
||
|
import fnmatch
|
||
|
import glob
|
||
|
|
||
|
def get_file_references(abs_nb_path, version):
|
||
|
"""Gets a list of files referenced either in Markdown fenced code blocks
|
||
|
or in HTML comments from the notebook. Expands patterns expressed in
|
||
|
gitignore syntax (https://git-scm.com/docs/gitignore). Returns the
|
||
|
fully expanded list of filenames relative to the notebook dirname.
|
||
|
|
||
|
Parameters
|
||
|
----------
|
||
|
abs_nb_path: str
|
||
|
Absolute path of the notebook on disk
|
||
|
version: int
|
||
|
Version of the notebook document format to use
|
||
|
|
||
|
Returns
|
||
|
-------
|
||
|
list
|
||
|
Filename strings relative to the notebook path
|
||
|
"""
|
||
|
ref_patterns = get_reference_patterns(abs_nb_path, version)
|
||
|
expanded = expand_references(os.path.dirname(abs_nb_path), ref_patterns)
|
||
|
return expanded
|
||
|
|
||
|
def get_reference_patterns(abs_nb_path, version):
|
||
|
"""Gets a list of reference patterns either in Markdown fenced code blocks
|
||
|
or in HTML comments from the notebook.
|
||
|
|
||
|
Parameters
|
||
|
----------
|
||
|
abs_nb_path: str
|
||
|
Absolute path of the notebook on disk
|
||
|
version: int
|
||
|
Version of the notebook document format to use
|
||
|
|
||
|
Returns
|
||
|
-------
|
||
|
list
|
||
|
Pattern strings from the notebook
|
||
|
"""
|
||
|
notebook = nbformat.read(abs_nb_path, version)
|
||
|
referenced_list = []
|
||
|
for cell in notebook.cells:
|
||
|
references = get_cell_reference_patterns(cell)
|
||
|
if references:
|
||
|
referenced_list = referenced_list + references
|
||
|
return referenced_list
|
||
|
|
||
|
def get_cell_reference_patterns(cell):
|
||
|
'''
|
||
|
Retrieves the list of references from a single notebook cell. Looks for
|
||
|
fenced code blocks or HTML comments in Markdown cells, e.g.,
|
||
|
|
||
|
```
|
||
|
some.csv
|
||
|
foo/
|
||
|
!foo/bar
|
||
|
```
|
||
|
|
||
|
or
|
||
|
|
||
|
<!--associate:
|
||
|
some.csv
|
||
|
foo/
|
||
|
!foo/bar
|
||
|
-->
|
||
|
|
||
|
Parameters
|
||
|
----------
|
||
|
cell: dict
|
||
|
Notebook cell object
|
||
|
|
||
|
Returns
|
||
|
-------
|
||
|
list
|
||
|
Reference patterns found in the cell
|
||
|
'''
|
||
|
referenced = []
|
||
|
# invisible after execution: unrendered HTML comment
|
||
|
if cell.get('cell_type').startswith('markdown') and cell.get('source').startswith('<!--associate:'):
|
||
|
lines = cell.get('source')[len('<!--associate:'):].splitlines()
|
||
|
for line in lines:
|
||
|
if line.startswith('-->'):
|
||
|
break
|
||
|
# Trying to go out of the current directory leads to
|
||
|
# trouble when deploying
|
||
|
if line.find('../') < 0 and not line.startswith('#'):
|
||
|
referenced.append(line)
|
||
|
# visible after execution: rendered as a code element within a pre element
|
||
|
elif cell.get('cell_type').startswith('markdown') and cell.get('source').find('```') >= 0:
|
||
|
source = cell.get('source')
|
||
|
offset = source.find('```')
|
||
|
lines = source[offset + len('```'):].splitlines()
|
||
|
for line in lines:
|
||
|
if line.startswith('```'):
|
||
|
break
|
||
|
# Trying to go out of the current directory leads to
|
||
|
# trouble when deploying
|
||
|
if line.find('../') < 0 and not line.startswith('#'):
|
||
|
referenced.append(line)
|
||
|
|
||
|
# Clean out blank references
|
||
|
return [ref for ref in referenced if ref.strip()]
|
||
|
|
||
|
def expand_references(root_path, references):
|
||
|
"""Expands a set of reference patterns by evaluating them against the
|
||
|
given root directory. Expansions are performed against patterns
|
||
|
expressed in the same manner as in gitignore
|
||
|
(https://git-scm.com/docs/gitignore).
|
||
|
|
||
|
NOTE: Temporarily changes the current working directory when called.
|
||
|
|
||
|
Parameters
|
||
|
----------
|
||
|
root_path: str
|
||
|
Assumed root directory for the patterns
|
||
|
references: list
|
||
|
Reference patterns from get_reference_patterns expressed with
|
||
|
forward-slash directory separators
|
||
|
|
||
|
Returns
|
||
|
-------
|
||
|
list
|
||
|
Filename strings relative to the root path
|
||
|
"""
|
||
|
# Use normpath to convert to platform specific slashes, but be sure
|
||
|
# to retain a trailing slash which normpath pulls off
|
||
|
normalized_references = []
|
||
|
for ref in references:
|
||
|
normalized_ref = os.path.normpath(ref)
|
||
|
# un-normalized separator
|
||
|
if ref.endswith('/'):
|
||
|
normalized_ref += os.sep
|
||
|
normalized_references.append(normalized_ref)
|
||
|
references = normalized_references
|
||
|
|
||
|
globbed = []
|
||
|
negations = []
|
||
|
must_walk = []
|
||
|
for pattern in references:
|
||
|
if pattern and pattern.find(os.sep) < 0:
|
||
|
# simple shell glob
|
||
|
cwd = os.getcwd()
|
||
|
os.chdir(root_path)
|
||
|
if pattern.startswith('!'):
|
||
|
negations = negations + glob.glob(pattern[1:])
|
||
|
else:
|
||
|
globbed = globbed + glob.glob(pattern)
|
||
|
os.chdir(cwd)
|
||
|
elif pattern:
|
||
|
must_walk.append(pattern)
|
||
|
|
||
|
for pattern in must_walk:
|
||
|
pattern_is_negation = pattern.startswith('!')
|
||
|
if pattern_is_negation:
|
||
|
testpattern = pattern[1:]
|
||
|
else:
|
||
|
testpattern = pattern
|
||
|
for root, _, filenames in os.walk(root_path):
|
||
|
for filename in filenames:
|
||
|
joined = os.path.join(root[len(root_path) + 1:], filename)
|
||
|
if testpattern.endswith(os.sep):
|
||
|
if joined.startswith(testpattern):
|
||
|
if pattern_is_negation:
|
||
|
negations.append(joined)
|
||
|
else:
|
||
|
globbed.append(joined)
|
||
|
elif testpattern.find('**') >= 0:
|
||
|
# path wildcard
|
||
|
ends = testpattern.split('**')
|
||
|
if len(ends) == 2:
|
||
|
if joined.startswith(ends[0]) and joined.endswith(ends[1]):
|
||
|
if pattern_is_negation:
|
||
|
negations.append(joined)
|
||
|
else:
|
||
|
globbed.append(joined)
|
||
|
else:
|
||
|
# segments should be respected
|
||
|
if fnmatch.fnmatch(joined, testpattern):
|
||
|
if pattern_is_negation:
|
||
|
negations.append(joined)
|
||
|
else:
|
||
|
globbed.append(joined)
|
||
|
|
||
|
for negated in negations:
|
||
|
try:
|
||
|
globbed.remove(negated)
|
||
|
except ValueError as err:
|
||
|
pass
|
||
|
return set(globbed)
|
||
|
|
||
|
def copy_filelist(src, dst, src_relative_filenames):
|
||
|
"""Copies the given list of files, relative to src, into dst, creating
|
||
|
directories along the way as needed and ignore existence errors.
|
||
|
Skips any files that do not exist. Does not create empty directories
|
||
|
from src in dst.
|
||
|
|
||
|
Parameters
|
||
|
----------
|
||
|
src: str
|
||
|
Root of the source directory
|
||
|
dst: str
|
||
|
Root of the destination directory
|
||
|
src_relative_filenames: list
|
||
|
Filenames relative to src
|
||
|
"""
|
||
|
for filename in src_relative_filenames:
|
||
|
# Only consider the file if it exists in src
|
||
|
if os.path.isfile(os.path.join(src, filename)):
|
||
|
parent_relative = os.path.dirname(filename)
|
||
|
if parent_relative:
|
||
|
# Make sure the parent directory exists
|
||
|
parent_dst = os.path.join(dst, parent_relative)
|
||
|
try:
|
||
|
os.makedirs(parent_dst)
|
||
|
except OSError as exc:
|
||
|
if exc.errno == errno.EEXIST:
|
||
|
pass
|
||
|
else:
|
||
|
raise exc
|
||
|
shutil.copy2(os.path.join(src, filename), os.path.join(dst, filename))
|