Uploaded Test files

This commit is contained in:
Batuhan Berk Başoğlu 2020-11-12 11:05:57 -05:00
parent f584ad9d97
commit 2e81cb7d99
16627 changed files with 2065359 additions and 102444 deletions

View file

@ -0,0 +1,2 @@
from joblib.test import test_memory
from joblib.test import test_hashing

View file

@ -0,0 +1,114 @@
"""
Small utilities for testing.
"""
import threading
import signal
import time
import os
import sys
import gc
from joblib._multiprocessing_helpers import mp
from joblib.testing import SkipTest, skipif
try:
import lz4
except ImportError:
lz4 = None
# Expose ``np`` (the numpy module, or None when it cannot be imported)
# together with the matching ``with_numpy`` test decorator.
try:
    import numpy as np
except ImportError:
    np = None

if np is not None:
    def with_numpy(func):
        """A decorator to skip tests requiring numpy.

        numpy was imported successfully, so ``func`` is returned untouched.
        """
        return func
else:
    def with_numpy(func):
        """A decorator to skip tests requiring numpy.

        numpy is missing: ``func`` is replaced by a zero-argument function
        that raises ``SkipTest`` when called.
        """
        def my_func():
            raise SkipTest('Test requires numpy')
        return my_func

# TODO: Turn this back on after refactoring yield based tests in test_hashing
# with_numpy = skipif(not np, reason='Test requires numpy.')
# Memory consumption checks rely on the optional memory_profiler library.
try:
    from memory_profiler import memory_usage
except ImportError:
    memory_usage = None

if memory_usage is None:
    # Nothing can be measured without the library.
    memory_used = None

    def with_memory_profiler(func):
        """A decorator to skip tests requiring memory_profiler.

        memory_profiler is missing: ``func`` is replaced by a zero-argument
        function that raises ``SkipTest`` when called.
        """
        def dummy_func():
            raise SkipTest('Test requires memory_profiler.')
        return dummy_func
else:
    def with_memory_profiler(func):
        """A decorator to skip tests requiring memory_profiler.

        memory_profiler was imported, so ``func`` is returned untouched.
        """
        return func

    def memory_used(func, *args, **kwargs):
        """Compute memory usage when executing func.

        The result is the difference between the largest and the smallest
        reading returned by ``memory_usage`` for ``func(*args, **kwargs)``.
        """
        gc.collect()
        readings = memory_usage((func, args, kwargs), interval=.001)
        return max(readings) - min(readings)
# Registry of the watchdog timers armed by setup_autokill(), keyed by module
# name.  The watchdogs exist to kill the test runner in case a multiprocessing
# assumption triggers an infinite wait on a pipe by the master process for one
# of its failed workers.
_KILLER_THREADS = {}
def setup_autokill(module_name, timeout=30):
    """Arm a watchdog timer that kills the test runner process.

    If some subprocess dies in an unexpected way we don't want the parent
    process to block indefinitely: once ``timeout`` has elapsed the current
    process receives SIGTERM and, half a second later, SIGKILL.

    Nothing is installed when the ``NO_AUTOKILL`` environment variable is set
    or when ``--pdb`` is on the command line.

    Parameters
    ----------
    module_name : hashable
        Key under which the timer is stored in ``_KILLER_THREADS``.
    timeout : number, default 30
        Delay passed to ``threading.Timer``.
    """
    autokill_disabled = "NO_AUTOKILL" in os.environ or "--pdb" in sys.argv
    if not autokill_disabled:
        # A watchdog may already be registered under this name (not expected
        # in practice): cancel it before arming the replacement.
        teardown_autokill(module_name)

        def autokill():
            own_pid = os.getpid()
            print("Timeout exceeded: terminating stalled process: %d"
                  % own_pid)
            os.kill(own_pid, signal.SIGTERM)

            # Still running after the polite request: ask the OS to kill
            # the process for real.
            time.sleep(0.5)
            print("Timeout exceeded: killing stalled process: %d" % own_pid)
            os.kill(own_pid, signal.SIGKILL)

        watchdog = threading.Timer(timeout, autokill)
        _KILLER_THREADS[module_name] = watchdog
        watchdog.start()
def teardown_autokill(module_name):
    """Cancel a previously started killer thread and forget about it.

    The timer registered under ``module_name`` in ``_KILLER_THREADS`` (if
    any) is removed from the registry and cancelled, so cancelled timers do
    not pile up in the module-level dict.  Calling this for a name that has
    no registered timer is a no-op.
    """
    killer = _KILLER_THREADS.pop(module_name, None)
    if killer is not None:
        killer.cancel()
# Skip markers for optional features of the test environment.

# ``mp`` is None when multiprocessing is not usable.
with_multiprocessing = skipif(mp is None,
                              reason='Needs multiprocessing to run.')

# Tests relying on the /dev/shm filesystem.
with_dev_shm = skipif(
    not os.path.exists('/dev/shm'),
    reason='This test requires a large /dev/shm shared memory fs.')

# ``lz4`` is None when the lz4 package could not be imported.
with_lz4 = skipif(
    lz4 is None,
    reason='Needs lz4 compression to run')
without_lz4 = skipif(lz4 is not None,
                     reason='Needs lz4 not being installed to run')

View file

@ -0,0 +1,94 @@
"""
This script is used to generate test data for joblib/test/test_numpy_pickle.py
"""
import sys
import re
# pytest needs to be able to import this module even when numpy is
# not installed
try:
import numpy as np
except ImportError:
np = None
import joblib
def get_joblib_version(joblib_version=joblib.__version__):
    """Normalize joblib version by removing suffix.

    Each dot-separated component is reduced to its leading digits;
    components that do not start with a digit are dropped.

    >>> get_joblib_version('0.8.4')
    '0.8.4'

    >>> get_joblib_version('0.8.4b1')
    '0.8.4'

    >>> get_joblib_version('0.9.dev0')
    '0.9'
    """
    numeric_parts = []
    for component in joblib_version.split('.'):
        found = re.match(r'(\d+).*', component)
        if found is not None:
            numeric_parts.append(found.group(1))
    return '.'.join(numeric_parts)
def write_test_pickle(to_pickle, args):
    """Dump ``to_pickle`` with joblib under a name derived from the options.

    The file name encodes the joblib, Python and numpy versions plus the
    compression settings read from ``args`` (``compress``, ``method`` and
    ``cache_size`` attributes).  The outcome, success or failure, is
    reported on stdout; errors raised by ``joblib.dump`` are not propagated.
    """
    use_compression = args.compress
    codec = args.method
    dump_options = {}
    version_tag = get_joblib_version()
    python_tag = '{0[0]}{0[1]}'.format(sys.version_info)
    numpy_tag = ''.join(np.__version__.split('.')[:2])

    # The game here is to generate the right filename according to the
    # options.
    if not use_compression:
        body = ''
        extension = '.pkl'
    else:
        if codec == 'zlib':
            body = '_compressed'
            dump_options['compress'] = True
            extension = '.gz'
        else:
            body = ''
            dump_options['compress'] = (codec, 3)
            extension = '.pkl.{}'.format(codec)
        # The cache_size option is only honoured for compressed pickles.
        if args.cache_size:
            dump_options['cache_size'] = 0
            body += '_cache_size'

    pickle_filename = 'joblib_{}{}_pickle_py{}_np{}{}'.format(
        version_tag, body, python_tag, numpy_tag, extension)

    try:
        joblib.dump(to_pickle, pickle_filename, **dump_options)
    except Exception as e:
        # With old python version (=< 3.3.), we can arrive there when
        # dumping compressed pickle with LzmaFile.
        failure_report = ("Error: cannot generate file '{}' with arguments "
                          "'{}'. Error was: {}")
        print(failure_report.format(pickle_filename, dump_options, e))
    else:
        print("File '{}' generated successfuly.".format(pickle_filename))
if __name__ == '__main__':
    # Command line entry point: build the reference objects and pickle them
    # according to the options given on the command line.
    import argparse

    parser = argparse.ArgumentParser(
        description="Joblib pickle data generator.")
    parser.add_argument(
        '--cache_size', action="store_true",
        help="Force creation of companion numpy files for pickled arrays.")
    parser.add_argument(
        '--compress', action="store_true",
        help="Generate compress pickles.")
    parser.add_argument(
        '--method', type=str, default='zlib',
        choices=['zlib', 'gzip', 'bz2', 'xz', 'lzma', 'lz4'],
        help="Set compression method.")

    # We need to be specific about dtypes in particular endianness
    # because the pickles can be generated on one architecture and
    # the tests run on another one. See
    # https://github.com/joblib/joblib/issues/279.
    little_endian_int = np.dtype('<i8')
    little_endian_float = np.dtype('<f8')
    to_pickle = [
        np.arange(5, dtype=little_endian_int),
        np.arange(5, dtype=little_endian_float),
        np.array([1, 'abc', {'a': 1, 'b': 2}], dtype='O'),
        # all possible bytes as a byte string
        np.arange(256, dtype=np.uint8).tobytes(),
        np.matrix([0, 1, 2], dtype=np.dtype('<i8')),
        # unicode string with non-ascii chars
        u"C'est l'\xe9t\xe9 !",
    ]

    write_test_pickle(to_pickle, parser.parse_args())

View file

@ -0,0 +1,35 @@
import mmap
from joblib.backports import make_memmap, concurrency_safe_rename
from joblib.test.common import with_numpy
from joblib.testing import parametrize
from joblib import Parallel, delayed
@with_numpy
def test_memmap(tmpdir):
    """make_memmap must keep the requested offset on the returned object.

    The offset is deliberately one byte past mmap.ALLOCATIONGRANULARITY.
    """
    granularity = mmap.ALLOCATIONGRANULARITY
    requested_offset = granularity + 1
    mmap_path = tmpdir.join('test.mmap').strpath

    mapped = make_memmap(mmap_path, shape=5 * granularity, mode='w+',
                         offset=requested_offset)

    assert mapped.offset == requested_offset
@parametrize('dst_content', [None, 'dst content'])
@parametrize('backend', [None, 'threading'])
def test_concurrency_safe_rename(tmpdir, dst_content, backend):
    """Rename four source files onto one destination in parallel.

    Whether or not the destination existed beforehand, it must end up with
    the source content and every source file must be gone.
    """
    sources = []
    for index in range(4):
        source = tmpdir.join('src_%d' % index)
        source.write('src content')
        sources.append(source)

    destination = tmpdir.join('dst')
    if dst_content is not None:
        destination.write(dst_content)

    rename_calls = (
        delayed(concurrency_safe_rename)(source.strpath, destination.strpath)
        for source in sources
    )
    Parallel(n_jobs=4, backend=backend)(rename_calls)

    assert destination.exists()
    assert destination.read() == 'src content'
    assert not any(source.exists() for source in sources)

View file

@ -0,0 +1,460 @@
from __future__ import print_function, division, absolute_import
import os
import pytest
from random import random
from uuid import uuid4
from time import sleep
from .. import Parallel, delayed, parallel_backend
from ..parallel import ThreadingBackend, AutoBatchingMixin
from .._dask import DaskDistributedBackend
distributed = pytest.importorskip('distributed')
from distributed import Client, LocalCluster, get_client
from distributed.metrics import time
from distributed.utils_test import cluster, inc
def noop(*args, **kwargs):
    """Accept any arguments, do nothing and return None."""
    return None
def slow_raise_value_error(condition, duration=0.05):
    """Sleep for ``duration`` seconds, then raise if ``condition`` is true.

    Raises
    ------
    ValueError
        When ``condition`` evaluates to true.
    """
    sleep(duration)
    if not condition:
        return
    raise ValueError("condition evaluated to True")
def count_events(event_name, client):
    """Count, per worker, the log entries recorded under ``event_name``.

    ``client.run`` is used to fetch the ``log`` attribute of every worker;
    an entry is counted when its second item equals ``event_name``.

    Returns
    -------
    dict
        Maps each key returned by ``client.run`` to its number of matches.
    """
    logs_by_worker = client.run(lambda dask_worker: dask_worker.log)
    return {
        worker: sum(1 for entry in list(log) if entry[1] == event_name)
        for worker, log in logs_by_worker.items()
    }
def test_simple(loop):
    """Run ``inc`` through the dask backend before and after a failure.

    A batch in which one task raises ValueError is dispatched between two
    successful batches: the error must reach the caller and the backend
    must still deliver correct results afterwards.
    """
    with cluster() as (sched, [worker_a, worker_b]):
        with Client(sched['address'], loop=loop) as client:  # noqa: F841
            with parallel_backend('dask') as (ba, _):
                first = Parallel()(delayed(inc)(n) for n in range(10))
                assert first == [inc(n) for n in range(10)]

                failing_calls = (delayed(slow_raise_value_error)(n == 3)
                                 for n in range(10))
                with pytest.raises(ValueError):
                    Parallel()(failing_calls)

                second = Parallel()(delayed(inc)(n) for n in range(10))
                assert second == [inc(n) for n in range(10)]
def test_dask_backend_uses_autobatching(loop):
    """The dask backend must reuse AutoBatchingMixin's batch size logic."""
    inherited = DaskDistributedBackend.compute_batch_size
    assert inherited is AutoBatchingMixin.compute_batch_size

    with cluster() as (sched, [worker_a, worker_b]):
        with Client(sched['address'], loop=loop) as client:  # noqa: F841
            with parallel_backend('dask') as (ba, _):
                with Parallel() as parallel:
                    # The backend should be initialized with a default
                    # batch size of 1:
                    active_backend = parallel._backend
                    assert isinstance(active_backend, DaskDistributedBackend)
                    assert active_backend.parallel is parallel
                    assert active_backend._effective_batch_size == 1

                    # Launch many short tasks that should trigger
                    # auto-batching:
                    parallel(delayed(lambda: None)() for _ in range(10000))
                    assert active_backend._effective_batch_size > 10
def random2():
    """Return a fresh value from ``random.random`` on every call."""
    value = random()
    return value
def test_dont_assume_function_purity(loop):
    """Two calls of the same argument-less function must both be executed.

    ``random2`` returns a different value on each call, so equal results
    would mean that one result was reused for the other call.
    """
    with cluster() as (sched, [worker_a, worker_b]):
        with Client(sched['address'], loop=loop) as client:  # noqa: F841
            with parallel_backend('dask') as (ba, _):
                draws = Parallel()(delayed(random2)() for _ in range(2))
                first, second = draws
                assert first != second
@pytest.mark.parametrize("mixed", [True, False])
def test_dask_funcname(loop, mixed):
    """Check the repr of a Batch and the task names seen by the scheduler."""
    from joblib._dask import Batch

    if mixed:
        # Alternate between inc (even indices) and abs (odd indices).
        tasks = [delayed(abs)(i) if i % 2 else delayed(inc)(i)
                 for i in range(4)]
        batch_repr = 'mixed_batch_of_inc_4_calls'
    else:
        tasks = [delayed(inc)(i) for i in range(4)]
        batch_repr = 'batch_of_inc_4_calls'

    assert repr(Batch(tasks)) == batch_repr

    def read_transition_log(dask_scheduler):
        return list(dask_scheduler.transition_log)

    with cluster() as (sched, [worker_a, worker_b]):
        with Client(sched['address'], loop=loop) as client:
            with parallel_backend('dask') as (ba, _):
                _ = Parallel(batch_size=2, pre_dispatch='all')(tasks)

            # NOTE: the adjusted repr is computed but the assertion below
            # only checks for the common 'batch_of_inc' fragment.
            batch_repr = batch_repr.replace('4', '2')
            log = client.run_on_scheduler(read_transition_log)
            assert all('batch_of_inc' in entry[0] for entry in log)
def test_no_undesired_distributed_cache_hit(loop):
    """Non-regression test for https://github.com/joblib/joblib/pull/1055.

    Dask has a pickle cache for callables that are called many times.
    Because the dask backend used to wrap both the functions and the
    arguments under instances of the Batch callable class, this caching
    mechanism could lead to bugs.  The joblib-dask backend has been
    refactored to avoid bundling the arguments as an attribute of the Batch
    instance to avoid this problem.
    """
    # Use a large number of input arguments to give the AutoBatchingMixin
    # enough tasks to kick-in.
    n_lists = 100
    lists = [[] for _ in range(n_lists)]
    np = pytest.importorskip('numpy')
    X = np.arange(int(1e6))

    def isolated_operation(list_, X=None):
        list_.append(uuid4().hex)
        return list_

    cluster = LocalCluster(n_workers=1, threads_per_worker=2)
    client = Client(cluster)
    try:
        with parallel_backend('dask') as (ba, _):
            # dispatches joblib.parallel.BatchedCalls
            results = Parallel()(
                delayed(isolated_operation)(item) for item in lists
            )

        # The original arguments should not have been mutated as the
        # mutation happens in the dask worker process.
        assert lists == [[] for _ in range(n_lists)]

        # Here we did not pass any large numpy array as argument to
        # isolated_operation so no scattering event should happen under the
        # hood.
        scatter_counts = count_events('receive-from-scatter', client)
        assert sum(scatter_counts.values()) == 0
        assert all([len(result) == 1 for result in results])

        with parallel_backend('dask') as (ba, _):
            # Append a large array which will be scattered by dask, and
            # dispatch joblib._dask.Batch
            results = Parallel()(
                delayed(isolated_operation)(item, X=X) for item in lists
            )

        # This time, auto-scattering should have kicked in.
        scatter_counts = count_events('receive-from-scatter', client)
        assert sum(scatter_counts.values()) > 0
        assert all([len(result) == 1 for result in results])
    finally:
        client.close()
        cluster.close()
class CountSerialized(object):
    """Value wrapper that counts how many times ``__reduce__`` is called.

    ``x`` holds the wrapped value and ``count`` the number of reductions
    performed on this instance.  Adding an instance to another instance or
    to a plain value yields the sum of the underlying values.
    """

    def __init__(self, x):
        self.count = 0
        self.x = x

    def __add__(self, other):
        # Unwrap the other operand when it carries an ``x`` attribute.
        other_value = getattr(other, 'x', other)
        return self.x + other_value

    __radd__ = __add__

    def __reduce__(self):
        # Record the serialization; the rebuilt copy starts again at zero.
        self.count += 1
        return (CountSerialized, (self.x,))
def add5(a, b, c, d=0, e=0):
    """Return ``a + b + c + d + e``, evaluated from left to right."""
    total = a + b
    total = total + c
    total = total + d
    return total + e
def test_manual_scatter(loop):
    """Objects listed in ``scatter=`` must be serialized only once."""
    x, y, z = (CountSerialized(value) for value in (1, 2, 3))

    with cluster() as (sched, [worker_a, worker_b]):
        with Client(sched['address'], loop=loop) as client:  # noqa: F841
            with parallel_backend('dask', scatter=[x, y]) as (ba, _):
                delayed_add5 = delayed(add5)
                tasks = [
                    delayed_add5(x, y, z, d=4, e=5),
                    delayed_add5(x, z, y, d=5, e=4),
                    delayed_add5(y, x, z, d=x, e=5),
                    delayed_add5(z, z, x, d=z, e=y),
                ]
                # Reference results computed locally from the same tasks.
                expected = []
                for func, args, kwargs in tasks:
                    expected.append(func(*args, **kwargs))
                results = Parallel()(tasks)

            # Scatter must take a list/tuple
            with pytest.raises(TypeError):
                with parallel_backend('dask', loop=loop, scatter=1):
                    pass

    assert results == expected

    # Scattered variables only serialized once
    assert x.count == 1
    assert y.count == 1
    # Depending on the version of distributed, the unscattered z variable
    # is either pickled 4 or 6 times, possibly because of the memoization
    # of objects that appear several times in the arguments of a delayed
    # task.
    assert z.count in (4, 6)
def test_auto_scatter(loop):
    """Large arrays are scattered automatically, small ones are not."""
    np = pytest.importorskip('numpy')
    array_size = int(1e4)
    data1 = np.ones(array_size, dtype=np.uint8)
    data2 = np.ones(array_size, dtype=np.uint8)
    data_to_process = [data1, data1, data1, data2, data2, data2]
    event = 'receive-from-scatter'

    with cluster() as (sched, [worker_a, worker_b]):
        with Client(sched['address'], loop=loop) as client:
            with parallel_backend('dask') as (ba, _):
                # Passing the same data as arg and kwarg triggers a single
                # scatter operation whose result is reused.
                Parallel()(delayed(noop)(data, data, index, opt=data)
                           for index, data in enumerate(data_to_process))
            # By default large array are automatically scattered with
            # broadcast=1 which means that one worker must directly receive
            # the data from the scatter operation once.
            counts = count_events(event, client)
            received = counts[worker_a['address']]
            received += counts[worker_b['address']]
            # assert received == 2
            assert 2 <= received <= 4

    with cluster() as (sched, [worker_a, worker_b]):
        with Client(sched['address'], loop=loop) as client:
            with parallel_backend('dask') as (ba, _):
                Parallel()(delayed(noop)(data1[:3], index)
                           for index in range(5))
            # Small arrays are passed within the task definition without
            # going through a scatter operation.
            counts = count_events(event, client)
            assert counts[worker_a['address']] == 0
            assert counts[worker_b['address']] == 0
@pytest.mark.parametrize("retry_no", [0, 1])
def test_nested_scatter(loop, retry_no):
    """Run nested dask-backed Parallel calls on slices of a numpy array.

    The test only checks that the nested calls complete; the results are
    not inspected.
    """
    np = pytest.importorskip('numpy')

    NUM_INNER_TASKS = 10
    NUM_OUTER_TASKS = 10

    def my_sum(x, i, j):
        return np.sum(x)

    def outer_function_joblib(array, i):
        client = get_client()  # noqa
        with parallel_backend("dask"):
            inner_calls = (delayed(my_sum)(array[j:], i, j)
                           for j in range(NUM_INNER_TASKS))
            partial_sums = Parallel()(inner_calls)
        return sum(partial_sums)

    with cluster() as (sched, [worker_a, worker_b]):
        with Client(sched['address'], loop=loop) as _:
            with parallel_backend("dask"):
                my_array = np.ones(10000)
                outer_calls = (delayed(outer_function_joblib)(my_array[i:], i)
                               for i in range(NUM_OUTER_TASKS))
                _ = Parallel()(outer_calls)
def test_nested_backend_context_manager(loop):
    """Nested Parallel calls must work with two successive clients.

    Every group of pids collected by the nested calls must contain at most
    two distinct values.  The second round, with a new client on the same
    cluster, checks that there are no deadlocks.
    """
    def get_nested_pids():
        pids = set()
        for _ in range(2):
            pids |= set(
                Parallel(n_jobs=2)(delayed(os.getpid)() for _ in range(2)))
        return pids

    with cluster() as (sched, [worker_a, worker_b]):
        for _round in range(2):
            with Client(sched['address'], loop=loop) as client:  # noqa: F841
                with parallel_backend('dask') as (ba, _):
                    pid_groups = Parallel(n_jobs=2)(
                        delayed(get_nested_pids)() for _ in range(10)
                    )
                    for pid_group in pid_groups:
                        assert len(set(pid_group)) <= 2
def test_nested_backend_context_manager_implicit_n_jobs(loop):
    """Check that Parallel with no explicit n_jobs value automatically
    selects all the dask workers, including in nested calls.
    """
    expected_backend = "DaskDistributedBackend"

    def _backend_type(p):
        return p._backend.__class__.__name__

    def get_nested_implicit_n_jobs():
        with Parallel() as p:
            return _backend_type(p), p.n_jobs

    with cluster() as (sched, [worker_a, worker_b]):
        with Client(sched['address'], loop=loop) as client:  # noqa: F841
            with parallel_backend('dask') as (ba, _):
                with Parallel() as p:
                    assert _backend_type(p) == expected_backend
                    assert p.n_jobs == -1
                    nested_reports = p(
                        delayed(get_nested_implicit_n_jobs)()
                        for _ in range(2)
                    )
                for backend_type, nested_n_jobs in nested_reports:
                    assert backend_type == expected_backend
                    assert nested_n_jobs == -1
def test_errors(loop):
    """Selecting the dask backend without a client must raise ValueError.

    The error message is expected to mention creating a dask client.
    """
    with pytest.raises(ValueError) as info:
        with parallel_backend('dask'):
            pass

    message = str(info.value).lower()
    assert "create a dask client" in message
def test_correct_nested_backend(loop):
    """The innermost backend must honour the nested ``require`` constraint."""
    scenarios = [
        # No requirement, should be us
        (None, DaskDistributedBackend),
        # Require threads, should be threading
        ('sharedmem', ThreadingBackend),
    ]
    with cluster() as (sched, [worker_a, worker_b]):
        with Client(sched['address'], loop=loop) as client:  # noqa: F841
            for nested_require, expected_backend in scenarios:
                with parallel_backend('dask') as (ba, _):
                    result = Parallel(n_jobs=2)(
                        delayed(outer)(nested_require=nested_require)
                        for _ in range(1))
                    innermost = result[0][0][0]
                    assert isinstance(innermost, expected_backend)
def outer(nested_require):
    """Outermost nesting level: run ``middle`` once, preferring threads.

    Returns the list produced by the Parallel call (a single result).
    """
    runner = Parallel(n_jobs=2, prefer='threads')
    return runner(delayed(middle)(nested_require) for _ in range(1))
def middle(require):
    """Intermediate nesting level: run ``inner`` once under ``require``.

    Returns the list produced by the Parallel call (a single result).
    """
    runner = Parallel(n_jobs=2, require=require)
    return runner(delayed(inner)() for _ in range(1))
def inner():
    """Return the backend picked by a default-constructed Parallel."""
    backend = Parallel()._backend
    return backend
def test_secede_with_no_processes(loop):
    """Run tasks on an in-process client (``processes=False``).

    See https://github.com/dask/distributed/issues/1775
    """
    with Client(loop=loop, processes=False, set_as_default=True):
        with parallel_backend('dask'):
            id_calls = (delayed(id)(value) for value in range(2))
            Parallel(n_jobs=4)(id_calls)
def _worker_address(_):
    """Return the address of the dask worker running this call.

    The single positional argument is ignored.
    """
    from distributed import get_worker

    current_worker = get_worker()
    return current_worker.address
def test_dask_backend_keywords(loop):
    """The ``workers=`` keyword must pin every task to the given worker."""
    with cluster() as (sched, [worker_a, worker_b]):
        with Client(sched['address'], loop=loop) as client:  # noqa: F841
            for worker in (worker_a, worker_b):
                target = worker['address']
                with parallel_backend('dask', workers=target) as (ba, _):
                    seq = Parallel()(
                        delayed(_worker_address)(i) for i in range(10))
                    assert seq == [target] * 10
def test_cleanup(loop):
    """After a Parallel run the scheduler and the client must be emptied.

    The scheduler is polled until it holds no task any more; the test fails
    if that takes 5 or more (as measured by ``distributed.metrics.time``).
    """
    with Client(processes=False, loop=loop) as client:
        with parallel_backend('dask'):
            Parallel()(delayed(inc)(i) for i in range(10))

        deadline = time() + 5
        while client.cluster.scheduler.tasks:
            sleep(0.01)
            assert time() < deadline

        assert not client.futures
def _distributed_release(version=None):
    """Return the leading numeric components of a version as an int tuple.

    At most the first three dot-separated components are used; parsing
    stops at the first component that does not start with a digit, and only
    the leading digits of a component are kept (``'2.9.0+5.gabc'`` gives
    ``(2, 9, 0)``).  ``version`` defaults to ``distributed.__version__``.
    """
    import re

    if version is None:
        version = distributed.__version__
    release = []
    for component in version.split('.')[:3]:
        digits = re.match(r'\d+', component)
        if digits is None:
            break
        release.append(int(digits.group()))
    return tuple(release)


@pytest.mark.parametrize("cluster_strategy", ["adaptive", "late_scaling"])
@pytest.mark.skipif(
    # Compare numeric tuples: comparing the raw version strings is
    # lexicographic and misorders releases such as '1.3.0' and '1.28.0'.
    (1, 28, 0) <= _distributed_release() <= (2, 1, 1),
    reason="distributed bug - https://github.com/dask/distributed/pull/2841")
def test_wait_for_workers(cluster_strategy):
    """Parallel must wait for a worker when the cluster starts empty.

    The cluster is created without any worker; workers only show up through
    adaptive scaling or through a late, non-blocking ``scale`` call.
    """
    cluster = LocalCluster(n_workers=0, processes=False, threads_per_worker=2)
    client = Client(cluster)
    try:
        # Scaling happens inside the try block so that the client and the
        # cluster are closed even if it fails.
        if cluster_strategy == "adaptive":
            cluster.adapt(minimum=0, maximum=2)
        elif cluster_strategy == "late_scaling":
            # Tell the cluster to start workers but this is a non-blocking
            # call and new workers might take time to connect. In this case
            # the Parallel call should wait for at least one worker to come
            # up before starting to schedule work.
            cluster.scale(2)

        with parallel_backend('dask'):
            # The following should wait a bit for at least one worker to
            # become available.
            Parallel()(delayed(inc)(i) for i in range(10))
    finally:
        client.close()
        cluster.close()
def test_wait_for_workers_timeout():
    """Check the errors raised when no worker ever joins the cluster."""
    scenarios = [
        # Short timeout: DaskDistributedBackend
        (0.1, TimeoutError,
         "DaskDistributedBackend has no worker after 0.1 seconds."),
        # No timeout: fallback to generic joblib failure:
        (0, RuntimeError,
         "DaskDistributedBackend has no active worker"),
    ]

    # Start a cluster with 0 worker:
    cluster = LocalCluster(n_workers=0, processes=False, threads_per_worker=2)
    client = Client(cluster)
    try:
        for timeout, error_type, msg in scenarios:
            with parallel_backend('dask', wait_for_workers_timeout=timeout):
                with pytest.raises(error_type, match=msg):
                    Parallel()(delayed(inc)(i) for i in range(10))
    finally:
        client.close()
        cluster.close()

View file

@ -0,0 +1,31 @@
"""
Tests making sure that deprecated objects properly raise a deprecation warning
when imported/created.
"""
import sys
import pytest
from joblib.my_exceptions import _deprecated_names as _deprecated_exceptions
from joblib.format_stack import _deprecated_names as _deprecated_format_utils
@pytest.mark.xfail(sys.version_info < (3, 7), reason="no module-level getattr")
def test_deprecated_joblib_exceptions():
    """Importing a deprecated name from joblib.my_exceptions must warn."""
    assert 'JoblibException' in _deprecated_exceptions

    notice = ' is deprecated and will be removed from joblib in 0.16'
    for deprecated_name in _deprecated_exceptions:
        expected_warning = deprecated_name + notice
        import_statement = (
            'from joblib.my_exceptions import ' + deprecated_name)
        with pytest.warns(DeprecationWarning, match=expected_warning):
            exec(import_statement)
@pytest.mark.xfail(sys.version_info < (3, 7), reason="no module-level getattr")
def test_deprecated_formatting_utilities(capsys):
    """Importing a deprecated name from joblib.format_stack must warn."""
    for required_name in ('safe_repr', 'eq_repr'):
        assert required_name in _deprecated_format_utils

    notice = ' is deprecated and will be removed from joblib in 0.16'
    for deprecated_name in _deprecated_format_utils:
        expected_warning = deprecated_name + notice
        import_statement = (
            'from joblib.format_stack import ' + deprecated_name)
        with pytest.warns(DeprecationWarning, match=expected_warning):
            exec(import_statement)

View file

@ -0,0 +1,71 @@
"""
Unit tests for the disk utilities.
"""
# Authors: Gael Varoquaux <gael dot varoquaux at normalesup dot org>
# Lars Buitinck
# Copyright (c) 2010 Gael Varoquaux
# License: BSD Style, 3 clauses.
from __future__ import with_statement
import array
import os
from joblib.disk import disk_used, memstr_to_bytes, mkdirp, rm_subdirs
from joblib.testing import parametrize, raises
###############################################################################
def test_disk_used(tmpdir):
    """disk_used must report roughly the size of a file written to disk."""
    cachedir = tmpdir.strpath
    # Now write a file that is 1M big in this directory, and check the
    # size. The reason we use such a big file is that it makes us robust
    # to errors due to block allocation.
    target_size = 1024
    item_size = array.array('i').itemsize
    n_items = int(target_size * 1024 / item_size)
    payload = array.array('i', [1] * n_items)

    file_path = os.path.join(cachedir, 'test')
    with open(file_path, 'wb') as output:
        payload.tofile(output)

    # Lower bound first, then a small tolerance above the target.
    assert disk_used(cachedir) >= target_size
    assert disk_used(cachedir) < target_size + 12
@parametrize('text,value', [
    ('80G', 80 * 1024 ** 3),
    ('1.4M', int(1.4 * 1024 ** 2)),
    ('120M', 120 * 1024 ** 2),
    ('53K', 53 * 1024),
])
def test_memstr_to_bytes(text, value):
    """memstr_to_bytes must convert K/M/G suffixed sizes to byte counts."""
    converted = memstr_to_bytes(text)
    assert converted == value
@parametrize('text,exception,regex', [
    ('fooG', ValueError, r'Invalid literal for size.*fooG.*'),
    ('1.4N', ValueError, r'Invalid literal for size.*1.4N.*'),
])
def test_memstr_to_bytes_exception(text, exception, regex):
    """Malformed size strings must raise with a message quoting the input."""
    with raises(exception) as excinfo:
        memstr_to_bytes(text)

    assert excinfo.match(regex)
def test_mkdirp(tmpdir):
    """mkdirp must accept existing and nested paths, but not hide errors."""
    root = tmpdir.strpath
    # 'ham' is requested twice: the second call hits an existing directory.
    for parts in (('ham',), ('ham',), ('spam', 'spam')):
        mkdirp(os.path.join(root, *parts))

    # Not all OSErrors are ignored
    with raises(OSError):
        mkdirp('')
def test_rm_subdirs(tmpdir):
    """rm_subdirs must delete the sub-directories but keep the directory."""
    parent_dir = os.path.join(tmpdir.strpath, "am", "stram")
    child_dir = os.path.join(parent_dir, "gram")
    mkdirp(child_dir)

    rm_subdirs(parent_dir)

    assert os.path.exists(parent_dir)
    assert not os.path.exists(child_dir)

View file

@ -0,0 +1,129 @@
"""
Unit tests for the stack formatting utilities
"""
# Author: Gael Varoquaux <gael dot varoquaux at normalesup dot org>
# Copyright (c) 2010 Gael Varoquaux
# License: BSD Style, 3 clauses.
import imp
import os
import re
import sys
import pytest
from joblib.format_stack import safe_repr, _fixed_getframes, format_records
from joblib.format_stack import format_exc
from joblib.test.common import with_numpy, np
###############################################################################
class Vicious(object):
    """Object whose ``repr`` always fails with ValueError."""

    def __repr__(self):
        raise ValueError()
def test_safe_repr():
    """safe_repr must not propagate the error raised by a failing repr."""
    troublemaker = Vicious()
    safe_repr(troublemaker)
def _change_file_extensions_to_pyc(record):
    """Return a copy of a 6-item record with a ``.py`` filename made ``.pyc``.

    The filename is the second item of ``record``; it is left alone when it
    does not end with ``.py``.  The result is always a tuple.
    """
    first, filename, third, fourth, fifth, sixth = record
    new_filename = filename + 'c' if filename.endswith('.py') else filename
    return (first, new_filename, third, fourth, fifth, sixth)
def _raise_exception(a, b):
    """Function that raises with a non trivial call stack

    The ValueError is raised from a nested helper so that the traceback
    holds one frame for this function and one for ``helper``.
    """
    # NOTE: test_format_records matches the source text of the lines below
    # and the values of ``a`` and ``b``; keep the statements as they are.
    def helper(a, b):
        raise ValueError('Nope, this can not work')

    helper(a, b)
def test_format_records():
    """format_records must point at the .py sources and the failing lines."""
    try:
        _raise_exception('a', 42)
    except ValueError:
        traceback_obj = sys.exc_info()[2]

        # Modify filenames in traceback records from .py to .pyc
        pyc_records = []
        for record in _fixed_getframes(traceback_obj):
            pyc_records.append(_change_file_extensions_to_pyc(record))

        formatted_records = format_records(pyc_records)

        # Check that the .py file and not the .pyc one is listed in
        # the traceback
        assert all('test_format_stack.py in' in formatted
                   for formatted in formatted_records)

        # Check exception stack: one (record index, source pattern) pair
        # per frame, each source line being prefixed by an arrow marker.
        arrow_regex = r'^-+>\s+\d+\s+'
        outer_frame, caller_frame, helper_frame = formatted_records[:3]

        assert re.search(arrow_regex + r"_raise_exception\('a', 42\)",
                         outer_frame, re.MULTILINE)

        assert re.search(arrow_regex + r'helper\(a, b\)',
                         caller_frame, re.MULTILINE)
        assert "a = 'a'" in caller_frame
        assert 'b = 42' in caller_frame

        raise_pattern = (arrow_regex +
                         r"raise ValueError\('Nope, this can not work'\)")
        assert re.search(raise_pattern, helper_frame, re.MULTILINE)
def test_format_records_file_with_less_lines_than_context(tmpdir):
    """Format a traceback from a file shorter than the requested context.

    See https://github.com/joblib/joblib/issues/420
    """
    code_lines = ['def func():', ' 1/0']
    filename = os.path.join(tmpdir.strpath, 'small_file.py')
    with open(filename, 'w') as source_file:
        source_file.write('\n'.join(code_lines))

    small_file = imp.load_source('small_file', filename)
    if not hasattr(small_file, 'func'):
        pytest.skip("PyPy bug?")

    try:
        small_file.func()
    except ZeroDivisionError:
        traceback_obj = sys.exc_info()[2]

        records = _fixed_getframes(traceback_obj, context=10)
        # Check that if context is bigger than the number of lines in
        # the file you do not get padding
        frame, tb_filename, line, func_name, context, _ = records[-1]
        stripped_context = [source_line.rstrip() for source_line in context]
        assert stripped_context == code_lines

        formatted_records = format_records(records)
        small_file_record = formatted_records[1]
        # 2 lines for header in the traceback: lines of ...... +
        # filename with function
        len_header = 2
        nb_lines_formatted_records = len(small_file_record.splitlines())
        expected_nb_lines = len_header + len(code_lines)
        assert nb_lines_formatted_records == expected_nb_lines

        # Check exception stack
        arrow_regex = r'^-+>\s+\d+\s+'
        assert re.search(arrow_regex + r'1/0',
                         small_file_record, re.MULTILINE)
@with_numpy
def test_format_exc_with_compiled_code():
    """format_exc must cope with frames coming from compiled code.

    Trying to tokenize compiled C code raise SyntaxError.
    See https://github.com/joblib/joblib/issues/101 for more details.
    """
    # The name of the extension can be something like
    # mtrand.cpython-33m.so
    extension_pattern = r'mtrand[a-z0-9._-]*\.(so|pyd)'
    try:
        np.random.uniform('invalid_value')
    except Exception:
        exc_type, exc_value, exc_traceback = sys.exc_info()
        formatted_exc = format_exc(exc_type, exc_value, exc_traceback,
                                   context=10)
        assert re.search(extension_pattern, formatted_exc)

View file

@ -0,0 +1,290 @@
"""
Test the func_inspect module.
"""
# Author: Gael Varoquaux <gael dot varoquaux at normalesup dot org>
# Copyright (c) 2009 Gael Varoquaux
# License: BSD Style, 3 clauses.
import functools
from joblib.func_inspect import filter_args, get_func_name, get_func_code
from joblib.func_inspect import _clean_win_chars, format_signature
from joblib.memory import Memory
from joblib.test.common import with_numpy
from joblib.testing import fixture, parametrize, raises
###############################################################################
# Module-level functions and fixture, for tests
def f(x, y=0):
    """Sample function: one required and one optional positional argument."""
def g(x):
    """Sample function: a single required argument."""
def h(x, y=0, *args, **kwargs):
    """Sample function: named arguments plus ``*args`` and ``**kwargs``."""
def i(x=1):
    """Sample function: a single optional argument."""
def j(x, y, **kwargs):
    """Sample function: two required arguments plus ``**kwargs``."""
def k(*args, **kwargs):
    """Sample function: only ``*args`` and ``**kwargs``."""
def m1(x, *, y):
    """Sample function: a required keyword-only argument ``y``."""
def m2(x, *, y, z=3):
    """Sample function: keyword-only arguments, ``z`` having a default."""
@fixture(scope='module')
def cached_func(tmpdir_factory):
    """Module-scoped fixture returning a Memory-decorated function.

    We should be careful not to call the decorated function, so that cache
    directories are not created in the temp dir.
    """
    cachedir = tmpdir_factory.mktemp("joblib_test_func_inspect")
    memory = Memory(cachedir.strpath)

    def cached_func_inner(x):
        return x

    return memory.cache(cached_func_inner)
class Klass(object):
    """Minimal class providing a bound method for the tests."""

    def f(self, x):
        """Return ``x`` unchanged."""
        result = x
        return result
###############################################################################
# Tests
@parametrize('func,args,filtered_args', [
    (f, [[], (1, )], {'x': 1, 'y': 0}),
    (f, [['x'], (1, )], {'y': 0}),
    (f, [['y'], (0, )], {'x': 0}),
    (f, [['y'], (0, ), {'y': 1}], {'x': 0}),
    (f, [['x', 'y'], (0, )], {}),
    (f, [[], (0,), {'y': 1}], {'x': 0, 'y': 1}),
    (f, [['y'], (), {'x': 2, 'y': 1}], {'x': 2}),
    (g, [[], (), {'x': 1}], {'x': 1}),
    (i, [[], (2, )], {'x': 2}),
])
def test_filter_args(func, args, filtered_args):
    """filter_args on plain functions: defaults are filled in and the
    names of the ignore list are dropped.
    """
    outcome = filter_args(func, *args)
    assert outcome == filtered_args
def test_filter_args_method():
    """For a bound method, the instance is reported under ``'self'``."""
    instance = Klass()
    outcome = filter_args(instance.f, [], (1, ))
    assert outcome == {'x': 1, 'self': instance}
@parametrize('func,args,filtered_args', [
    (h, [[], (1, )],
     {'x': 1, 'y': 0, '*': [], '**': {}}),
    (h, [[], (1, 2, 3, 4)],
     {'x': 1, 'y': 2, '*': [3, 4], '**': {}}),
    (h, [[], (1, 25), {'ee': 2}],
     {'x': 1, 'y': 25, '*': [], '**': {'ee': 2}}),
    (h, [['*'], (1, 2, 25), {'ee': 2}],
     {'x': 1, 'y': 2, '**': {'ee': 2}}),
])
def test_filter_varargs(func, args, filtered_args):
    """Extra positional arguments are reported under ``'*'`` and extra
    keyword arguments under ``'**'``; ``'*'`` can be ignored as well.
    """
    outcome = filter_args(func, *args)
    assert outcome == filtered_args
# Additional cases for test_filter_kwargs, exercising the functions that
# take keyword-only arguments.
test_filter_kwargs_extra_params = [
    (m1, [[], (1,), {'y': 2}],
     {'x': 1, 'y': 2}),
    (m2, [[], (1,), {'y': 2}],
     {'x': 1, 'y': 2, 'z': 3}),
]
@parametrize('func,args,filtered_args', [
    (k, [[], (1, 2), {'ee': 2}],
     {'*': [1, 2], '**': {'ee': 2}}),
    (k, [[], (3, 4)],
     {'*': [3, 4], '**': {}}),
] + test_filter_kwargs_extra_params)
def test_filter_kwargs(func, args, filtered_args):
    """filter_args on ``*args``/``**kwargs`` only functions, plus the
    keyword-only cases listed in test_filter_kwargs_extra_params.
    """
    outcome = filter_args(func, *args)
    assert outcome == filtered_args
def test_filter_args_2():
    """filter_args with ``**kwargs`` functions and functools.partial."""
    with_extra_keyword = filter_args(j, [], (1, 2), {'ee': 2})
    assert with_extra_keyword == {'x': 1, 'y': 2, '**': {'ee': 2}}

    # filter_args has to special-case partial
    partial_f = functools.partial(f, 1)
    expected = {'*': [1], '**': {}}
    assert filter_args(partial_f, [], (1, )) == expected
    assert filter_args(partial_f, ['y'], (1, )) == expected
@parametrize('func,funcname', [
    (f, 'f'),
    (g, 'g'),
    (cached_func, 'cached_func'),
])
def test_func_name(func, funcname):
    """Check that we are not confused by decoration.

    Here the 'cached_func' test case is the fixture function itself.
    """
    reported_name = get_func_name(func)[1]
    assert reported_name == funcname
def test_func_name_on_inner_func(cached_func):
    """Check that we are not confused by decoration.

    Here 'cached_func' is the 'cached_func_inner' function returned by the
    'cached_func' fixture.
    """
    reported_name = get_func_name(cached_func)[1]
    assert reported_name == 'cached_func_inner'
def test_func_inspect_errors():
    """Check that func_inspect is robust and will work on weird objects."""
    # A method of a built-in type: a name is reported but no source code.
    builtin_method = 'a'.lower
    assert get_func_name(builtin_method)[-1] == 'lower'
    assert get_func_code(builtin_method)[1:] == (None, -1)

    this_file = __file__.replace('.pyc', '.py')
    anonymous = lambda x: x
    assert get_func_name(anonymous, win_characters=False)[-1] == '<lambda>'
    assert get_func_code(anonymous)[1] == this_file

    # Simulate a function defined in __main__
    anonymous.__module__ = '__main__'
    assert get_func_name(anonymous, win_characters=False)[-1] == '<lambda>'
    assert get_func_code(anonymous)[1] == this_file
def func_with_kwonly_args(a, b, *, kw1='kw1', kw2='kw2'):
    """Sample function: two keyword-only arguments with defaults."""
def func_with_signature(a: int, b: int) -> None:
    """Sample function: annotated parameters and return value."""
def test_filter_args_edge_cases():
    """filter_args with keyword-only arguments and annotated signatures."""
    all_given = filter_args(func_with_kwonly_args, [], (1, 2),
                            {'kw1': 3, 'kw2': 4})
    assert all_given == {'a': 1, 'b': 2, 'kw1': 3, 'kw2': 4}

    # Passing the keyword-only parameter 'kw1' positionally is rejected.
    with raises(ValueError) as excinfo:
        filter_args(func_with_kwonly_args, [], (1, 2, 3), {'kw2': 2})
    excinfo.match("Keyword-only parameter 'kw1' was passed as positional "
                  "parameter")

    # Ignored names may be positional ('b') or keyword-only ('kw2').
    some_ignored = filter_args(func_with_kwonly_args, ['b', 'kw2'], (1, 2),
                               {'kw1': 3, 'kw2': 4})
    assert some_ignored == {'a': 1, 'kw1': 3}

    annotated = filter_args(func_with_signature, ['b'], (1, 2))
    assert annotated == {'a': 1}
def test_bound_methods():
    """Make sure that calling the same method on two different instances
    of the same class does resolve to different signatures.
    """
    first, second = Klass(), Klass()
    first_args = filter_args(first.f, [], (1, ))
    second_args = filter_args(second.f, [], (1, ))
    assert first_args != second_args
@parametrize('exception,regex,func,args', [
    (ValueError, 'ignore_lst must be a list of parameters to ignore',
     f, ['bar', (None, )]),
    (ValueError, r'Ignore list: argument \'(.*)\' is not defined',
     g, [['bar'], (None, )]),
    (ValueError, 'Wrong number of arguments',
     h, [[]]),
])
def test_filter_args_error_msg(exception, regex, func, args):
    """Make sure that filter_args returns decent error messages, for the
    sake of the user.
    """
    with raises(exception) as excinfo:
        filter_args(func, *args)

    excinfo.match(regex)
def test_filter_args_no_kwargs_mutation():
    """Non-regression test against 0.12.0 changes.

    https://github.com/joblib/joblib/pull/75

    Make sure filter args doesn't mutate the kwargs dict that gets passed in.
    """
    passed_kwargs = {'x': 0}
    filter_args(g, [], [], passed_kwargs)
    assert passed_kwargs == {'x': 0}
def test_clean_win_chars():
    """_clean_win_chars must remove the characters checked below."""
    windows_path = r'C:\foo\bar\main.py'
    mangled = _clean_win_chars(windows_path)
    forbidden = ('\\', ':', '<', '>', '!')
    assert not any(char in mangled for char in forbidden)
@parametrize('func,args,kwargs,sgn_expected', [
    (g, [list(range(5))], {}, 'g([0, 1, 2, 3, 4])'),
    (k, [1, 2, (3, 4)], {'y': True}, 'k(1, 2, (3, 4), y=True)'),
])
def test_format_signature(func, args, kwargs, sgn_expected):
    """Test signature formatting."""
    formatted = format_signature(func, *args, **kwargs)
    path, sgn_result = formatted
    assert sgn_result == sgn_expected
def test_format_signature_long_arguments():
    """Long arguments must be shortened in the formatted signature."""
    shortening_threshold = 1500
    # shortening gets it down to 700 characters but there is the name
    # of the function in the signature and a few additional things
    # like dots for the ellipsis
    shortening_target = 700 + 10
    nb_args = 5

    long_arg = 'a' * shortening_threshold
    many_args = [long_arg] * nb_args
    many_kwargs = {str(position): value
                   for position, value in enumerate(many_args)}

    # A single long positional argument.
    _, signature = format_signature(h, long_arg)
    assert len(signature) < shortening_target

    # Several long positional arguments.
    _, signature = format_signature(h, *many_args)
    assert len(signature) < shortening_target * nb_args

    # Several long keyword arguments.
    _, signature = format_signature(h, **many_kwargs)
    assert len(signature) < shortening_target * nb_args

    # Both at once.
    _, signature = format_signature(h, *many_args, **many_kwargs)
    assert len(signature) < shortening_target * 2 * nb_args
@with_numpy
def test_format_signature_numpy():
    """Placeholder for signature formatting with numpy arguments.

    The test has no body besides this docstring and checks nothing.
    """
def test_special_source_encoding():
    """get_func_code must handle the special-encoding test module."""
    from joblib.test.test_func_inspect_special_encoding import big5_f

    code_info = get_func_code(big5_f)
    func_code, source_file, first_line = code_info

    assert first_line == 5
    assert "def big5_f():" in func_code
    assert "test_func_inspect_special_encoding" in source_file
def _get_code():
    """Return the first item of ``get_func_code(big5_f)``."""
    from joblib.test.test_func_inspect_special_encoding import big5_f

    code_info = get_func_code(big5_f)
    return code_info[0]
def test_func_code_consistency():
    """Five parallel calls to _get_code must all return the same code."""
    from joblib.parallel import Parallel, delayed

    runner = Parallel(n_jobs=2)
    codes = runner(delayed(_get_code)() for _ in range(5))
    distinct_codes = set(codes)
    assert len(distinct_codes) == 1

Some files were not shown because too many files have changed in this diff Show more