247 lines
9 KiB
Python
247 lines
9 KiB
Python
# -*- coding: utf-8 -*-
|
|
# Based on code from the vispy project
|
|
# Distributed under the (new) BSD License. See LICENSE.txt for more info.
|
|
|
|
"""Data downloading and reading functions
|
|
"""
|
|
|
|
from math import log
|
|
import os
|
|
from os import path as op
|
|
import sys
|
|
import shutil
|
|
import time
|
|
|
|
from . import appdata_dir, resource_dirs
|
|
from . import StdoutProgressIndicator, urlopen
|
|
|
|
|
|
class InternetNotAllowedError(IOError):
    """Signals that a resource download was refused.

    Plugins that need remote resources can simply call get_remote_file(),
    but should catch this error and silently ignore it.
    """
|
|
|
|
|
|
class NeedDownloadError(IOError):
    """Signals that a remote file must be downloaded explicitly.

    Raised when a requested remote file is not available locally and the
    user has to trigger the download themselves.
    """
|
|
|
|
|
|
def _resolve_local_file(filename, found_dir, given_directory, nfname):
    """Return the path to hand back for a resource found in ``found_dir``.

    If the caller asked for a specific directory and the resource was found
    somewhere else, the file is copied into the requested directory (which
    is created when missing) and the path of the copy is returned.
    Otherwise ``filename`` is returned unchanged.
    """
    if not given_directory or given_directory == found_dir:
        return filename
    filename2 = op.join(given_directory, nfname)
    # Make sure the output directory exists
    if not op.isdir(op.dirname(filename2)):
        os.makedirs(op.abspath(op.dirname(filename2)))
    shutil.copy(filename, filename2)
    return filename2


def get_remote_file(fname, directory=None, force_download=False, auto=True):
    """Get the filename for the local version of a file from the web

    Parameters
    ----------
    fname : str
        The relative filename on the remote data repository to download.
        These correspond to paths on
        ``https://github.com/imageio/imageio-binaries/``.
    directory : str | None
        The directory where the file will be cached if a download was
        required to obtain the file. By default, the appdata directory
        is used. This is also the first directory that is checked for
        a local version of the file. If the directory does not exist,
        it will be created.
    force_download : bool | str
        If True, the file will be downloaded even if a local copy exists
        (and this copy will be overwritten). Can also be a YYYY-MM-DD date
        to ensure a file is up-to-date: the timestamp reported by
        ``os.path.getctime`` for the file on disk, if present, is compared
        against that date.
    auto : bool
        Whether to auto-download the file if it's not present locally.
        Default True. If False and a download is needed, raises
        NeedDownloadError.

    Returns
    -------
    fname : str
        The path to the file on the local system.

    Raises
    ------
    InternetNotAllowedError
        If a download is needed but the environment variable
        IMAGEIO_NO_INTERNET is set to "1", "true" or "yes".
    NeedDownloadError
        If a download is needed but ``auto`` is False.
    IOError
        If the download itself fails.
    """
    _url_root = "https://github.com/imageio/imageio-binaries/raw/master/"
    url = _url_root + fname
    nfname = op.normcase(fname)  # convert to native

    # Get dirs to look for the resource; the given dir has preference.
    # A new list is built so the sequence returned by resource_dirs() is
    # never modified in place.
    given_directory = directory
    directory = given_directory or appdata_dir("imageio")
    dirs = [directory] + list(resource_dirs())

    # Try to find the resource locally
    for resource_dir in dirs:
        filename = op.join(resource_dir, nfname)
        if not op.isfile(filename):
            continue
        if not force_download:  # we're done
            return _resolve_local_file(
                filename, resource_dir, given_directory, nfname
            )
        if isinstance(force_download, str):
            ntime = time.strptime(force_download, "%Y-%m-%d")
            ftime = time.gmtime(op.getctime(filename))
            if ftime >= ntime:
                # Local copy is recent enough
                return _resolve_local_file(
                    filename, resource_dir, given_directory, nfname
                )
            print("File older than %s, updating..." % force_download)
        # A local copy exists but must be refreshed; the remaining
        # directories cannot change that, so stop searching.
        break

    # If we get here, we're going to try to download the file
    if os.getenv("IMAGEIO_NO_INTERNET", "").lower() in ("1", "true", "yes"):
        raise InternetNotAllowedError(
            "Will not download resource from the "
            "internet because environment variable "
            "IMAGEIO_NO_INTERNET is set."
        )

    # Can we proceed with auto-download?
    if not auto:
        raise NeedDownloadError(
            "Resource %r is not available locally and needs to be "
            "downloaded explicitly." % fname
        )

    # Get filename to store to and make sure the dir exists
    filename = op.join(directory, nfname)
    if not op.isdir(op.dirname(filename)):
        os.makedirs(op.abspath(op.dirname(filename)))

    # let's go get the file
    if os.getenv("CONTINUOUS_INTEGRATION", False):  # pragma: no cover
        # On Travis, we retry a few times ...
        for _ in range(2):
            try:
                _fetch_file(url, filename)
                return filename
            except IOError:
                time.sleep(0.5)
        else:
            # Last attempt; a failure here propagates to the caller
            _fetch_file(url, filename)
            return filename
    else:  # pragma: no cover
        _fetch_file(url, filename)
        return filename
|
|
|
|
|
|
def _fetch_file(url, file_name, print_destination=True):
    """Download the file at ``url`` and store it as ``file_name``

    The data is first written to ``file_name + ".part"`` and moved to
    ``file_name`` once the transfer has completed. Up to four attempts
    are made; any exception raised during an attempt is printed and the
    next attempt is started.

    Parameters
    ----------
    url: string
        The url of file to be downloaded.
    file_name: string
        Name, along with the path, of where downloaded file will be saved.
    print_destination: bool, optional
        If true, destination of where file was saved will be printed after
        download finishes.

    Raises
    ------
    IOError
        If none of the attempts succeeded. The message includes the error
        of the last attempt.
    """
    # Adapted from NISL:
    # https://github.com/nisl/tutorial/blob/master/nisl/datasets.py

    print(
        "Imageio: %r was not found on your computer; "
        "downloading it now." % os.path.basename(file_name)
    )

    temp_file_name = file_name + ".part"
    initial_size = 0
    errors = []
    for tries in range(4):
        remote_file = None
        try:
            # Checking file size and displaying it alongside the download url
            remote_file = urlopen(url, timeout=5.0)
            file_size = int(remote_file.headers["Content-Length"].strip())
            size_str = _sizeof_fmt(file_size)
            print("Try %i. Download from %s (%s)" % (tries + 1, url, size_str))
            # Downloading data (can be extended to resume if need be).
            # The with-block guarantees the temp file is closed prior to
            # the move, also when the transfer fails halfway.
            with open(temp_file_name, "wb") as local_file:
                _chunk_read(remote_file, local_file, initial_size=initial_size)
            shutil.move(temp_file_name, file_name)
            if print_destination is True:
                sys.stdout.write("File saved as %s.\n" % file_name)
            break
        except Exception as e:
            # Deliberately broad: any failure just triggers the next attempt
            errors.append(e)
            print("Error while fetching file: %s." % str(e))
        finally:
            # Release the connection of this attempt.
            # NOTE(review): assumes the object returned by urlopen has a
            # close() method, as urllib responses do - confirm.
            if remote_file is not None:
                remote_file.close()
    else:
        # Only reached when no attempt hit the ``break`` above, so
        # ``errors`` holds one entry per attempt.
        raise IOError(
            "Unable to download %r. Perhaps there is a no internet "
            "connection? If there is, please report this problem."
            % os.path.basename(file_name)
            + " Last error: %s" % str(errors[-1])
        )
|
|
|
|
|
|
def _chunk_read(response, local_file, chunk_size=8192, initial_size=0):
    """Download a file chunk by chunk and show advancement

    Can also be used when resuming downloads over http.

    Parameters
    ----------
    response: urllib.response.addinfourl
        Response to the download request in order to get file size.
        Its ``Content-Length`` header is required.
    local_file: file
        Hard disk file where data should be written.
    chunk_size: integer, optional
        Size of downloaded chunks. Default: 8192
    initial_size: int, optional
        If resuming, indicate the initial size of the file.
    """
    # Adapted from NISL:
    # https://github.com/nisl/tutorial/blob/master/nisl/datasets.py

    # Content-Length only covers the amount left to download when resuming,
    # not the size of the entire file, hence the initial size is added.
    total_size = int(response.headers["Content-Length"].strip())
    total_size += initial_size

    progress = StdoutProgressIndicator("Downloading")
    progress.start("", "bytes", total_size)

    while True:
        chunk = response.read(chunk_size)
        if not chunk:
            # An empty read marks the end of the response
            break
        _chunk_write(chunk, local_file, progress)
    progress.finish("Done")
|
|
|
|
|
|
def _chunk_write(chunk, local_file, progress):
    """Store one downloaded chunk and advance the progress indicator.

    The chunk is written to ``local_file``, after which ``progress`` is
    increased by the chunk's length.
    """
    local_file.write(chunk)
    chunk_length = len(chunk)
    progress.increase_progress(chunk_length)
    # Zero-length sleep: gives other threads a chance, e.g. those that
    # handle stdout pipes
    time.sleep(0)
|
|
|
|
|
|
def _sizeof_fmt(num):
    """Turn number of bytes into human-readable str

    Sizes are expressed in steps of 1024 using the units bytes, kB, MB,
    GB, TB and PB, with 0, 0, 1, 2, 2 and 2 decimals respectively. Values
    beyond the PB range are still expressed in PB.

    Parameters
    ----------
    num: int or float
        The number of bytes.

    Returns
    -------
    size: str
        E.g. ``"512 bytes"``, ``"1.0 MB"`` or ``"1.00 GB"``. Zero yields
        ``"0 bytes"``; any other value not greater than one yields
        ``"1 byte"``.
    """
    units = ["bytes", "kB", "MB", "GB", "TB", "PB"]
    decimals = [0, 0, 1, 2, 2, 2]
    if num > 1:
        # Select the largest unit that does not exceed num. Comparing
        # against exact powers of 1024 keeps the result independent of
        # floating-point rounding at the unit boundaries.
        exponent = 0
        while exponent < len(units) - 1 and num >= 1024 ** (exponent + 1):
            exponent += 1
        quotient = float(num) / 1024 ** exponent
        unit = units[exponent]
        num_decimals = decimals[exponent]
        format_string = "{0:.%sf} {1}" % num_decimals
        return format_string.format(quotient, unit)
    return "0 bytes" if num == 0 else "1 byte"
|