# -*- coding: utf-8 -*-
# Based on code from the vispy project
# Distributed under the (new) BSD License. See LICENSE.txt for more info.

"""Data downloading and reading functions
"""

from math import log
import os
from os import path as op
import sys
import shutil
import time

from . import appdata_dir, resource_dirs
from . import StdoutProgressIndicator, urlopen


class InternetNotAllowedError(IOError):
    """Plugins that need resources can just use get_remote_file(), but
    should catch this error and silently ignore it.
    """

    pass


class NeedDownloadError(IOError):
    """Raised when a remote file is requested that is not available locally
    and needs to be explicitly downloaded by the user.
    """


def get_remote_file(fname, directory=None, force_download=False, auto=True):
    """Get the filename for the local version of a file from the web

    Parameters
    ----------
    fname : str
        The relative filename on the remote data repository to download.
        These correspond to paths on
        ``https://github.com/imageio/imageio-binaries/``.
    directory : str | None
        The directory where the file will be cached if a download was
        required to obtain the file. By default, the appdata directory is
        used. This is also the first directory that is checked for a local
        version of the file. If the directory does not exist, it will be
        created.
    force_download : bool | str
        If True, the file will be downloaded even if a local copy exists
        (and this copy will be overwritten). Can also be a YYYY-MM-DD date
        to ensure a file is up-to-date (the modified date of a file on
        disk, if present, is checked).
    auto : bool
        Whether to auto-download the file if it is not present locally.
        Default True. If False and a download is needed, raises
        NeedDownloadError.

    Returns
    -------
    fname : str
        The path to the file on the local system.
    """
    _url_root = "https://github.com/imageio/imageio-binaries/raw/master/"
    url = _url_root + fname
    nfname = op.normcase(fname)  # convert to native

    # Get dirs to look for the resource
    given_directory = directory
    directory = given_directory or appdata_dir("imageio")
    dirs = resource_dirs()
    dirs.insert(0, directory)  # Given dir has preference

    # Try to find the resource locally
    for dir in dirs:
        filename = op.join(dir, nfname)
        if op.isfile(filename):
            if not force_download:  # we're done
                if given_directory and given_directory != dir:
                    filename2 = os.path.join(given_directory, nfname)
                    # Make sure the output directory exists
                    if not op.isdir(op.dirname(filename2)):
                        os.makedirs(op.abspath(op.dirname(filename2)))
                    shutil.copy(filename, filename2)
                    return filename2
                return filename
            if isinstance(force_download, str):
                ntime = time.strptime(force_download, "%Y-%m-%d")
                ftime = time.gmtime(op.getctime(filename))
                if ftime >= ntime:
                    # Local copy is recent enough; optionally copy it to the
                    # requested directory and return it.
                    if given_directory and given_directory != dir:
                        filename2 = os.path.join(given_directory, nfname)
                        # Make sure the output directory exists
                        if not op.isdir(op.dirname(filename2)):
                            os.makedirs(op.abspath(op.dirname(filename2)))
                        shutil.copy(filename, filename2)
                        return filename2
                    return filename
                else:
                    print("File older than %s, updating..." % force_download)
                    break

    # If we get here, we're going to try to download the file
    if os.getenv("IMAGEIO_NO_INTERNET", "").lower() in ("1", "true", "yes"):
        raise InternetNotAllowedError(
            "Will not download resource from the "
            "internet because environment variable "
            "IMAGEIO_NO_INTERNET is set."
        )

    # Can we proceed with auto-download?
    if not auto:
        raise NeedDownloadError()

    # Get filename to store to and make sure the dir exists
    filename = op.join(directory, nfname)
    if not op.isdir(op.dirname(filename)):
        os.makedirs(op.abspath(op.dirname(filename)))

    # let's go get the file
    if os.getenv("CONTINUOUS_INTEGRATION", False):  # pragma: no cover
        # On Travis, we retry a few times ...
        for i in range(2):
            try:
                _fetch_file(url, filename)
                return filename
            except IOError:
                time.sleep(0.5)
        else:
            # for-else: all retries raised IOError, make one last attempt
            # without swallowing the error
            _fetch_file(url, filename)
            return filename
    else:  # pragma: no cover
        _fetch_file(url, filename)
        return filename


def _fetch_file(url, file_name, print_destination=True):
    """Load requested file, downloading it if needed or requested

    Parameters
    ----------
    url : string
        The URL of the file to be downloaded.
    file_name : string
        Path, including the file name, where the downloaded file will be
        saved.
    print_destination : bool, optional
        If True, the destination where the file was saved is printed after
        the download finishes.
    """
    # Adapted from NISL:
    # https://github.com/nisl/tutorial/blob/master/nisl/datasets.py

    print(
        "Imageio: %r was not found on your computer; "
        "downloading it now." % os.path.basename(file_name)
    )

    temp_file_name = file_name + ".part"
    local_file = None
    initial_size = 0
    errors = []
    for tries in range(4):
        try:
            # Checking file size and displaying it alongside the download url
            remote_file = urlopen(url, timeout=5.0)
            file_size = int(remote_file.headers["Content-Length"].strip())
            size_str = _sizeof_fmt(file_size)
            print("Try %i. Download from %s (%s)" % (tries + 1, url, size_str))
            # Downloading data (can be extended to resume if need be)
            local_file = open(temp_file_name, "wb")
            _chunk_read(remote_file, local_file, initial_size=initial_size)
            # temp file must be closed prior to the move
            if not local_file.closed:
                local_file.close()
            shutil.move(temp_file_name, file_name)
            if print_destination is True:
                sys.stdout.write("File saved as %s.\n" % file_name)
            break
        except Exception as e:
            errors.append(e)
            print("Error while fetching file: %s." % str(e))
        finally:
            if local_file is not None:
                if not local_file.closed:
                    local_file.close()
    else:
        # for-else: no attempt succeeded (the loop never hit ``break``)
        raise IOError(
            "Unable to download %r. Perhaps there is no internet "
            "connection? If there is, please report this problem."
            % os.path.basename(file_name)
        )


def _chunk_read(response, local_file, chunk_size=8192, initial_size=0):
    """Download a file chunk by chunk and show progress

    Can also be used when resuming downloads over HTTP.

    Parameters
    ----------
    response : urllib.response.addinfourl
        Response to the download request, used to get the file size.
    local_file : file
        Hard disk file where data should be written.
    chunk_size : integer, optional
        Size of downloaded chunks. Default: 8192
    initial_size : int, optional
        If resuming, indicate the initial size of the file.
""" # Adapted from NISL: # https://github.com/nisl/tutorial/blob/master/nisl/datasets.py bytes_so_far = initial_size # Returns only amount left to download when resuming, not the size of the # entire file total_size = int(response.headers["Content-Length"].strip()) total_size += initial_size progress = StdoutProgressIndicator("Downloading") progress.start("", "bytes", total_size) while True: chunk = response.read(chunk_size) bytes_so_far += len(chunk) if not chunk: break _chunk_write(chunk, local_file, progress) progress.finish("Done") def _chunk_write(chunk, local_file, progress): """Write a chunk to file and update the progress bar""" local_file.write(chunk) progress.increase_progress(len(chunk)) time.sleep(0) # Give other threads a chance, e.g. those that handle stdout pipes def _sizeof_fmt(num): """Turn number of bytes into human-readable str""" units = ["bytes", "kB", "MB", "GB", "TB", "PB"] decimals = [0, 0, 1, 2, 2, 2] """Human friendly file size""" if num > 1: exponent = min(int(log(num, 1024)), len(units) - 1) quotient = float(num) / 1024 ** exponent unit = units[exponent] num_decimals = decimals[exponent] format_string = "{0:.%sf} {1}" % num_decimals return format_string.format(quotient, unit) return "0 bytes" if num == 0 else "1 byte"