import math import numpy as np from numpy.linalg import inv, pinv from scipy import optimize from .._shared.utils import check_random_state def _check_data_dim(data, dim): if data.ndim != 2 or data.shape[1] != dim: raise ValueError('Input data must have shape (N, %d).' % dim) def _check_data_atleast_2D(data): if data.ndim < 2 or data.shape[1] < 2: raise ValueError('Input data must be at least 2D.') def _norm_along_axis(x, axis): """NumPy < 1.8 does not support the `axis` argument for `np.linalg.norm`.""" return np.sqrt(np.einsum('ij,ij->i', x, x)) class BaseModel(object): def __init__(self): self.params = None class LineModelND(BaseModel): """Total least squares estimator for N-dimensional lines. In contrast to ordinary least squares line estimation, this estimator minimizes the orthogonal distances of points to the estimated line. Lines are defined by a point (origin) and a unit vector (direction) according to the following vector equation:: X = origin + lambda * direction Attributes ---------- params : tuple Line model parameters in the following order `origin`, `direction`. Examples -------- >>> x = np.linspace(1, 2, 25) >>> y = 1.5 * x + 3 >>> lm = LineModelND() >>> lm.estimate(np.array([x, y]).T) True >>> tuple(np.round(lm.params, 5)) (array([1.5 , 5.25]), array([0.5547 , 0.83205])) >>> res = lm.residuals(np.array([x, y]).T) >>> np.abs(np.round(res, 9)) array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]) >>> np.round(lm.predict_y(x[:5]), 3) array([4.5 , 4.562, 4.625, 4.688, 4.75 ]) >>> np.round(lm.predict_x(y[:5]), 3) array([1. , 1.042, 1.083, 1.125, 1.167]) """ def estimate(self, data): """Estimate line model from data. This minimizes the sum of shortest (orthogonal) distances from the given data points to the estimated line. Parameters ---------- data : (N, dim) array N points in a space of dimensionality dim >= 2. Returns ------- success : bool True, if model estimation succeeds. """ _check_data_atleast_2D(data) origin = data.mean(axis=0) data = data - origin if data.shape[0] == 2: # well determined direction = data[1] - data[0] norm = np.linalg.norm(direction) if norm != 0: # this should not happen to be norm 0 direction /= norm elif data.shape[0] > 2: # over-determined # Note: with full_matrices=1 Python dies with joblib parallel_for. _, _, v = np.linalg.svd(data, full_matrices=False) direction = v[0] else: # under-determined raise ValueError('At least 2 input points needed.') self.params = (origin, direction) return True def residuals(self, data, params=None): """Determine residuals of data to model. For each point, the shortest (orthogonal) distance to the line is returned. It is obtained by projecting the data onto the line. Parameters ---------- data : (N, dim) array N points in a space of dimension dim. params : (2, ) array, optional Optional custom parameter set in the form (`origin`, `direction`). Returns ------- residuals : (N, ) array Residual for each data point. """ _check_data_atleast_2D(data) if params is None: if self.params is None: raise ValueError('Parameters cannot be None') params = self.params if len(params) != 2: raise ValueError('Parameters are defined by 2 sets.') origin, direction = params res = (data - origin) - \ ((data - origin) @ direction)[..., np.newaxis] * direction return _norm_along_axis(res, axis=1) def predict(self, x, axis=0, params=None): """Predict intersection of the estimated line model with a hyperplane orthogonal to a given axis. Parameters ---------- x : (n, 1) array Coordinates along an axis. axis : int Axis orthogonal to the hyperplane intersecting the line. params : (2, ) array, optional Optional custom parameter set in the form (`origin`, `direction`). Returns ------- data : (n, m) array Predicted coordinates. Raises ------ ValueError If the line is parallel to the given axis. """ if params is None: if self.params is None: raise ValueError('Parameters cannot be None') params = self.params if len(params) != 2: raise ValueError('Parameters are defined by 2 sets.') origin, direction = params if direction[axis] == 0: # line parallel to axis raise ValueError('Line parallel to axis %s' % axis) l = (x - origin[axis]) / direction[axis] data = origin + l[..., np.newaxis] * direction return data def predict_x(self, y, params=None): """Predict x-coordinates for 2D lines using the estimated model. Alias for:: predict(y, axis=1)[:, 0] Parameters ---------- y : array y-coordinates. params : (2, ) array, optional Optional custom parameter set in the form (`origin`, `direction`). Returns ------- x : array Predicted x-coordinates. """ x = self.predict(y, axis=1, params=params)[:, 0] return x def predict_y(self, x, params=None): """Predict y-coordinates for 2D lines using the estimated model. Alias for:: predict(x, axis=0)[:, 1] Parameters ---------- x : array x-coordinates. params : (2, ) array, optional Optional custom parameter set in the form (`origin`, `direction`). Returns ------- y : array Predicted y-coordinates. """ y = self.predict(x, axis=0, params=params)[:, 1] return y class CircleModel(BaseModel): """Total least squares estimator for 2D circles. The functional model of the circle is:: r**2 = (x - xc)**2 + (y - yc)**2 This estimator minimizes the squared distances from all points to the circle:: min{ sum((r - sqrt((x_i - xc)**2 + (y_i - yc)**2))**2) } A minimum number of 3 points is required to solve for the parameters. Attributes ---------- params : tuple Circle model parameters in the following order `xc`, `yc`, `r`. Examples -------- >>> t = np.linspace(0, 2 * np.pi, 25) >>> xy = CircleModel().predict_xy(t, params=(2, 3, 4)) >>> model = CircleModel() >>> model.estimate(xy) True >>> tuple(np.round(model.params, 5)) (2.0, 3.0, 4.0) >>> res = model.residuals(xy) >>> np.abs(np.round(res, 9)) array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]) """ def estimate(self, data): """Estimate circle model from data using total least squares. Parameters ---------- data : (N, 2) array N points with ``(x, y)`` coordinates, respectively. Returns ------- success : bool True, if model estimation succeeds. """ _check_data_dim(data, dim=2) x = data[:, 0] y = data[:, 1] # http://www.had2know.com/academics/best-fit-circle-least-squares.html x2y2 = (x ** 2 + y ** 2) sum_x = np.sum(x) sum_y = np.sum(y) sum_xy = np.sum(x * y) m1 = np.array([[np.sum(x ** 2), sum_xy, sum_x], [sum_xy, np.sum(y ** 2), sum_y], [sum_x, sum_y, float(len(x))]]) m2 = np.array([[np.sum(x * x2y2), np.sum(y * x2y2), np.sum(x2y2)]]).T a, b, c = pinv(m1) @ m2 a, b, c = a[0], b[0], c[0] xc = a / 2 yc = b / 2 r = np.sqrt(4 * c + a ** 2 + b ** 2) / 2 self.params = (xc, yc, r) return True def residuals(self, data): """Determine residuals of data to model. For each point the shortest distance to the circle is returned. Parameters ---------- data : (N, 2) array N points with ``(x, y)`` coordinates, respectively. Returns ------- residuals : (N, ) array Residual for each data point. """ _check_data_dim(data, dim=2) xc, yc, r = self.params x = data[:, 0] y = data[:, 1] return r - np.sqrt((x - xc)**2 + (y - yc)**2) def predict_xy(self, t, params=None): """Predict x- and y-coordinates using the estimated model. Parameters ---------- t : array Angles in circle in radians. Angles start to count from positive x-axis to positive y-axis in a right-handed system. params : (3, ) array, optional Optional custom parameter set. Returns ------- xy : (..., 2) array Predicted x- and y-coordinates. """ if params is None: params = self.params xc, yc, r = params x = xc + r * np.cos(t) y = yc + r * np.sin(t) return np.concatenate((x[..., None], y[..., None]), axis=t.ndim) class EllipseModel(BaseModel): """Total least squares estimator for 2D ellipses. The functional model of the ellipse is:: xt = xc + a*cos(theta)*cos(t) - b*sin(theta)*sin(t) yt = yc + a*sin(theta)*cos(t) + b*cos(theta)*sin(t) d = sqrt((x - xt)**2 + (y - yt)**2) where ``(xt, yt)`` is the closest point on the ellipse to ``(x, y)``. Thus d is the shortest distance from the point to the ellipse. The estimator is based on a least squares minimization. The optimal solution is computed directly, no iterations are required. This leads to a simple, stable and robust fitting method. The ``params`` attribute contains the parameters in the following order:: xc, yc, a, b, theta Attributes ---------- params : tuple Ellipse model parameters in the following order `xc`, `yc`, `a`, `b`, `theta`. Examples -------- >>> xy = EllipseModel().predict_xy(np.linspace(0, 2 * np.pi, 25), ... params=(10, 15, 4, 8, np.deg2rad(30))) >>> ellipse = EllipseModel() >>> ellipse.estimate(xy) True >>> np.round(ellipse.params, 2) array([10. , 15. , 4. , 8. , 0.52]) >>> np.round(abs(ellipse.residuals(xy)), 5) array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]) """ def estimate(self, data): """Estimate circle model from data using total least squares. Parameters ---------- data : (N, 2) array N points with ``(x, y)`` coordinates, respectively. Returns ------- success : bool True, if model estimation succeeds. References ---------- .. [1] Halir, R.; Flusser, J. "Numerically stable direct least squares fitting of ellipses". In Proc. 6th International Conference in Central Europe on Computer Graphics and Visualization. WSCG (Vol. 98, pp. 125-132). """ # Original Implementation: Ben Hammel, Nick Sullivan-Molina # another REFERENCE: [2] http://mathworld.wolfram.com/Ellipse.html _check_data_dim(data, dim=2) x = data[:, 0] y = data[:, 1] # Quadratic part of design matrix [eqn. 15] from [1] D1 = np.vstack([x ** 2, x * y, y ** 2]).T # Linear part of design matrix [eqn. 16] from [1] D2 = np.vstack([x, y, np.ones(len(x))]).T # forming scatter matrix [eqn. 17] from [1] S1 = D1.T @ D1 S2 = D1.T @ D2 S3 = D2.T @ D2 # Constraint matrix [eqn. 18] C1 = np.array([[0., 0., 2.], [0., -1., 0.], [2., 0., 0.]]) try: # Reduced scatter matrix [eqn. 29] M = inv(C1) @ (S1 - S2 @ inv(S3) @ S2.T) except np.linalg.LinAlgError: # LinAlgError: Singular matrix return False # M*|a b c >=l|a b c >. Find eigenvalues and eigenvectors # from this equation [eqn. 28] eig_vals, eig_vecs = np.linalg.eig(M) # eigenvector must meet constraint 4ac - b^2 to be valid. cond = 4 * np.multiply(eig_vecs[0, :], eig_vecs[2, :]) \ - np.power(eig_vecs[1, :], 2) a1 = eig_vecs[:, (cond > 0)] # seeks for empty matrix if 0 in a1.shape or len(a1.ravel()) != 3: return False a, b, c = a1.ravel() # |d f g> = -S3^(-1)*S2^(T)*|a b c> [eqn. 24] a2 = -inv(S3) @ S2.T @ a1 d, f, g = a2.ravel() # eigenvectors are the coefficients of an ellipse in general form # a*x^2 + 2*b*x*y + c*y^2 + 2*d*x + 2*f*y + g = 0 (eqn. 15) from [2] b /= 2. d /= 2. f /= 2. # finding center of ellipse [eqn.19 and 20] from [2] x0 = (c * d - b * f) / (b ** 2. - a * c) y0 = (a * f - b * d) / (b ** 2. - a * c) # Find the semi-axes lengths [eqn. 21 and 22] from [2] numerator = a * f ** 2 + c * d ** 2 + g * b ** 2 \ - 2 * b * d * f - a * c * g term = np.sqrt((a - c) ** 2 + 4 * b ** 2) denominator1 = (b ** 2 - a * c) * (term - (a + c)) denominator2 = (b ** 2 - a * c) * (- term - (a + c)) width = np.sqrt(2 * numerator / denominator1) height = np.sqrt(2 * numerator / denominator2) # angle of counterclockwise rotation of major-axis of ellipse # to x-axis [eqn. 23] from [2]. phi = 0.5 * np.arctan((2. * b) / (a - c)) if a > c: phi += 0.5 * np.pi self.params = np.nan_to_num([x0, y0, width, height, phi]).tolist() self.params = [float(np.real(x)) for x in self.params] return True def residuals(self, data): """Determine residuals of data to model. For each point the shortest distance to the ellipse is returned. Parameters ---------- data : (N, 2) array N points with ``(x, y)`` coordinates, respectively. Returns ------- residuals : (N, ) array Residual for each data point. """ _check_data_dim(data, dim=2) xc, yc, a, b, theta = self.params ctheta = math.cos(theta) stheta = math.sin(theta) x = data[:, 0] y = data[:, 1] N = data.shape[0] def fun(t, xi, yi): ct = math.cos(t) st = math.sin(t) xt = xc + a * ctheta * ct - b * stheta * st yt = yc + a * stheta * ct + b * ctheta * st return (xi - xt) ** 2 + (yi - yt) ** 2 # def Dfun(t, xi, yi): # ct = math.cos(t) # st = math.sin(t) # xt = xc + a * ctheta * ct - b * stheta * st # yt = yc + a * stheta * ct + b * ctheta * st # dfx_t = - 2 * (xi - xt) * (- a * ctheta * st # - b * stheta * ct) # dfy_t = - 2 * (yi - yt) * (- a * stheta * st # + b * ctheta * ct) # return [dfx_t + dfy_t] residuals = np.empty((N, ), dtype=np.double) # initial guess for parameter t of closest point on ellipse t0 = np.arctan2(y - yc, x - xc) - theta # determine shortest distance to ellipse for each point for i in range(N): xi = x[i] yi = y[i] # faster without Dfun, because of the python overhead t, _ = optimize.leastsq(fun, t0[i], args=(xi, yi)) residuals[i] = np.sqrt(fun(t, xi, yi)) return residuals def predict_xy(self, t, params=None): """Predict x- and y-coordinates using the estimated model. Parameters ---------- t : array Angles in circle in radians. Angles start to count from positive x-axis to positive y-axis in a right-handed system. params : (5, ) array, optional Optional custom parameter set. Returns ------- xy : (..., 2) array Predicted x- and y-coordinates. """ if params is None: params = self.params xc, yc, a, b, theta = params ct = np.cos(t) st = np.sin(t) ctheta = math.cos(theta) stheta = math.sin(theta) x = xc + a * ctheta * ct - b * stheta * st y = yc + a * stheta * ct + b * ctheta * st return np.concatenate((x[..., None], y[..., None]), axis=t.ndim) def _dynamic_max_trials(n_inliers, n_samples, min_samples, probability): """Determine number trials such that at least one outlier-free subset is sampled for the given inlier/outlier ratio. Parameters ---------- n_inliers : int Number of inliers in the data. n_samples : int Total number of samples in the data. min_samples : int Minimum number of samples chosen randomly from original data. probability : float Probability (confidence) that one outlier-free sample is generated. Returns ------- trials : int Number of trials. """ if n_inliers == 0: return np.inf nom = 1 - probability if nom == 0: return np.inf inlier_ratio = n_inliers / float(n_samples) denom = 1 - inlier_ratio ** min_samples if denom == 0: return 1 elif denom == 1: return np.inf nom = np.log(nom) denom = np.log(denom) if denom == 0: return 0 return int(np.ceil(nom / denom)) def ransac(data, model_class, min_samples, residual_threshold, is_data_valid=None, is_model_valid=None, max_trials=100, stop_sample_num=np.inf, stop_residuals_sum=0, stop_probability=1, random_state=None, initial_inliers=None): """Fit a model to data with the RANSAC (random sample consensus) algorithm. RANSAC is an iterative algorithm for the robust estimation of parameters from a subset of inliers from the complete data set. Each iteration performs the following tasks: 1. Select `min_samples` random samples from the original data and check whether the set of data is valid (see `is_data_valid`). 2. Estimate a model to the random subset (`model_cls.estimate(*data[random_subset]`) and check whether the estimated model is valid (see `is_model_valid`). 3. Classify all data as inliers or outliers by calculating the residuals to the estimated model (`model_cls.residuals(*data)`) - all data samples with residuals smaller than the `residual_threshold` are considered as inliers. 4. Save estimated model as best model if number of inlier samples is maximal. In case the current estimated model has the same number of inliers, it is only considered as the best model if it has less sum of residuals. These steps are performed either a maximum number of times or until one of the special stop criteria are met. The final model is estimated using all inlier samples of the previously determined best model. Parameters ---------- data : [list, tuple of] (N, ...) array Data set to which the model is fitted, where N is the number of data points and the remaining dimension are depending on model requirements. If the model class requires multiple input data arrays (e.g. source and destination coordinates of ``skimage.transform.AffineTransform``), they can be optionally passed as tuple or list. Note, that in this case the functions ``estimate(*data)``, ``residuals(*data)``, ``is_model_valid(model, *random_data)`` and ``is_data_valid(*random_data)`` must all take each data array as separate arguments. model_class : object Object with the following object methods: * ``success = estimate(*data)`` * ``residuals(*data)`` where `success` indicates whether the model estimation succeeded (`True` or `None` for success, `False` for failure). min_samples : int in range (0, N) The minimum number of data points to fit a model to. residual_threshold : float larger than 0 Maximum distance for a data point to be classified as an inlier. is_data_valid : function, optional This function is called with the randomly selected data before the model is fitted to it: `is_data_valid(*random_data)`. is_model_valid : function, optional This function is called with the estimated model and the randomly selected data: `is_model_valid(model, *random_data)`, . max_trials : int, optional Maximum number of iterations for random sample selection. stop_sample_num : int, optional Stop iteration if at least this number of inliers are found. stop_residuals_sum : float, optional Stop iteration if sum of residuals is less than or equal to this threshold. stop_probability : float in range [0, 1], optional RANSAC iteration stops if at least one outlier-free set of the training data is sampled with ``probability >= stop_probability``, depending on the current best model's inlier ratio and the number of trials. This requires to generate at least N samples (trials): N >= log(1 - probability) / log(1 - e**m) where the probability (confidence) is typically set to a high value such as 0.99, e is the current fraction of inliers w.r.t. the total number of samples, and m is the min_samples value. random_state : int, RandomState instance or None, optional If int, random_state is the seed used by the random number generator; If RandomState instance, random_state is the random number generator; If None, the random number generator is the RandomState instance used by `np.random`. initial_inliers : array-like of bool, shape (N,), optional Initial samples selection for model estimation Returns ------- model : object Best model with largest consensus set. inliers : (N, ) array Boolean mask of inliers classified as ``True``. References ---------- .. [1] "RANSAC", Wikipedia, https://en.wikipedia.org/wiki/RANSAC Examples -------- Generate ellipse data without tilt and add noise: >>> t = np.linspace(0, 2 * np.pi, 50) >>> xc, yc = 20, 30 >>> a, b = 5, 10 >>> x = xc + a * np.cos(t) >>> y = yc + b * np.sin(t) >>> data = np.column_stack([x, y]) >>> np.random.seed(seed=1234) >>> data += np.random.normal(size=data.shape) Add some faulty data: >>> data[0] = (100, 100) >>> data[1] = (110, 120) >>> data[2] = (120, 130) >>> data[3] = (140, 130) Estimate ellipse model using all available data: >>> model = EllipseModel() >>> model.estimate(data) True >>> np.round(model.params) # doctest: +SKIP array([ 72., 75., 77., 14., 1.]) Estimate ellipse model using RANSAC: >>> ransac_model, inliers = ransac(data, EllipseModel, 20, 3, max_trials=50) >>> abs(np.round(ransac_model.params)) array([20., 30., 5., 10., 0.]) >>> inliers # doctest: +SKIP array([False, False, False, False, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True], dtype=bool) >>> sum(inliers) > 40 True RANSAC can be used to robustly estimate a geometric transformation. In this section, we also show how to use a proportion of the total samples, rather than an absolute number. >>> from skimage.transform import SimilarityTransform >>> np.random.seed(0) >>> src = 100 * np.random.rand(50, 2) >>> model0 = SimilarityTransform(scale=0.5, rotation=1, translation=(10, 20)) >>> dst = model0(src) >>> dst[0] = (10000, 10000) >>> dst[1] = (-100, 100) >>> dst[2] = (50, 50) >>> ratio = 0.5 # use half of the samples >>> min_samples = int(ratio * len(src)) >>> model, inliers = ransac((src, dst), SimilarityTransform, min_samples, 10, ... initial_inliers=np.ones(len(src), dtype=bool)) >>> inliers array([False, False, False, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True]) """ best_model = None best_inlier_num = 0 best_inlier_residuals_sum = np.inf best_inliers = None random_state = check_random_state(random_state) # in case data is not pair of input and output, male it like it if not isinstance(data, (tuple, list)): data = (data, ) num_samples = len(data[0]) if not (0 < min_samples < num_samples): raise ValueError("`min_samples` must be in range (0, )") if residual_threshold < 0: raise ValueError("`residual_threshold` must be greater than zero") if max_trials < 0: raise ValueError("`max_trials` must be greater than zero") if not (0 <= stop_probability <= 1): raise ValueError("`stop_probability` must be in range [0, 1]") if initial_inliers is not None and len(initial_inliers) != num_samples: raise ValueError("RANSAC received a vector of initial inliers (length %i)" " that didn't match the number of samples (%i)." " The vector of initial inliers should have the same length" " as the number of samples and contain only True (this sample" " is an initial inlier) and False (this one isn't) values." % (len(initial_inliers), num_samples)) # for the first run use initial guess of inliers spl_idxs = (initial_inliers if initial_inliers is not None else random_state.choice(num_samples, min_samples, replace=False)) for num_trials in range(max_trials): # do sample selection according data pairs samples = [d[spl_idxs] for d in data] # for next iteration choose random sample set and be sure that no samples repeat spl_idxs = random_state.choice(num_samples, min_samples, replace=False) # optional check if random sample set is valid if is_data_valid is not None and not is_data_valid(*samples): continue # estimate model for current random sample set sample_model = model_class() success = sample_model.estimate(*samples) # backwards compatibility if success is not None and not success: continue # optional check if estimated model is valid if is_model_valid is not None and not is_model_valid(sample_model, *samples): continue sample_model_residuals = np.abs(sample_model.residuals(*data)) # consensus set / inliers sample_model_inliers = sample_model_residuals < residual_threshold sample_model_residuals_sum = np.sum(sample_model_residuals ** 2) # choose as new best model if number of inliers is maximal sample_inlier_num = np.sum(sample_model_inliers) if ( # more inliers sample_inlier_num > best_inlier_num # same number of inliers but less "error" in terms of residuals or (sample_inlier_num == best_inlier_num and sample_model_residuals_sum < best_inlier_residuals_sum) ): best_model = sample_model best_inlier_num = sample_inlier_num best_inlier_residuals_sum = sample_model_residuals_sum best_inliers = sample_model_inliers dynamic_max_trials = _dynamic_max_trials(best_inlier_num, num_samples, min_samples, stop_probability) if (best_inlier_num >= stop_sample_num or best_inlier_residuals_sum <= stop_residuals_sum or num_trials >= dynamic_max_trials): break # estimate final model using all inliers if best_inliers is not None: # select inliers for each data array data_inliers = [d[best_inliers] for d in data] best_model.estimate(*data_inliers) return best_model, best_inliers