import itertools
from contracts import check_multiple
from contracts import contract
from geometry import logger, eigh
from geometry.formatting import formatm
from geometry.procrustes import best_similarity_transform
from geometry.spheres import project_vectors_onto_sphere
from geometry.utils.numpy_backport import assert_allclose
import numpy as np
[docs]@contract(S='array[KxN]', returns='array[NxN](>=0)')
def euclidean_distances(S):
''' Computes the euclidean distance matrix for the given points. '''
K, N = S.shape
D = np.zeros((N, N))
for i in range(N):
p = S[:, i]
pp = np.tile(p, (N, 1)).T
assert pp.shape == (K, N)
d2 = ((S - pp) * (S - pp)).sum(axis=0)
d2 = np.maximum(d2, 0)
D[i, :] = np.sqrt(d2)
return D
[docs]def double_center(P):
n = P.shape[0]
grand_mean = P.mean()
row_mean = np.zeros(n)
col_mean = np.zeros(n)
for i in range(n):
row_mean[i] = P[i, :].mean()
col_mean[i] = P[:, i].mean()
R = row_mean.reshape(n, 1).repeat(n, axis=1)
assert R.shape == (n, n)
C = col_mean.reshape(1, n).repeat(n, axis=0)
assert C.shape == (n, n)
B2 = -0.5 * (P - R - C + grand_mean)
if False:
B = np.zeros(P.shape)
for i, j in itertools.product(range(n), range(n)):
B[i, j] = -0.5 * (P[i, j] - col_mean[j] - row_mean[i] + grand_mean)
assert_allclose(B2, B)
return B2
[docs]@contract(C='array[NxN]', ndim='int,>0,K', returns='array[KxN]')
def inner_product_embedding_slow(C, ndim):
U, S, V = np.linalg.svd(C, full_matrices=0)
check_multiple([('array[NxN]', U),
('array[N]', S),
('array[NxN]', V)])
coords = V[:ndim, :]
for i in range(ndim):
coords[i, :] = coords[i, :] * np.sqrt(S[i])
return coords
[docs]@contract(C='array[NxN]', ndim='int,>1,K', returns='array[KxN]')
def inner_product_embedding(C, ndim):
n = C.shape[0]
if ndim > n:
msg = 'Number of points: %s Dimensions: %s' % (n, ndim)
raise ValueError(msg)
eigvals = (n - ndim, n - 1)
print n, eigvals
S, V = eigh(C, eigvals=eigvals)
assert S[0] <= S[1] # eigh returns in ascending order
if np.any(S < 0):
msg = 'The cosine matrix singular values are not all positive: \n'
msg += formatm('S', S)
msg += 'I assume it is rounding error and approximate with:\n'
S[S < 0] = 0
msg += formatm('S\'', S)
logger.warning(msg)
assert V.shape == (n, ndim)
assert S.shape == (ndim,)
# check_multiple([('K', ndim),
# ('array[NxK]', V),
# ('array[K]', S)])
coords = V.T
for i in range(ndim):
assert S[i] >= 0
coords[i, :] = coords[i, :] * np.sqrt(S[i])
return coords
[docs]def truncated_svd_randomized(M, k):
''' Truncated SVD based on randomized projections. '''
p = k + 5 # TODO: add parameter
Y = np.dot(M, np.random.normal(size=(M.shape[1], p)))
Q, r = np.linalg.qr(Y) # @UnusedVariable
B = np.dot(Q.T, M)
Uhat, s, v = np.linalg.svd(B, full_matrices=False)
U = np.dot(Q, Uhat)
return U.T[:k].T, s[:k], v[:k]
[docs]@contract(C='array[NxN]', ndim='int,>0,K', returns='array[KxN]')
def inner_product_embedding_randomized(C, ndim):
'''
Best embedding of inner product matrix based on
randomized projections.
'''
U, S, V = truncated_svd_randomized(C, ndim) # @UnusedVariable.
check_multiple([('K', ndim),
('array[KxN]', V),
('array[K]', S)])
coords = V
for i in range(ndim):
coords[i, :] = coords[i, :] * np.sqrt(S[i])
return coords
[docs]@contract(D='distance_matrix,array[MxM](>=0)', ndim='K,int,>=1', returns='array[KxM]')
def mds(D, ndim, embed=inner_product_embedding):
# if D.dtype != np.float64:
# D = D.astype(np.float64)
diag = D.diagonal()
# the diagonal should be zero
if not np.allclose(diag, 0):
msg = 'The diagonal of the distance matrix should be zero.'
msg += 'Here are all the entries: %s' % diag.tolist()
raise ValueError(diag)
# assert_allclose(diag, 0, atol=1e-09)
# Find centered cosine matrix
P = D * D
B = double_center(P)
return embed(B, ndim)
[docs]@contract(D='distance_matrix,array[MxM](>=0)', ndim='K,int,>=1', returns='array[KxM]')
def mds_randomized(D, ndim):
''' MDS based on randomized projections. '''
return mds(D, ndim, embed=inner_product_embedding_randomized)
[docs]@contract(C='array[NxN]', ndim='int,>0,K', returns='array[KxN],directions')
def spherical_mds(C, ndim, embed=inner_product_embedding):
# TODO: check cosines
coords = embed(C, ndim)
proj = project_vectors_onto_sphere(coords)
return proj
# TODO: spherical_mds_randomized
best_embedding_on_sphere = spherical_mds
[docs]@contract(references='array[KxN]', distances='array[N](>=0)')
def place(references, distances):
# TODO: docs
K, N = references.shape
# First do MDS on all data
D = np.zeros((N + 1, N + 1))
D[:N, :N] = euclidean_distances(references)
D[N, N] = 0
D[N, :N] = distances
D[:N, N] = distances
Sm = mds(D, K)
# Only if perfect
# Dm = euclidean_distances(Sm)
# assert_almost_equal(D[:N, :N], Dm[:N, :N])
# new in other frame
R, t = best_similarity_transform(Sm[:, :N], references)
Sm_aligned = np.dot(R, Sm) + t
result = Sm_aligned[:, N]
return result