Skip to content
Snippets Groups Projects
Commit 62bffdd4 authored by Maria Tarasevich's avatar Maria Tarasevich
Browse files

Initial commit

parents
No related branches found
No related tags found
No related merge requests found
main.py 0 → 100644
# vim: et sts=4 ts=4
import argparse
import numpy as np
from supersvd import supersvd
def main():
parser = argparse.ArgumentParser()
parser.add_argument("--type", choices=['real', 'double'], default='real',
help="Data type, default is '%(default)s'")
parser.add_argument("-x", metavar="X.STD", required=True, help="X data input file name")
parser.add_argument("-y", metavar="Y.STD", required=True, help="Y data input file name")
parser.add_argument("-t", "--time", type=int, required=True, help="Length of the time interval")
parser.add_argument("-k", type=int, default=3,
help="Number of singular values, default is %(default)d")
parser.add_argument("-xv", help="X singular vectors output file name, if necessary")
parser.add_argument("-yv", help="Y singular vectors output file name, if necessary")
parser.add_argument("-xc", help="X time coefficients output file name, if necessary")
parser.add_argument("-yc", help="Y time coefficients output file name, if necessary")
parser.add_argument("-stat", help="Correlation and variance values in CSV, if necessary")
parser.add_argument("--dont-subtract-mean", dest="elim_mean", help="Disable subtracting of the time mean from input", action="store_false")
args = parser.parse_args()
if args.type == 'real':
dtype = np.float32
else:
dtype = np.float64
t = args.time
X = np.fromfile(args.x, dtype=dtype).reshape(t, -1)
Y = np.fromfile(args.y, dtype=dtype).reshape(t, -1)
svd = supersvd(X, Y, args.k, args.elim_mean)
if args.xv is not None:
svd.x_vect.tofile(args.xv)
if args.yv is not None:
svd.y_vect.tofile(args.yv)
if args.xc is not None:
svd.x_coeff.tofile(args.xc)
if args.yc is not None:
svd.y_coeff.tofile(args.yc)
if args.stat is not None:
f = open(args.stat, 'w')
f.write('number,corrcoeff,x_varfrac,y_varfrac,covfrac\n')
for i in range(args.k):
print('Singular value number:', i+1)
corrcoeff = 100 * svd.corrcoeff[i]
x_varfrac = 100 * svd.x_variance_fraction[i]
y_varfrac = 100 * svd.y_variance_fraction[i]
covfrac = 100 * svd.eigenvalue_fraction[i]
if args.stat is not None:
f.write('%d,%f,%f,%f,%f\n' % (i+1, corrcoeff, x_varfrac, y_varfrac, covfrac))
print('Time series correlation coefficient:', '%8.4f%%' % corrcoeff)
print(args.x, 'variance fraction:', '%8.4f%%' % x_varfrac)
print(args.y, 'variance fraction:', '%8.4f%%' % y_varfrac)
print('Covariance fraction:', '%8.4f%%' % covfrac)
if args.stat is not None:
f.close()
if __name__ == "__main__":
main()
# vim: ts=4 sts=4 et
import numpy as _np
from scipy.sparse import linalg as _spla
from collections import namedtuple
SuperSvdResult = namedtuple('SuperSvdResult', [
'x_coeff', 'y_coeff', 'corrcoeff',
'x_variance_fraction', 'y_variance_fraction',
'x_vect', 'y_vect', 'eigenvalue_fraction', 'eigenvalues',
])
def supersvd(X, Y, k=3, eliminate_mean=True):
"""
X and Y - the input data for which correlation is seeked
dim(X) = nT x nX
dim(Y) = nT x nY
nX and nY may be multidimensional
k is the number of seeked pairs
Each array is decomposed into
X[t, i] = Xm[i] + sqrt(size(X)) sum_e XC[e, t] XV[e, i]
Y[t, j] = Ym[j] + sqrt(size(Y)) sum_e YC[e, t] YV[e, j]
Xm[i] = mean(X[t, i], t)
Ym[j] = mean(Y[t, j], t)
||YC[e, :]||_2 = ||XC[e, :]||_2 = 1 for each e
XV and YV form an orthogonal basis, i.e.
sum_i XV[e, i] XV[e', i] = 0
sum_j YV[e, j] YV[e', j] = 0
when e != e'
Returns
XC: k x nT
YC: k x nT
XV: k x nX
YV: k x nY
EF: k, fraction of covariance, explained by k-th pair
"""
nTX, *nX = X.shape
nTY, *nY = Y.shape
assert nTX == nTY
nT = nTX
Xorig = X
Yorig = Y
X = Xorig.reshape(nT, -1)
Y = Yorig.reshape(nT, -1)
if eliminate_mean:
X = X - X.mean(axis=0)
Y = Y - Y.mean(axis=0)
# Norming makes eigenvalues ~O(1)
COV = (X.T @ Y) / nT / (X.shape[1] * Y.shape[1])**0.25
U, S, Vt = _spla.svds(COV, k=k)
perm = _np.argsort(-S)
S = S[perm]
XV = U.T[perm, :]
YV = Vt [perm, :]
XC = (XV @ X.T) / _np.sqrt(X.size)
YC = (YV @ Y.T) / _np.sqrt(Y.size)
xnorm = _np.linalg.norm(XC, axis=1)
ynorm = _np.linalg.norm(YC, axis=1)
XC /= xnorm.reshape(-1, 1)
YC /= ynorm.reshape(-1, 1)
XV *= xnorm.reshape(-1, 1)
YV *= ynorm.reshape(-1, 1)
S2 = S**2
EF = S2 / _np.linalg.norm(COV, 'fro')**2
CORR = _np.einsum('ij,ij->i', XC, YC)
Xvar = _np.var(X)
Yvar = _np.var(Y)
Xvar_frac = _np.zeros_like(CORR)
Yvar_frac = _np.zeros_like(CORR)
for e in range(k):
Xvar_frac[e] = _np.var(_np.sqrt(X.size) * _np.outer(XC[e, :], XV[e, :]))
Yvar_frac[e] = _np.var(_np.sqrt(Y.size) * _np.outer(YC[e, :], YV[e, :]))
Xvar_frac /= Xvar
Yvar_frac /= Yvar
return SuperSvdResult(
x_coeff=XC,
y_coeff=YC,
corrcoeff=CORR,
x_variance_fraction=Xvar_frac,
y_variance_fraction=Yvar_frac,
x_vect=XV.reshape(k, *nX),
y_vect=YV.reshape(k, *nY),
eigenvalue_fraction=EF,
eigenvalues=S2,
)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment