Slug Pue
Slug Pue

Reputation: 256

Efficient way to create a dense matrix from diagonal vectors in Python?

I am trying to create this matrix in Python using numpy vectors:

matrix

where the values come from a function. I have implemented it with repeatedly using numpy.diag but for large dimensions, it becomes very slow. Here is the code:

def makeS(N):
    vec = np.full(N, 2*v(x_range[1]))
    vec[0]*=0.5 
    S = np.diag(vec)

    vec = np.full(N-1, v(x_range[0]))   
    S+= np.diag(vec, 1)

    for m in xrange(1, N):
        vec =  np.full(N-m, 2*v(x_range[m+1]))
        vec[0]*= 0.5
        S += np.diag(vec, -m)
    return S

where v() is the said function and x_range is a vector of x-values. Is there a way to make this more efficient?

Edit: Here is a full example:

import numpy as np
import math 

N = 5
x_range = np.linspace(0, 1, N+1)


def v(x):
    return math.exp(x)

def makeS(N):
    vec = np.full(N, 2*v(x_range[1]))
    vec[0]*=0.5 
    S = np.diag(vec)

    vec = np.full(N-1, v(x_range[0]))   
    S+= np.diag(vec, 1)

    for m in xrange(1, N):
        vec =  np.full(N-m, 2*v(x_range[m+1]))
        vec[0]*= 0.5
        S += np.diag(vec, -m)
    return S


print makeS(N)

which outputs

[[ 1.22140276  1.          0.          0.          0.        ]
 [ 1.4918247   2.44280552  1.          0.          0.        ]
 [ 1.8221188   2.9836494   2.44280552  1.          0.        ]
 [ 2.22554093  3.6442376   2.9836494   2.44280552  1.        ]
 [ 2.71828183  4.45108186  3.6442376   2.9836494   2.44280552]]

Upvotes: 2

Views: 327

Answers (1)

Daniel Renshaw
Daniel Renshaw

Reputation: 34177

This is the fastest approach I could find:

def makeS(N):
    values = np.array([v(x) for x in x_range])
    values_doubled = 2 * values
    result = np.eye(N, k=1) * values[0]
    result[:, 0] = values[1:]
    for i in xrange(N - 1):
        result[i + 1, 1:i + 2] = values_doubled[1:i + 2][::-1]
    return result

With N=2000 the original takes 26.97 seconds on my machine while the new version takes 0.02339 seconds.

Here is the complete script for evaluating timings with some additional approaches.

import numpy as np
import math
import timeit


def v(x):
    return math.exp(x)


def makeS1(N, x_range):
    vec = np.full(N, 2 * v(x_range[1]))
    vec[0] *= 0.5
    S = np.diag(vec)
    vec = np.full(N - 1, v(x_range[0]))
    S += np.diag(vec, 1)
    for m in xrange(1, N):
        vec = np.full(N - m, 2 * v(x_range[m + 1]))
        vec[0] *= 0.5
        S += np.diag(vec, -m)
    return S


def makeS2(N, x_range):
    values = np.array([v(x) for x in x_range])
    values_doubled = 2 * values

    def value_at_position(ai, aj):
        result = np.zeros((N, N))
        for i, j in zip(ai.flatten(), aj.flatten()):
            if j > i + 1:
                continue
            elif j == i + 1:
                result[i, j] = values[0]
            elif j == 0:
                result[i, j] = values[i + 1]
            else:
                result[i, j] = values_doubled[i - j + 1]
        return result

    return np.fromfunction(value_at_position, (N, N))


def makeS3(N, x_range):
    values = np.array([v(x) for x in x_range])
    values_doubled = 2 * values
    result = np.zeros((N, N))
    for i in xrange(N):
        for j in xrange(min(i + 2, N)):
            if j == i + 1:
                result[i, j] = values[0]
            elif j == 0:
                result[i, j] = values[i + 1]
            else:
                result[i, j] = values_doubled[i - j + 1]
    return result


def makeS4(N, x_range):
    values = np.array([v(x) for x in x_range])
    values_doubled = 2 * values
    result = np.eye(N, k=1) * values[0]
    result[:, 0] = values[1:]
    for i in xrange(N - 1):
        result[i + 1, 1:i + 2] = values_doubled[1:i + 2][::-1]
    return result


def main():
    N = 2000
    x_range = np.random.randn(N + 1)

    start = timeit.default_timer()
    s1 = makeS1(N, x_range)
    print 'makeS1', timeit.default_timer() - start
    start = timeit.default_timer()
    s2 = makeS2(N, x_range)
    print 'makeS2', timeit.default_timer() - start
    start = timeit.default_timer()
    s3 = makeS3(N, x_range)
    print 'makeS3', timeit.default_timer() - start
    start = timeit.default_timer()
    s4 = makeS4(N, x_range)
    print 'makeS4', timeit.default_timer() - start
    if N < 10:
        print s1
        print s2
        print s2
        print s4
    assert np.allclose(s1, s2)
    assert np.allclose(s2, s3)
    assert np.allclose(s3, s4)


main()

On my machine, this produces the output:

makeS1 26.9707232448
makeS2 11.7728229076
makeS3 0.643742975052
makeS4 0.0233912765665

Upvotes: 1

Related Questions