marshall
marshall

Reputation: 2483

How to speed up matrix code

I have the following simple code which estimates the probability that an h by n binary matrix has a certain property. It runs in exponential time (which is bad to start with) but I am surprised it is so slow even for n = 12 and h = 9.

#!/usr/bin/python

import numpy as np
import itertools

n = 12
h = 9

F = np.matrix(list(itertools.product([0,1],repeat = n))).transpose()

count = 0
iters = 100
for i in xrange(iters):
    M =  np.random.randint(2, size=(h,n))
    product = np.dot(M,F)
    setofcols = set()
    for column in product.T:
        setofcols.add(repr(column))
    if (len(setofcols)==2**n):
        count = count + 1
print count*1.0/iters

I have profiled it using n = 10 and h = 7. The output is rather long but here are the lines that took more time.

        23447867 function calls (23038179 primitive calls) in 35.785 seconds

   Ordered by: standard name

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        2    0.002    0.001    0.019    0.010 __init__.py:1(<module>)
        1    0.001    0.001    0.054    0.054 __init__.py:106(<module>)
        1    0.001    0.001    0.022    0.022 __init__.py:15(<module>)
        2    0.003    0.002    0.013    0.006 __init__.py:2(<module>)
        1    0.001    0.001    0.003    0.003 __init__.py:38(<module>)
        1    0.001    0.001    0.001    0.001 __init__.py:4(<module>)
        1    0.001    0.001    0.004    0.004 __init__.py:45(<module>)
        1    0.001    0.001    0.002    0.002 __init__.py:88(<module>)
   307200    0.306    0.000    1.584    0.000 _methods.py:24(_any)
   102400    0.026    0.000    0.026    0.000 arrayprint.py:22(product)
   102400    1.345    0.000   32.795    0.000 arrayprint.py:225(_array2string)
307200/102400    1.166    0.000   33.350    0.000 arrayprint.py:335(array2string)
   716800    0.820    0.000    1.162    0.000 arrayprint.py:448(_extendLine)
204800/102400    1.699    0.000    5.090    0.000 arrayprint.py:456(_formatArray)
   307200    0.651    0.000   22.510    0.000 arrayprint.py:524(__init__)
   307200   11.783    0.000   21.859    0.000 arrayprint.py:538(fillFormat)
  1353748    1.920    0.000    2.537    0.000 arrayprint.py:627(_digits)
   102400    0.576    0.000    2.523    0.000 arrayprint.py:636(__init__)
   716800    2.159    0.000    2.159    0.000 arrayprint.py:649(__call__)
   307200    0.099    0.000    0.099    0.000 arrayprint.py:658(__init__)
   102400    0.163    0.000    0.225    0.000 arrayprint.py:686(__init__)
   102400    0.307    0.000   13.784    0.000 arrayprint.py:697(__init__)
   102400    0.110    0.000    0.110    0.000 arrayprint.py:713(__init__)
   102400    0.043    0.000    0.043    0.000 arrayprint.py:741(__init__)
        1    0.003    0.003    0.003    0.003 chebyshev.py:87(<module>)
        2    0.001    0.000    0.001    0.000 collections.py:284(namedtuple)
        1    0.277    0.277   35.786   35.786 counterfeit.py:3(<module>)
   205002    0.222    0.000    0.247    0.000 defmatrix.py:279(__array_finalize__)
   102500    0.747    0.000    1.077    0.000 defmatrix.py:301(__getitem__)
   102400    0.322    0.000   34.236    0.000 defmatrix.py:352(__repr__)
   102400    0.100    0.000    0.508    0.000 fromnumeric.py:1087(ravel)
   307200    0.382    0.000    2.829    0.000 fromnumeric.py:1563(any)
      271    0.004    0.000    0.005    0.000 function_base.py:3220(add_newdoc)
        1    0.003    0.003    0.003    0.003 hermite.py:59(<module>)
        1    0.003    0.003    0.003    0.003 hermite_e.py:59(<module>)
        1    0.001    0.001    0.002    0.002 index_tricks.py:1(<module>)
        1    0.003    0.003    0.003    0.003 laguerre.py:59(<module>)
        1    0.003    0.003    0.003    0.003 legendre.py:83(<module>)
        1    0.001    0.001    0.001    0.001 linalg.py:10(<module>)
        1    0.001    0.001    0.001    0.001 numeric.py:1(<module>)
   102400    0.247    0.000   33.598    0.000 numeric.py:1365(array_repr)
   204800    0.321    0.000    1.143    0.000 numeric.py:1437(array_str)
   614400    1.199    0.000    2.627    0.000 numeric.py:2178(seterr)
   614400    0.837    0.000    0.918    0.000 numeric.py:2274(geterr)
   102400    0.081    0.000    0.186    0.000 numeric.py:252(asarray)
   307200    0.259    0.000    0.622    0.000 numeric.py:322(asanyarray)
        1    0.003    0.003    0.004    0.004 polynomial.py:54(<module>)
   513130    0.134    0.000    0.134    0.000 {isinstance}
   307229    0.075    0.000    0.075    0.000 {issubclass}
5985327/5985305    0.595    0.000    0.595    0.000 {len}
 306988    0.120    0.000    0.120    0.000 {max}
   102400    0.061    0.000    0.061    0.000 {method '__array__' of 'numpy.ndarray' objects}
   102406    0.027    0.000    0.027    0.000 {method 'add' of 'set' objects}
   307200    0.241    0.000    1.824    0.000 {method 'any' of 'numpy.ndarray' objects}
   307200    0.482    0.000    0.482    0.000 {method 'compress' of 'numpy.ndarray' objects}
   204800    0.035    0.000    0.035    0.000 {method 'item' of 'numpy.ndarray' objects}
   102451    0.014    0.000    0.014    0.000 {method 'join' of 'str' objects}
   102400    0.222    0.000    0.222    0.000 {method 'ravel' of 'numpy.ndarray' objects}
   921176    3.330    0.000    3.330    0.000 {method 'reduce' of 'numpy.ufunc' objects}
   102405    0.057    0.000    0.057    0.000 {method 'replace' of 'str' objects}
  2992167    0.660    0.000    0.660    0.000 {method 'rstrip' of 'str' objects}
   102400    0.041    0.000    0.041    0.000 {method 'splitlines' of 'str' objects}
        6    0.003    0.000    0.003    0.001 {method 'sub' of '_sre.SRE_Pattern' objects}
   307276    0.090    0.000    0.090    0.000 {min}
      100    0.013    0.000    0.013    0.000 {numpy.core._dotblas.dot}
   409639    0.473    0.000    0.473    0.000 {numpy.core.multiarray.array}
  1228800    0.239    0.000    0.239    0.000 {numpy.core.umath.geterrobj}
   614401    0.352    0.000    0.352    0.000 {numpy.core.umath.seterrobj}
   102475    0.031    0.000    0.031    0.000 {range}
   102400    0.076    0.000    0.102    0.000 {reduce}
204845/102445    0.198    0.000   34.333    0.000 {repr}

The multiplication of the matrices seems to take a tiny fraction of the time. Is it possible to speed up the rest?

Results

There are now three answers but one seems to have a bug currently. I have tested the remaining two with n=18, h=11 and iters=10 .

Interestingly, the time for multiplying the matrices is still a tiny fraction of the overall time taken.

Upvotes: 4

Views: 1283

Answers (3)

hpaulj
hpaulj

Reputation: 231385

Try replacing repr(col) with

setofcols.add(tuple(column.A1.tolist()))

set accepts a tuple. column.A1 is the matrix converted to a 1d array. The tuple is then something like (0, 1, 0), which set can easily compare.

Just replacing the expensive repr formatting lops off a lot of time (25x speedup).

EDIT

By creating and filling the set in one statement I get a further 10x speed up. In my tests it is 2x faster than bubble's vectorization.

count = 0
for i in xrange(iters):
    M =  np.random.randint(2, size=(h,n))
    product = np.dot(M,F)
    setofcols = set(tuple(x) for x in product.T.tolist())
    # or {tuple(x) for x in product.T.tolist()} if new enough Python
    if (len(setofcols)==2**n):
        count += 1
        # print M # to see the unique M
print count*1.0/iters

EDIT

Here's something even faster - transform each column of 9 integers into 1, using dot([1,10,100,...],column). Then apply np.unique (or set) to the list of integers. It's a 2-3x further speedup.

count = 0
X = 10**np.arange(h)
for i in xrange(iters):
    M =  np.random.randint(2, size=(h,n))
    product = np.dot(M,F)
    setofcols = np.unique(np.dot(X,product).A1)
    if (setofcols.size==2**n):
        count += 1
print count*1.0/iters

With this the top calls are

  200    0.201    0.001    0.204    0.001 {numpy.core._dotblas.dot}
  100    0.026    0.000    0.026    0.000 {method 'sort' of 'numpy.ndarray' objects}
  100    0.007    0.000    0.035    0.000 arraysetops.py:93(unique)

Upvotes: 2

val
val

Reputation: 8689

As alko and seberg pointed out, you are loosing a lot of time converting your arrays to large strings to store them in your set of columns.

If I understood your code correctly, you are trying to find if the number of different columns in your product matrix is equal to the length of this matrix. You can do that easily by sorting it and looking at differences from one column to the next:

D = (np.diff(np.sort(product.T, axis=0), axis=0) == 0)

This will give you a matrix of booleans D. You can then see whether at least one element changes from one column to the next:

C = (1 - np.prod(D, axis=1)) # i.e. 'not all(D[i,:]) for all i'

You then simply have to take see whether all the values are different:

hasproperty = np.all(C)

Which gives you the complete code:

def f(n, h, iters):
    F = np.array(list(itertools.product([0,1], repeat=n))).T
    counts = []
    for _ in xrange(iters):
        M = np.random.randint(2, size=(h,n))
        product = M.dot(F)
        D = (np.diff(np.sort(product.T, axis=1), axis=0) == 0)
        C =  (1 - np.prod(D, axis=1))
        hasproperty = np.all(C)
        counts.append(1. if hasproperty else 0.)
    return np.mean(counts)

Which takes roughly 8s for f(12, 9, 100).

If you prefer comically compact expressions:

def g(n, h, iters):
    F = np.array(list(itertools.product([0,1], repeat=n))).T
    return np.mean([np.all(1 - np.prod(np.diff(np.sort(np.random.randint(2,size=(h,n)).dot(F).T, axis=1), axis=0)==0, axis=1)) for _ in xrange(iters)])

Timing it gives:

>>> setup = """import numpy as np
def g(n, h, iters):
    F = np.array(list(itertools.product([0,1], repeat=n))).T
    return np.mean([np.all(1 - np.prod(np.diff(np.sort(np.random.randint(2,size=(h,n)).dot(F).T, axis=1), axis=0)==0, axis=1)) for _ in xrange(iters)])
"""
>>> timeit.timeit('g(10, 7, 100)', setup=setup, number=10)
17.358669997900734
>>> timeit.timeit('g(10, 7, 100)', setup=setup, number=50)
83.06966196163967

Or approximatively 1.7s per call to g(10,7,100).

Upvotes: 1

bubble
bubble

Reputation: 1672

To speed up the code above you should avoid loops.

import numpy as np
import itertools

def unique_rows(a):
    a = np.ascontiguousarray(a)
    unique_a = np.unique(a.view([('', a.dtype)]*a.shape[1]))
    return unique_a.view(a.dtype).reshape((unique_a.shape[0], a.shape[1]))


n = 12
h = 9
iters=100
F = np.matrix(list(itertools.product([0,1],repeat = n))).transpose()
M =  np.random.randint(2, size=(h*iters,n))
product = np.dot(M,F)
counts = map(lambda x: len(unique_rows(x.T))==2**n, np.split(product,iters,axis=0))
prob=float(sum(counts))/iters

#All unique submatrices M (hxn) with the sophisticated property...
[np.split(M,iters,axis=0)[j] for j in range(len(counts)) if counts[j]==True]

Upvotes: 3

Related Questions