abcd
abcd

Reputation: 10751

Fast sampling from a user-defined distribution

A simulation I'm running requires me to draw values from a probability distribution. I'm doing this as follows:

import numpy as np
import scipy.special as sp
from scipy.stats import rv_continuous


class epsilon_pdf(rv_continuous):

    def _pdf(self, x, omega):

        return np.exp(omega ** -1 * np.cos(x)) / (2 * np.pi *
                                                  sp.iv(0, omega ** -1))


random_epsilon = epsilon_pdf(a=-np.pi, b=np.pi)
n_trials = 1  # 10 ** 6
goal_dict = {'omega': 2 ** -4, 'xi': 2 ** 0}
for trial_num in xrange(n_trials):
    # Choose m.
    m = np.random.poisson(goal_dict['xi'])
    # Draw m values for epsilon.
    epsilon_values = random_epsilon.rvs(omega=goal_dict['omega'], size=m)

(What's written above is a minimal toy example.)

A major problem I've encountered is that the call to random_epsilon.rvs is incredibly slow -- so slow that when I set n_trials to the needed 10 ** 6, certain values of omega and xi cause the script to take upwards of 377 hours to finish.

Can anyone think of a reformulation in Python code of my probability distribution and my sampling from it that would be faster? (Perhaps there is a way to do this using numpy that would be faster?)

(I'm not sure whether my distribution is a standard one that has been given a name.)

Upvotes: 2

Views: 923

Answers (1)

Abhinav Ramakrishnan
Abhinav Ramakrishnan

Reputation: 1090

import numpy as np
from scipy.stats import rv_continuous
import scipy.special as sp
import time
from numpy import ma as ma

class epsilon_pdf(rv_continuous):
    def _pdf(self, x, omega):
        return np.exp(omega ** -1 * np.cos(x)) / (2 * np.pi *
                                                  sp.iv(0, omega ** -1))

random_epsilon = epsilon_pdf(a=-np.pi, b=np.pi)
n_trials = 1000  # 10 ** 6
goal_dict = {'omega': 2 ** -4, 'xi': 2 ** 0}

random_epsilon_rvs = lambda x: random_epsilon.rvs(
    omega=goal_dict['omega'], 
    size = x
    )

random_epsilon_rvs = np.vectorize(random_epsilon_rvs, otypes=[np.ndarray])

t0 = time.time()
for trial_num in xrange(n_trials):
    # Choose m.
    m = np.random.poisson(goal_dict['xi'])
    # Draw m values for epsilon.
    epsilon_values = random_epsilon.rvs(omega=goal_dict['omega'], size=m)

t1 = time.time()
a = np.random.poisson(2**-4, size = (1, n_trials))[0]
mask = a>0
b = a[mask]
c = random_epsilon_rvs(b)

t2 = time.time()

print t1-t0,t2-t1
# 11.7730000019 vs 0.682999849319

Upvotes: 1

Related Questions