Speed issues between a class in plain python and similar converted class with numpy

Question

Following are 2 classes,which do some calculation based on input and give back some output. One class is implemented in python another is converted into numpy array. So obviously 2 classes with python codes and numpy. Both classes return the same result. Purpose of numpy class is to speed up running performance. It turns out that in that case numpy class with array takes for ages to perfom the task, whereas the quicker performance is expected, Perhaps for loop slows numpy class. longness of class may not hope to bother you. Why numpy is slower and take more time? What is wrong?

A class with implementation of python:

import math
import numpy as np
from time import time
from  itertools import islice, count


## A function to generate for-loop numbers with decimals.
def seq(start, end, step):
    assert(step != 0)
    sample_count = int(abs(end - start) / step)
    return islice(count(start, step), sample_count)


class ULSMomentCapacity(object):
    
    def __init__(self, WIDTH, HEIGHT, SIZE_OF_REBAR_TOP, SIZE_OF_REBAR_BOTTOM, SIZE_OF_STIRRUP, STEEL_E_MODULE, STEEL_STRENGTH, COMPRESSIVE_STRENGTH_CONCRETE, CONCRETE_STRAIN,
                MAX_STEEL_STRAIN, MIN_STEEL_STRAIN, EXTERNAL_MOMENT):
        self.width, self.height = WIDTH, HEIGHT
        self.rebar_size_top, self.rebar_size_bottom, self.rebar_size_stirrup = SIZE_OF_REBAR_TOP, SIZE_OF_REBAR_BOTTOM, SIZE_OF_STIRRUP
        self.max_steel_strain, self.min_steel_strain, self.concrete_strain = 0.08, 0.002, 0.0035
        self.steel_E_module, self.steel_strength, self.compressive_concrete_strength = STEEL_E_MODULE, STEEL_STRENGTH, COMPRESSIVE_STRENGTH_CONCRETE
        self.external_moment = EXTERNAL_MOMENT
        self.number_of_rebars_xy = (7, 4)
        self.minimum_numbers_top_rebars = 2
        self.k1 = 0.8

    def getdata(self):
        
        self.effective_distance_y = [self.height - y  for y in [63.0, 103.5, 144.0, 184.5, 225.0] for x in range(self.number_of_rebars_xy[0]) ]
        self.effective_distance_y_top = [63, 63]
        self.effective_distance_y_bottom = [ y  for y in [63.0, 103.5, 144.0, 184.5, 225.0] for x in range(self.number_of_rebars_xy[0]) ]
        LAMBDA_AREA_REBARS = lambda SIZE_OF_REBAR:int(math.pi/4*(SIZE_OF_REBAR)**2)
        self.AREA_OF_REBARS_TOP = [LAMBDA_AREA_REBARS(self.rebar_size_top)  for x in range(self.minimum_numbers_top_rebars)] 
        MAX_NUMBER_REBAR =self.number_of_rebars_xy[0]*self.number_of_rebars_xy[1]

        for generator_rebars  in np.arange(2, MAX_NUMBER_REBAR+1):
            self.AREA_OF_REBARS_BOTTOM = [LAMBDA_AREA_REBARS(self.rebar_size_bottom)  for x in range(generator_rebars)] 
              
            for x in seq(0.1, self.height+0.1, 0.1):

                strain_in_rebars_top = [self.strain(self.concrete_strain , distance_of_rebar = self.effective_distance_y_top[i], x_iteration = x) for i in range(len(self.effective_distance_y_top)) ]
                strain_in_rebars_bottom = [self.strain(self.concrete_strain , distance_of_rebar = self.effective_distance_y[i], x_iteration = x) for i in range(len(self.effective_distance_y[:generator_rebars])) ]

                stress_in_rebars_top = [self.stress(self.steel_E_module , strain_in_rebars_top[i], self.steel_strength) for i in range(len(strain_in_rebars_top))]
                stress_in_rebars_bottom = [ self.stress(self.steel_E_module , strain_in_rebars_bottom[i], self.steel_strength) for i in range(len(strain_in_rebars_bottom)) ]
                
                normal_force = ( - 0.8 * x * self.width * self.compressive_concrete_strength +  self.addingforces(self.AREA_OF_REBARS_BOTTOM, stress_in_rebars_bottom) 
                                + self.addingforces(self.AREA_OF_REBARS_TOP, stress_in_rebars_top)) * 10**-3 

                if 0.50 >= round(normal_force, 2) >= -1.0:
                    moment = ( self.k1* x * self.width * self.compressive_concrete_strength * (self.height/2 - (self.k1* x)/2) + self.addingmomentsbottom(self.AREA_OF_REBARS_BOTTOM, stress_in_rebars_bottom,
                              self.effective_distance_y, self.height) - self.addingmomentstop(self.AREA_OF_REBARS_TOP, stress_in_rebars_top, self.effective_distance_y_top, self.height)) * 10**-6

                    if moment > self.external_moment and strain_in_rebars_bottom[0] > self.min_steel_strain and strain_in_rebars_bottom[0] < self.max_steel_strain :               
                        return moment, x, len(self.AREA_OF_REBARS_BOTTOM), self.AREA_OF_REBARS_BOTTOM, self.effective_distance_y_bottom
            

    @staticmethod
    def strain(concrete_strain, distance_of_rebar, x_iteration):
        concrete_strain = concrete_strain 
        distance_of_rebar = distance_of_rebar
        x_iteration =  x_iteration
        try:
          strain = - concrete_strain / x_iteration * ( x_iteration - distance_of_rebar)
        except (ZeroDivisionError, RuntimeWarning):
          strain = 0
        return strain

    @staticmethod
    def stress(STEEL_E_MODULE, strain_in_rebars, STEEL_STRENGTH):
        STRESS =  STEEL_E_MODULE * strain_in_rebars
        stress = - STEEL_STRENGTH if STRESS <= - STEEL_STRENGTH else STRESS if STRESS >= - STEEL_STRENGTH and STRESS < 0 else min(STRESS, STEEL_STRENGTH)
        return stress

    @staticmethod
    def addingforces(AREA_OF_REBARS, stress_in_rebars):
        force = 0
        for index in range(len(AREA_OF_REBARS)):
          force += AREA_OF_REBARS[index] * stress_in_rebars[index]
        return force


    @staticmethod
    def addingmomentsbottom(AREA_OF_REBARS, stress_in_rebars, distance_to_rebars, height):
        add_moment = 0
        for index in range(len(AREA_OF_REBARS)):
          add_moment += AREA_OF_REBARS[index] * stress_in_rebars[index] * (distance_to_rebars[index] - 0.5 * height )
        return add_moment
     

    @staticmethod
    def addingmomentstop(AREA_OF_REBARS, stress_in_rebars, distance_to_rebars, height):
        add_moment = 0
        for index in range(len(AREA_OF_REBARS)):
          add_moment += AREA_OF_REBARS[index] * stress_in_rebars[index] * ( 0.5 * height - distance_to_rebars[index] )
        return add_moment

A class with implementation of numpy array:

class ULSMomentCapacitynumpy(object):
    
    def __init__(self, WIDTH, HEIGHT, SIZE_OF_REBAR_TOP, SIZE_OF_REBAR_BOTTOM, SIZE_OF_STIRRUP, STEEL_E_MODULE, STEEL_STRENGTH, COMPRESSIVE_STRENGTH_CONCRETE, CONCRETE_STRAIN,
                MAX_STEEL_STRAIN, MIN_STEEL_STRAIN, EXTERNAL_MOMENT):
        self.width, self.height = WIDTH, HEIGHT
        self.rebar_size_top, self.rebar_size_bottom, self.rebar_size_stirrup = SIZE_OF_REBAR_TOP, SIZE_OF_REBAR_BOTTOM, SIZE_OF_STIRRUP
        self.max_steel_strain, self.min_steel_strain, self.concrete_strain = 0.08, 0.002, 0.0035
        self.steel_E_module, self.steel_strength, self.compressive_concrete_strength = STEEL_E_MODULE, STEEL_STRENGTH, COMPRESSIVE_STRENGTH_CONCRETE
        self.external_moment = EXTERNAL_MOMENT
        self.number_of_rebars_xy = (7, 4)
        self.minimum_numbers_top_rebars = 2
        self.k1 = 0.8

    def getdata(self):
        
        self.effective_distance_y_numpy = np.subtract( self.height, np.repeat([63.0, 103.5, 144.0, 184.5, 225.0], self.number_of_rebars_xy[0]))
        self.effective_distance_y_top_numpy = np.array([63, 63])
        self.effective_distance_y_bottom_numpy = np.repeat([63.0, 103.5, 144.0, 184.5, 225.0], self.number_of_rebars_xy[0])

        LAMBDA_AREA_REBARS = lambda SIZE_OF_REBAR:int(math.pi/4*(SIZE_OF_REBAR)**2)
        self.AREA_OF_REBARS_TOP_NUMPY = np.repeat(np.around(np.multiply( np.divide(np.pi, 4), np.power(self.rebar_size_top, 2)), 3), self.minimum_numbers_top_rebars)
        MAX_NUMBER_REBAR =self.number_of_rebars_xy[0]*self.number_of_rebars_xy[1]

        for generator_rebars  in np.arange(2, MAX_NUMBER_REBAR+1): 
            self.AREA_OF_REBARS_BOTTOM_NUMPY = np.repeat(np.around(np.multiply( np.divide(np.pi, 4), np.power(self.rebar_size_bottom, 2)), 3), generator_rebars)
              
            for x in np.arange(0.1, self.height+0.1, 0.1):
                
                strain_in_rebars_top_numpy = np.apply_along_axis(lambda i: self.strain(self.concrete_strain , distance_of_rebar = self.effective_distance_y_top_numpy[i], x_iteration = x),0,np.arange( self.effective_distance_y_top_numpy.size))
                strain_in_rebars_bottom_numpy =  np.apply_along_axis(lambda i: self.strain(self.concrete_strain , distance_of_rebar = self.effective_distance_y_numpy[i], x_iteration = x),0,np.arange( self.effective_distance_y_numpy[:generator_rebars].size))

                stress_in_rebars_top_numpy =  np.apply_along_axis(lambda i: self.stress(self.steel_E_module , strain_in_rebars_top_numpy[i], self.steel_strength),0,np.arange( strain_in_rebars_top_numpy.size))
                stress_in_rebars_bottom_numpy =  np.apply_along_axis(lambda i: self.stress(self.steel_E_module , strain_in_rebars_bottom_numpy[i], self.steel_strength),0,np.arange( strain_in_rebars_bottom_numpy.size)) 

                normal_force_numpy = np.divide(np.add(np.multiply(np.multiply(-0.8, x), np.multiply(self.width, self.compressive_concrete_strength)), 
                       np.add(self.addingforces(self.AREA_OF_REBARS_BOTTOM_NUMPY, stress_in_rebars_bottom_numpy), 
                              self.addingforces(self.AREA_OF_REBARS_TOP_NUMPY, stress_in_rebars_top_numpy))),1000)
                
                if 0.50 >= round(normal_force_numpy, 2) >= -1.0:                    
                    moment_numpy = np.divide(
                    np.add(
                           np.multiply(np.multiply(np.multiply(self.k1, x), np.multiply(self.width, self.compressive_concrete_strength)), np.subtract(np.divide(self.height,2), np.divide(np.multiply(self.k1, x),2)))       
                                                                                                                                                                            ,
                           np.subtract( self.addingmomentsbottomnumpy(self.AREA_OF_REBARS_BOTTOM_NUMPY, stress_in_rebars_bottom_numpy, self.effective_distance_y_numpy[:self.AREA_OF_REBARS_BOTTOM_NUMPY.size], self.height), 
                               self.addingmomentstopnumpy(self.AREA_OF_REBARS_TOP_NUMPY, stress_in_rebars_top_numpy, self.effective_distance_y_top_numpy[:self.AREA_OF_REBARS_TOP_NUMPY.size], self.height)                                             
                               )
                                   )   
                          ,
                          
                        1000000 )
                    if moment_numpy > self.external_moment and strain_in_rebars_bottom_numpy[0] > self.min_steel_strain and strain_in_rebars_bottom_numpy[0] < self.max_steel_strain :             
                        return moment_numpy, x, self.AREA_OF_REBARS_BOTTOM_NUMPY.size, self.AREA_OF_REBARS_BOTTOM_NUMPY, self.effective_distance_y_bottom_numpy
            
    @staticmethod
    def strain(concrete_strain, distance_of_rebar, x_iteration):
        concrete_strain = concrete_strain 
        distance_of_rebar = distance_of_rebar
        x_iteration =  x_iteration
        try:
          strain = np.multiply(                                                       
          np.subtract( x_iteration, distance_of_rebar ) ,
          np.divide(- concrete_strain, x_iteration )   )
        except (FloatingPointError ,ZeroDivisionError, RuntimeWarning):
          strain = 0
        return strain

    @staticmethod
    def stress( STEEL_E_MODULE, strain_in_rebars_array, STEEL_STRENGTH):
        STRESS =  STEEL_E_MODULE * strain_in_rebars_array
        stress_array = np.where(
        STRESS <= -STEEL_STRENGTH, -STEEL_STRENGTH,
        np.where(
          np.logical_and(STRESS >= -STEEL_STRENGTH, STRESS < 0), STRESS,
          np.minimum(STRESS, STEEL_STRENGTH)
             )
            )
        return stress_array
          
    @staticmethod
    def addingforces(AREA_OF_REBARS, stress_in_rebars):
        return np.sum(np.multiply(AREA_OF_REBARS, stress_in_rebars))

    @staticmethod
    def addingmomentsbottomnumpy(AREA_OF_REBARS, stress_in_rebars, distance_to_rebars, height):
        return np.sum(np.multiply(np.multiply( np.subtract(distance_to_rebars, np.multiply(0.5, height)),stress_in_rebars), AREA_OF_REBARS ))

    @staticmethod
    def addingmomentstopnumpy(AREA_OF_REBARS, stress_in_rebars, distance_to_rebars, height):
        return np.sum(np.multiply(np.multiply( np.subtract(np.multiply(0.5, height), distance_to_rebars),stress_in_rebars), AREA_OF_REBARS ))

The following codes to test speed of both classes, every arguments are fixed, just one value varies:

print("________________________Starting... ____________________________", "
")

def withpython():
    EXTERNAL_MOMENT=[100, 200, 300, 400, 500, 200, 400]
    c = [ULSMomentCapacity(WIDTH=400, HEIGHT=500, SIZE_OF_REBAR_TOP=16, SIZE_OF_REBAR_BOTTOM=16, SIZE_OF_STIRRUP=10, STEEL_E_MODULE=200000, 
                      STEEL_STRENGTH=425, COMPRESSIVE_STRENGTH_CONCRETE=20, CONCRETE_STRAIN=0.0035, MAX_STEEL_STRAIN=0.08, MIN_STEEL_STRAIN=0.002, EXTERNAL_MOMENT=n) for n in EXTERNAL_MOMENT]
    for i in range(len(c)):
        if c[i].getdata():
            M,x, n, As, dy = c[i].getdata()
    return M,x, n, As, dy

def withnumpy():
    EXTERNAL_MOMENT=[100, 200, 300, 400, 500, 200, 400]
    c = [ULSMomentCapacitynumpy(WIDTH=400, HEIGHT=500, SIZE_OF_REBAR_TOP=16, SIZE_OF_REBAR_BOTTOM=16, SIZE_OF_STIRRUP=10, STEEL_E_MODULE=200000, 
                      STEEL_STRENGTH=425, COMPRESSIVE_STRENGTH_CONCRETE=20, CONCRETE_STRAIN=0.0035, MAX_STEEL_STRAIN=0.08, MIN_STEEL_STRAIN=0.002, EXTERNAL_MOMENT=n) for n in EXTERNAL_MOMENT]
    for i in range(len(c)):
        if c[i].getdata():
            M,x, n, As, dy = c[i].getdata()
    return M,x, n, As, dy


tstart = time()
withpython()
tend = time()

tstartn = time()
withnumpy()
tendn = time()

print("Time taken with plain python:",round(tend-tstart,3),"
" ,"Time taken with numpy:", round(tendn-tstartn,3))

print("________________________Ending... ____________________________", "
")

Time differences:

________________________Starting... ____________________________ 

Time taken with plain python: 7.696
Time taken with numpy: 188.523
________________________Ending... ____________________________

Manu Vald&#233;s · Accepted Answer

This is my take, it reduces execution time with numpy to 0.05 seconds. With no documentation I can't guarantee results will be correct, but the philosophy behind the changes definitively is.

As has been said, you need to vectorize your logic, most importantly substituting loops for matrix operations. Other considerations include:

you can use arithmetic operators on arrays, no need for np.multiply, substract etc.
np.dot can substitute an element-wise multiplication and a sum.
function calls inside loops are a bad idea. Python calls are very expensive.
creating numpy arrays is expensive; using arange to create an array and use it to loop on another is a very very bad idea.

This is the code, with comment on every change; I have also removed all _numpy suffixes for sanity.

import numpy as np
import math

class ULSMomentCapacitynumpy(object):
    def __init__(
        self,
        WIDTH,
        HEIGHT,
        SIZE_OF_REBAR_TOP,
        SIZE_OF_REBAR_BOTTOM,
        SIZE_OF_STIRRUP,
        STEEL_E_MODULE,
        STEEL_STRENGTH,
        COMPRESSIVE_STRENGTH_CONCRETE,
        CONCRETE_STRAIN,
        MAX_STEEL_STRAIN,
        MIN_STEEL_STRAIN,
        EXTERNAL_MOMENT,
    ):
        self.width, self.height = WIDTH, HEIGHT
        self.rebar_size_top, self.rebar_size_bottom, self.rebar_size_stirrup = (
            SIZE_OF_REBAR_TOP,
            SIZE_OF_REBAR_BOTTOM,
            SIZE_OF_STIRRUP,
        )
        self.max_steel_strain, self.min_steel_strain, self.concrete_strain = (
            0.08,
            0.002,
            0.0035,
        )
        self.steel_E_module, self.steel_strength, self.compressive_concrete_strength = (
            STEEL_E_MODULE,
            STEEL_STRENGTH,
            COMPRESSIVE_STRENGTH_CONCRETE,
        )
        self.external_moment = EXTERNAL_MOMENT
        self.number_of_rebars_xy = (7, 4)
        self.minimum_numbers_top_rebars = 2
        self.k1 = 0.8

    def getdata(self):

        self.effective_distance_y = np.subtract(
            self.height,
            np.repeat([63.0, 103.5, 144.0, 184.5, 225.0], self.number_of_rebars_xy[0]),
        )
        self.effective_distance_y_top = np.array([63, 63])
        self.effective_distance_y_bottom = np.repeat(
            [63.0, 103.5, 144.0, 184.5, 225.0], self.number_of_rebars_xy[0]
        )
        # this lambda is not being used, 
        LAMBDA_AREA_REBARS = lambda SIZE_OF_REBAR: int(
            math.pi / 4 * (SIZE_OF_REBAR) ** 2
        )
        # Truncated rounding to 0 decimals, since original implementation is rounding to int
        self.AREA_OF_REBARS_TOP = np.ones(self.minimum_numbers_top_rebars) * np.around(
                math.pi / 4 * (self.rebar_size_top) ** 2, 0
            )

        MAX_NUMBER_REBAR = self.number_of_rebars_xy[0] * self.number_of_rebars_xy[1]

        for generator_rebars in np.arange(2, MAX_NUMBER_REBAR + 1):
            # Truncated rounding to 0 decimals, since original implementation is rounding to int
            self.AREA_OF_REBARS_BOTTOM = np.ones(generator_rebars) * np.around(
                math.pi / 4 * (self.rebar_size_bottom) ** 2, 0
            )
            # instead of looping the arange, we will work with it as an array
            x = np.arange(0.1, self.height + 0.1, 0.1)
            # this new axis makes x into a 2D array with size [5000,1], to allow for division by x
            # aftwards we will squeeze it back to a vector of size [5000,]
            x = x[:, np.newaxis]

            # unnecesary looping, array creation and lambda call substitued by array arithmetic.
            # No need to protect against division by zero since x never takes that value.
            strain_in_rebars_top = (x - self.effective_distance_y_top) * (
                -self.concrete_strain / x
            )
            # unnecesary looping, array creation and lambda call substitued by array arithmetic.
            strain_in_rebars_bottom = (
                x - self.effective_distance_y[:generator_rebars]
            ) * (-self.concrete_strain / x)
            # unnecesary looping, array creation and lambda call substitued by array arithmetic.
            stress_in_rebars_top = np.clip(
                self.steel_E_module * strain_in_rebars_top,
                -self.steel_strength,
                self.steel_strength,
            )
            # unnecesary looping, array creation and lambda call substitued by array arithmetic.
            stress_in_rebars_bottom = np.clip(
                self.steel_E_module * strain_in_rebars_bottom,
                -self.steel_strength,
                self.steel_strength,
            )
            # x can convert back to a vector since now we are going to use dot
            # np.dot multiplies 2 vectors item wise and the sums the result, what addingforces was doing.
            x = np.squeeze(x)

            normal_force = (
                -0.8 * x * self.width * self.compressive_concrete_strength
                + np.dot(self.AREA_OF_REBARS_BOTTOM, stress_in_rebars_bottom.T,)
                + np.dot(self.AREA_OF_REBARS_TOP, stress_in_rebars_top.T)
            ) * 10 ** -3

            # This is tricky; in the original loop you were returning after encountering
            # the first element that passed the condition. Because we have vectorized x
            # we calculate nomal_force for every value, and now we keep only the ones that pass
            # the condition. It might seem like unnecesary work is done, but the result is still
            # orders of magnitude faster than the scalar approach.

            normal_force = np.around(normal_force, 2)
            valid_force_indexes = np.argwhere(
                (normal_force <= 0.50) & (normal_force >= -1.0)
            )[:, 0]

            # we reduce x and stress to only those where a valid normal_force was found:
            x = x[valid_force_indexes]
            stress_in_rebars_bottom = stress_in_rebars_bottom[valid_force_indexes, :]
            stress_in_rebars_top = stress_in_rebars_top[valid_force_indexes]

            moment = (
                self.k1
                * x
                * self.width
                * self.compressive_concrete_strength
                * (self.height / 2 - (self.k1 * x) / 2)
                + np.sum(
                    self.AREA_OF_REBARS_BOTTOM
                    * stress_in_rebars_bottom
                    * (
                        self.effective_distance_y[: self.AREA_OF_REBARS_BOTTOM.shape[0]]
                        - 0.5 * self.height
                    )
                )
                - np.sum(
                    self.AREA_OF_REBARS_TOP
                    * stress_in_rebars_top
                    * (-0.5 * self.height - self.effective_distance_y_top)
                )
            ) * 10 ** -6

            # finally we loop the valid x points, and return the first that matches the condition
            for moment, strain, x in zip(
                moment, strain_in_rebars_bottom[valid_force_indexes, 0], x
            ):

                if (
                    (moment > self.external_moment)
                    and (strain > self.min_steel_strain)
                    and (strain < self.max_steel_strain)
                ):

                    return (
                        moment,
                        x,
                        len(self.AREA_OF_REBARS_BOTTOM),
                        self.AREA_OF_REBARS_BOTTOM,
                        self.effective_distance_y_bottom,
                    )

Speed issues between a class in plain python and similar converted class with numpy

Answers (1)

Related Questions