Reputation: 3064
I am trying to simulate rain using NumPy, they say an image is more than a thousand words so here is a description longer than two thousand words:
I already wrote the code, but I think my implementation is inefficient, so I want to know if NumPy has any builtin functions that can speed up the process:
import numpy as np
from PIL import Image
from random import random, randbytes
def rain(width, strikes=360, color=True, lw=3):
assert not width % 16
height = int(width * 9 / 16)
img = np.zeros((height, width, 3), dtype=np.uint8)
half = height / 2
for i in range(strikes):
x = round(random() * width)
y = round(height - random() * half)
x1 = min(x + lw, width - 1)
if color:
rgb = list(randbytes(3))
else:
rgb = (178, 255, 255)
img[0:y, x:x1] = rgb
return img
img1 = Image.fromarray(rain(1920))
img1.show()
img1.save('D:/rain.jpg', format='jpeg', quality=80, optimize=True)
img2 = Image.fromarray(rain(1920, color=False))
img2.show()
img2.save('D:/rain_1.jpg', format='jpeg', quality=80, optimize=True)
Upvotes: 1
Views: 176
Reputation: 91
So the easiest way to speed up code using NumPy is to utilize broadcasting and element-by-element operations, so that less efficient for-loops can be avoided. Below is a performance comparison between my algorithm (rain2) and OP’s (rain1):
import numpy.random as npr
from random import random, randbytes
from PIL import Image
import profile
def rain1(width, strikes=360, color=True, lw=3):
assert not width % 16
height = int(width * 9 / 16)
img = np.zeros((height, width, 3), dtype=np.uint8)
half = height / 2
for i in range(strikes):
x = round(random() * width)
y = round(height - random() * half)
x1 = min(x + lw, width - 1)
if color:
rgb = list(randbytes(3))
else:
rgb = (178, 255, 255)
img[0:y, x:x1] = rgb
return img
def rain2(width, strikes=360, color=True, lw=3):
assert not width % 16
height = width*9//16
[inds,] = np.indices((width,))
img = np.zeros((height, width, 4), dtype=np.uint8)
img[:,:,3] = 255
half = height/2
# randint from numpy.random lets you
# define a lower and upper bound,
# and number of points.
x = list(set(npr.randint(0, width-lw-1, (strikes,))))
x = np.sort(x)
y = npr.randint(half, height, (len(x),))
if color:
rgb = npr.randint(0, 255, (len(x), 3), dtype=np.uint8)
else:
rgb = np.array([178, 255, 255], dtype=np.uint8)
for offset in range(lw):
img[:,x+offset,3] = 0
img[:,x+offset,:3] = rgb
for xi, yi in zip(x, y):
img[0:yi,xi:xi+lw,3] = 255
return img
def example_test_old(strikes, disp_im=True):
img1 = Image.fromarray(rain1(1920, strikes=strikes))
if disp_im: img1.show()
img1.save('rain1.jpg', format='jpeg', quality=80, optimize=True)
img2 = Image.fromarray(rain1(1920, strikes=strikes, color=False))
if disp_im: img2.show()
img2.save('rain1.jpg', format='jpeg', quality=80, optimize=True)
def example_test_new(strikes, disp_im=True):
img1 = Image.fromarray(rain2(1920, strikes=strikes))
if disp_im: img1.show()
img1.save('rain2.png', format='png', quality=80, optimize=True)
img2 = Image.fromarray(rain2(1920, strikes=strikes, color=False))
if disp_im: img2.show()
img2.save('rain2.png', format='png', quality=80, optimize=True)
if __name__ == "__main__":
# Execute only if this module is not imported into another script
example_test_old(360)
example_test_new(360)
profile.run('example_test_old(100000, disp_im=False)')
profile.run('example_test_new(100000, disp_im=False)')
On my PC this speeds it up by a factor of 14.5! Hope this helps.
Upvotes: 0
Reputation: 3141
I was able to improve by 2 to 4 times faster.
Since raindrops do not stop in the upper half of the image, the upper half can be stretched out from the lower half after all strikes end.
Since broadcasting tuples is relatively slow, use 32-bit format color instead.
def rain(width=1920, strikes=360, color=True, lw=3):
assert not width % 16
height = int(width * 9 / 16)
img = np.zeros((height, width), dtype=np.uint32)
half = height / 2
upper_bottom = int(half) - 1
alpha = 255 << 24
# Paint background.
# The upper half will always be overwritten and can be skipped.
img[upper_bottom:] = alpha
for i in range(strikes):
x = round(random() * width)
y = round(height - random() * half)
x1 = min(x + lw, width - 1)
if color:
# Pack color into int. See below for details.
rgb = int.from_bytes(randbytes(3), 'big') + alpha
else:
# This is how to pack color into int.
r, b, g = 178, 255, 255
rgb = r + (g << 8) + (b << 16) + alpha
# Only the lower half needs to be painted in this loop.
img[upper_bottom:y, x:x1] = rgb
# The upper half can simply be stretched from the lower half.
img[:upper_bottom] = img[upper_bottom]
# Unpack uint32 to uint8 x4 without deep copying.
img = img.view(np.uint8).reshape((height, width, 4))
return img
Note:
img
to PIL.Image
, compare its performance too as it is also improved.Because of the rain overlaps each other (which makes removing for-loop hard) and because the strikes are not so many (which makes the room for improvement small), I find it difficult to optimize further. Hopefully this is enough.
Upvotes: 2