[CUDA] CPU vs GPU with Python

Published:

This post compares the speed of the CPU and the GPU from Python, using Numba to compile the same code for each target.
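Before timing anything, it helps to confirm that Numba can actually see a CUDA-capable device. A minimal check (assuming Numba and the NVIDIA driver are installed):

from numba import cuda

# True if a CUDA driver and at least one supported GPU are found
print(cuda.is_available())

# Prints a summary of each detected device
cuda.detect()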

Multiplication

Numba's @vectorize decorator compiles a scalar function into a NumPy ufunc that is applied element-wise over whole arrays; the target argument selects where it runs.

When we use the CPU:

import numpy as np
from timeit import default_timer as timer
from numba import vectorize

# Compile the scalar function into a ufunc that runs on the CPU
@vectorize(["float32(float32, float32)"], target='cpu')
def MultiplyMyVectors(a, b):
    return a * b

def main():
    N = 64000000
    A = np.ones(N, dtype=np.float32)
    B = np.ones(N, dtype=np.float32)

    start = timer()
    C = MultiplyMyVectors(A, B)
    vector_multiply_time = timer() - start

    print("C[:6] = " + str(C[:6]))
    print("C[-6:] = " + str(C[-6:]))
    print("This multiplication took %f seconds" % vector_multiply_time)

main()

C[:6] = [1. 1. 1. 1. 1. 1.]
C[-6:] = [1. 1. 1. 1. 1. 1.]
This multiplication took 0.094402 seconds
When we use the GPU:

import numpy as np
from timeit import default_timer as timer
from numba import vectorize

# Same ufunc, compiled for the CUDA target: each element is handled by a GPU thread
@vectorize(["float32(float32, float32)"], target='cuda')
def MultiplyMyVectors(a, b):
    return a * b

def main():
    N = 64000000
    A = np.ones(N, dtype=np.float32)
    B = np.ones(N, dtype=np.float32)

    start = timer()
    # The NumPy arrays are copied to the device, multiplied, and copied back
    C = MultiplyMyVectors(A, B)
    vector_multiply_time = timer() - start

    print("C[:6] = " + str(C[:6]))
    print("C[-6:] = " + str(C[-6:]))
    print("This multiplication took %f seconds" % vector_multiply_time)

main()

C[:6] = [1. 1. 1. 1. 1. 1.]
C[-6:] = [1. 1. 1. 1. 1. 1.]
This multiplication took 0.627809 seconds

The GPU is slower here despite the parallel hardware: a single multiply per element is too little work to hide the cost of copying 64 million floats to the device and back.
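Most of that time is spent moving data, not multiplying. One way to reduce the overhead is to keep the arrays on the device between calls; a minimal sketch, assuming the MultiplyMyVectors ufunc and arrays A and B as defined above:

from numba import cuda

d_A = cuda.to_device(A)            # copy the inputs to the GPU once
d_B = cuda.to_device(B)
d_C = cuda.device_array_like(A)    # allocate the output on the device

MultiplyMyVectors(d_A, d_B, out=d_C)   # runs without per-call host<->device copies
C = d_C.copy_to_host()                 # fetch the result only when needed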

Filling an array

Next, a simple loop that increments every element of an array, first in pure Python and then compiled with Numba's @jit:

import numpy as np
from timeit import default_timer as timer
from numba import jit

N = 1000000

# Plain Python loop: every iteration is interpreted
def FillArrayWithoutGPU(a):
    for k in range(N):
        a[k] += 1

# The same loop, JIT-compiled by Numba
@jit(target_backend='cuda')
def FillArrayWithGPU(a):
    for k in range(N):
        a[k] += 1

a = np.ones(N, dtype=np.float64)

start = timer()
FillArrayWithoutGPU(a)
print("On a CPU: ", timer() - start)

start = timer()
FillArrayWithGPU(a)
print("On a GPU: ", timer() - start)
On a CPU:  0.24581730199997764
On a GPU:  0.07611533699991924
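One caveat: the first call to a @jit-compiled function includes compilation time, so the number above mixes compiling and running. A fairer measurement warms the function up first:

FillArrayWithGPU(a)                  # first call triggers JIT compilation
start = timer()
FillArrayWithGPU(a)                  # now only the compiled loop is timed
print("Warm run: ", timer() - start)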

Mandelbrot

Finally, a heavier workload: rendering the Mandelbrot set, where every pixel requires its own iterative computation.

import numpy as np
from matplotlib.pyplot import imshow, show
from timeit import default_timer as timer

# Escape-time iteration: returns how quickly the point (x, y) diverges,
# or 255 if it is still bounded after max_iters iterations
def mandelbrot(x, y, max_iters):
    c = complex(x, y)
    z = 0.0j
    for i in range(max_iters):
        z = z * z + c
        if (z.real * z.real + z.imag * z.imag) >= 4:
            return i
    return 255

# Map every pixel of the image to a point in the complex plane
def create_fractal(min_x, max_x, min_y, max_y, image, iters):
    width = image.shape[1]
    height = image.shape[0]
    pixel_size_x = (max_x - min_x) / width
    pixel_size_y = (max_y - min_y) / height

    for x in range(width):
        real = min_x + x * pixel_size_x
        for y in range(height):
            imag = min_y + y * pixel_size_y
            color = mandelbrot(real, imag, iters)
            image[y, x] = color

image = np.zeros((500 * 10, 750 * 10), dtype=np.uint8)

s = timer()
create_fractal(-2.0, 1.0, -1.0, 1.0, image, 20)
e = timer()
print("Execution time on CPU: %f seconds" % (e - s))
imshow(image)
show()

(TODO: add output image)

Execution time on CPU: 133.978606 seconds
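Much of that time is interpreter overhead rather than arithmetic. As a point of reference, the same functions can be JIT-compiled for the CPU before involving the GPU at all; a sketch reusing the function bodies above:

from numba import jit

# JIT-compile the pure-Python mandelbrot for the CPU
mandelbrot_fast = jit(nopython=True)(mandelbrot)

@jit(nopython=True)
def create_fractal_fast(min_x, max_x, min_y, max_y, image, iters):
    width = image.shape[1]
    height = image.shape[0]
    pixel_size_x = (max_x - min_x) / width
    pixel_size_y = (max_y - min_y) / height
    for x in range(width):
        real = min_x + x * pixel_size_x
        for y in range(height):
            imag = min_y + y * pixel_size_y
            image[y, x] = mandelbrot_fast(real, imag, iters)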
The CUDA version assigns one GPU thread to each pixel:

import numpy as np
from matplotlib.pyplot import imshow, show
from timeit import default_timer as timer
from numba import cuda

# Device function: callable only from GPU code
@cuda.jit(device=True)
def mandelbrot(x, y, max_iters):
    c = complex(x, y)
    z = 0.0j
    for i in range(max_iters):
        z = z * z + c
        if (z.real * z.real + z.imag * z.imag) >= 4:
            return i
    return 255

# Kernel: each thread computes the color of a single pixel
@cuda.jit
def create_fractal(min_x, max_x, min_y, max_y, image, iters):
    width = image.shape[1]
    height = image.shape[0]
    pixel_size_x = (max_x - min_x) / width
    pixel_size_y = (max_y - min_y) / height

    # Absolute (x, y) position of this thread in the 2D grid
    x, y = cuda.grid(2)
    if x < width and y < height:
        real = min_x + x * pixel_size_x
        imag = min_y + y * pixel_size_y
        color = mandelbrot(real, imag, iters)
        image[y, x] = color

# Twice the CPU image's resolution in each dimension
image = np.zeros((500 * 10 * 2, 750 * 10 * 2), dtype=np.uint8)

# 32x32 threads per block; enough blocks to cover the whole image
nthreads = 32
nblocksy = ((500 * 10 * 2) // nthreads) + 1
nblocksx = ((750 * 10 * 2) // nthreads) + 1

s = timer()
create_fractal[(nblocksx, nblocksy), (nthreads, nthreads)](-2.0, 1.0, -1.0, 1.0, image, 20)
e = timer()
print("Execution time on GPU: %f seconds" % (e - s))
imshow(image)
show()

(TODO: add output image)

Execution time on GPU: 0.691364 seconds
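Kernel launches are asynchronous. The timing above still works because Numba copies image back to the host after the launch, which waits for the kernel, but when timing a kernel in isolation it is safer to synchronize explicitly:

s = timer()
create_fractal[(nblocksx, nblocksy), (nthreads, nthreads)](-2.0, 1.0, -1.0, 1.0, image, 20)
cuda.synchronize()   # block until all queued GPU work has finished
e = timer()
print("Execution time on GPU: %f seconds" % (e - s))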
