home

NumPy

Array Programming extension to Python.

ndarray

Array Creation

Creating ndarray using the array function

import numpy as np

# A flat Python list becomes a 1-D array.
values = [2, 4, 8]
arr = np.array(values)
print(arr)
# [2 4 8]

# A list of equal-length lists becomes a 2-D array, one inner list per row.
rows = [[1, 2, 3], [4, 5, 6]]
arr = np.array(rows)
print(arr)
# [[1 2 3]
#  [4 5 6]]

# In both cases the object returned by np.array is an ndarray.
print(type(arr))
# <class 'numpy.ndarray'>

Creating ndarray using the arange function

import numpy as np

# np.arange(2) produces [0 1]; two such rows are stacked into a 2x2 array.
row = np.arange(2)
arr = np.array([row, row])

print(arr)
# [[0 1]
#  [0 1]]

Creating an ndarray with evenly spaced values

import numpy as np

# Five evenly spaced samples from 0 to 10; both endpoints are included.
samples = np.linspace(start=0, stop=10, num=5)
print(samples)
# [ 0.   2.5  5.   7.5 10. ]

References

Indexing and Slicing

Indexing

ndarrays can be indexed using the standard Python x[obj] syntax, where x is the array and obj the selection.

import numpy as np

arr = np.arange(4).reshape(2, 2)

print(arr)
# [[0 1]
#  [2 3]]

# A single index selects along the first axis, i.e. a whole row.
print(arr[0])
# [0 1]

# One index per axis selects a single element.
print(arr[0, 0])
# 0

Fancy Indexing

Passing a list of indices.

import numpy as np

arr = np.arange(10)
print(arr)
# [0 1 2 3 4 5 6 7 8 9]

# Indexing with a list of positions returns exactly those elements, in order.
positions = [0, 2, 4]
print(arr[positions])
# [0 2 4]

Slicing

A slice is an object containing a portion of a sequence. A slice is created using the subscript notation [], with colons between numbers when several are given, as in x[start:stop:step].

import numpy as np

arr = np.arange(4)

# start (2) is included, stop (4) is excluded.
print(arr[2:4])
# [2 3]

Arithmetic with NumPy Arrays

Arithmetic operators on arrays apply element-wise. A new array is created and filled with the result.

import numpy as np

arr = np.array(
    [
        [-1, -2, -3],
        [4, 5, 6],
    ]
)

# Operations on numpy arrays of same size
print(arr + arr)
# [[-2 -4 -6]
#  [ 8 10 12]]

print(arr * arr)
# [[ 1  4  9]
#  [16 25 36]]

# Comparing elements of same size arrays: the result is a Boolean array.
print(arr > arr * 2)
# [[ True  True  True]
#  [False False False]]

Aggregates

A List of Available Aggregates

np.sum Compute sum of elements
np.prod Compute product of elements
np.mean Compute mean of elements
np.std Compute standard deviation
np.var Compute variance
np.min Find minimum value
np.max Find maximum value
np.argmin Find index of minimum value
np.argmax Find index of maximum value
np.median Compute median of elements
np.percentile Compute rank-based statistics of elements
np.any Evaluate whether any elements are true
np.all Evaluate whether all elements are true

Aggregates on 1-D Arrays

import numpy as np

arr = np.array([1, 2, 3, 4])

# Running sum: entry i is arr[0] + ... + arr[i].
running_sum = arr.cumsum()
print(running_sum)
# [ 1  3  6 10]

# Product of all elements.
product = arr.prod()
print(product)
# 24

# Running product: entry i is arr[0] * ... * arr[i].
running_product = arr.cumprod()
print(running_product)
# [ 1  2  6 24]

# Arithmetic mean of all elements.
average = arr.mean()
print(average)
# 2.5
# Also: sum, min, max..

Aggregates on 2-D Arrays

The axis parameter controls whether the aggregate is computed per column (axis=0) or per row (axis=1).

import numpy as np

arr = np.arange(1, 5).reshape(2, 2)
print(arr)
# [[1 2]
#  [3 4]]

# Without axis, the whole matrix is reduced to a single value.
overall_mean = arr.mean()
print(overall_mean)
# 2.5

# axis=0 reduces down the rows, giving one mean per column.
column_means = arr.mean(axis=0)
print(column_means)
# [2. 3.]

# axis=1 reduces across the columns, giving one mean per row.
row_means = arr.mean(axis=1)
print(row_means)
# [1.5 3.5]

# The same aggregate is also available as a function that takes the array.
print(np.mean(arr, axis=0))
# [2. 3.]

Basic Queries

import numpy as np

arr = np.arange(1, 13).reshape(4, 3)
print(arr)
# [[ 1  2  3]
#  [ 4  5  6]
#  [ 7  8  9]
#  [10 11 12]]

# Boolean masks reused by the queries below.
above_5 = arr > 5
above_6 = arr > 6

# Counting entries greater than 5
print(np.count_nonzero(above_5))
# 7

# sum works as well, because True counts as 1 and False as 0
print(np.sum(above_5))
# 7

# Counting per row with axis=1
print(np.count_nonzero(above_5, axis=1))
# [0 1 3 3] (0 at row 0, 1 at row 1, 3 at rows 2 and 3.)

# Are there any values greater than 8?
print(np.any(arr > 8))
# True

# Are all values greater than 6?
print(np.all(above_6))
# False

# For each row: Are all values in that row greater than 6?
print(np.all(above_6, axis=1))
# [False False  True  True]

Broadcasting

Broadcasting lets arithmetic operations combine arrays of different shapes, or a scalar with an array. The most common case is a single scalar combined with an array.

Single Value Broadcasting

Single Value Broadcasting on 1-D ndarray

import numpy as np

# A 1-D array, matching this section's heading (the 2-D case follows below).
arr = np.array([-1, -2, 4])

# The scalar 1 is broadcast against every element of the array.
print(1 / arr)
# [-1.   -0.5   0.25]

Single Value Broadcasting on 2-D ndarray

import numpy as np

arr = np.arange(4).reshape(2, 2)

# The scalar exponent 2 is applied to every element of the 2-D array.
print(arr ** 2)
# [[0 1]
#  [4 9]]

Array Broadcasting

import numpy as np

arr = np.arange(12).reshape(3, 4)

print(arr)
# [[ 0  1  2  3]
#  [ 4  5  6  7]
#  [ 8  9 10 11]]

# Subtract the first row from the array: the 4-element row is broadcast
# against each of the 3 rows.
first_row = arr[0]
print(arr - first_row)
# [[0 0 0 0]
#  [4 4 4 4]
#  [8 8 8 8]]

Further Reading

Boolean Masking

Applying a Boolean array to an ndarray to select a subset of its elements.

import numpy as np

arr = np.arange(4)
print(arr)
# [0 1 2 3]

# A hand-written mask: True keeps the element, False drops it.
explicit_mask = [True, True, False, False]
print(arr[explicit_mask])
# [0 1]

# The same mask can be derived from a comparison:
derived_mask = arr < 2
print(derived_mask)
# [ True  True False False]

# Hence the comparison can be used directly as the index:
print(arr[arr < 2])
# [0 1]

Sorting

import numpy as np

# A local, seeded generator keeps the example reproducible without touching
# NumPy's global random state.
rng = np.random.RandomState(42)

arr = rng.randint(0, 100, 10)
print(arr)
# [51 92 14 71 60 20 82 86 74 74]

# np.sort returns a sorted copy and leaves arr untouched.
print(np.sort(arr))
# [14 20 51 60 71 74 74 82 86 92]
# arr.sort() will sort the array in place!

# argsort returns the indices that would sort arr. kind="stable" keeps equal
# values (the two 74s) in their original order, so the output is reproducible.
print(np.argsort(arr, kind="stable"))
# [2 5 0 4 3 8 9 6 7 1]

Examples

Find value appearing a specific number of times

Concatenate 3 ndarrays and find values occurring 3 times in the resulting array.

import numpy as np

arr_1 = np.array([10, 4, 8])
arr_2 = np.array([6, 8, 10])
arr_3 = np.array([4, 8, 10])

arr = np.concatenate((arr_1, arr_2, arr_3))
print(arr)
# [10  4  8  6  8 10  4  8 10]

# counts[v] is the number of times the value v occurs in arr.
counts = np.bincount(arr)

# Indices of counts equal to 3 are exactly the values occurring 3 times.
print(np.flatnonzero(counts == 3))
# [ 8 10]

100 NumPy Exercises

Selected exercises from: 100 NumPy Exercises.

Create random vector of size 10 and replace the maximum value by 0.

import numpy as np

rng = np.random.RandomState(42)

arr = rng.randint(1, 100, 10)
print(arr)
# [52 93 15 72 61 21 83 87 75 75]

# Boolean mask of every position holding the maximum; all of them are zeroed.
is_max = arr == arr.max()
arr[is_max] = 0
print(arr)
# [52  0 15 72 61 21 83 87 75 75]

Create a vector of size 10 with values ranging from 0 to 1, both excluded.

import numpy as np

# np.random.rand samples from [0, 1), so 0 itself could be returned.
# Taking 11 evenly spaced points on [0, 1) and dropping the first (0.0)
# leaves 10 values strictly between 0 and 1.
var = np.linspace(0, 1, 11, endpoint=False)[1:]
print(var)
# [0.09090909 0.18181818 0.27272727 0.36363636 0.45454545 0.54545455
#  0.63636364 0.72727273 0.81818182 0.90909091]

Create a random vector of size 10 and sort it.

import numpy as np

# Seeded generator so that the output shown below is reproducible.
rng = np.random.RandomState(42)

arr = rng.randint(low=0, high=100, size=10)
arr.sort()  # Cannot chain: sorts in place and returns None, not the array!
print(arr)
# [14 20 51 60 71 74 74 82 86 92]

Random Walk

Single Random Walk

Implement random walk.

import numpy as np

rng = np.random.RandomState(42)

# Draw 10 steps of -1 or +1, then accumulate them into positions.
steps = rng.choice(a=[-1, 1], size=10)
random_walk = steps.cumsum()

print(random_walk)
# [-1  0 -1 -2 -3 -2 -3 -4 -5 -4]

Multiple Random Walks

Implement 5 random walks with a walk length of 10.

import numpy as np

rng = np.random.RandomState(42)

# One row per person, one column per step; cumsum along axis=1 turns the
# -1/+1 steps into positions.
rand_walk = rng.choice([-1, 1], size=(5, 10)).cumsum(axis=1)
print(rand_walk)
# Each row represents a person, starting from person 0, up to person 4.
# [[-1  0 -1 -2 -3 -2 -3 -4 -5 -4]
#  [-1 -2 -3 -4 -3 -4 -3 -2 -1 -2]
#  [ 1  0  1  2  3  4  5  6  7  8]
#  [-1 -2 -1  0  1  0  1  0 -1 -2]
#  [-1 -2 -1  0  1  2  3  2  3  4]]

# Find the person(s) who never stepped into negative.
# all(axis=1) works for any walk length, with no hard-coded step count.
print(np.where((rand_walk >= 0).all(axis=1))[0])
# [2]

# Find the person(s) who stepped on point -4 the most.
# The "> 0" guard returns nobody when no one reached the point at all.
no_times_at_min4 = np.sum(rand_walk == -4, axis=1)
print(np.where((no_times_at_min4 == no_times_at_min4.max()) & (no_times_at_min4 > 0))[0])
# [0 1]

# Find the person(s) who stepped on point 9 the most.
nine = np.sum(rand_walk == 9, axis=1)
print(np.where((nine == nine.max()) & (nine > 0))[0])
# []

# Find the range (pos.max - pos.min) covered by each person in the walk.
# This is the spread of positions, not the statistical variance (np.var).
walk_range = rand_walk.max(axis=1) - rand_walk.min(axis=1)
print(walk_range)
# [5 3 8 3 6]

# Who covered the largest range?
print(np.where(walk_range == walk_range.max())[0])
# [2]

References