import matplotlib
#%matplotlib notebook
%matplotlib inline
matplotlib.rcParams['figure.figsize'] = (7, 3)
import numpy as np
There is of course a while-loop
# Count down from 3, printing each value; the loop ends once a reaches 0.
a = 3
while a > 0:
    print(a)
    a -= 1
3 2 1
if-else-elif
# Map the current weekday to a party forecast with an if/elif/else chain.
today = 'Wednesday'
if today == 'Monday':
    chance_of_party = 'slim to none'
elif today in ['Tuesday', 'Thursday', 'Sunday']:
    chance_of_party = 'poor'
elif today == 'Wednesday':
    chance_of_party = 'possible'
elif today in ['Friday', 'Saturday']:
    chance_of_party = 'likely'
else:
    # Anything that is not one of the seven day names is rejected outright.
    raise ValueError("'{}' is not a weekday".format(today))
print('Current chance of party is', chance_of_party)
Current chance of party is possible
There is no switch
statement in Python!
Hannes Ovrén
Image source: Fernando Perez
The core numerical array/matrix library
numpy.ndarray
class for N-dimensional arrays
It is common to alias numpy to np for less writing
import numpy as np
Let's create a $2 \times 3$ array
A = np.array([[1, 2, 3],
[10, 20, 30]])
A
array([[ 1, 2, 3], [10, 20, 30]])
and transpose it
A.T
array([[ 1, 10], [ 2, 20], [ 3, 30]])
numpy.ndarray
¶ndarray.ndim
ndarray.shape
numpy.dtype
- (float64
, uint8
, bool
, ...)
A = np.random.normal(size=(2,3,2))
print('#ndim: {}, shape: {}, #elements: {}'.format(A.ndim, A.shape,
A.size))
print('dtype: {}, itemsize: {}'.format(A.dtype, A.itemsize))
#ndim: 3, shape: (2, 3, 2), #elements: 12 dtype: float64, itemsize: 8
A = np.array([[1,2,3], [4,5,6], [7,8,9]])
A
array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
A[1,2]
6
Access rows
A[0, :]
array([1, 2, 3])
A[0]
array([1, 2, 3])
Note for MATLAB-users: zero-indexed arrays!
Access columns (note the shape!)
c = A[:, 0]
print(c, c.shape, c.ndim)
[1 4 7] (3,) 1
Extract parts
A[:2, :2] # 2x2 block
array([[1, 2], [4, 5]])
A[:, (0, 2)] # first and third columns
array([[1, 3], [4, 6], [7, 9]])
y = np.arange(6).reshape(2, 3)
y
array([[0, 1, 2], [3, 4, 5]])
mask = (y % 2 == 0)
mask
array([[ True, False, True], [False, True, False]], dtype=bool)
y[mask]
array([0, 2, 4])
Only first dimensions must match.
Extracting pixels from an image, gives pixel value "vectors"
# 2x2 RGB image with random channel values.
# randint's upper bound is exclusive, so 256 is needed to cover 0..255.
image = np.random.randint(0, 256, size=(2,2,3))
print(image)
[[[ 79 192 241] [233 243 237]] [[ 74 52 249] [149 200 89]]]
mask = np.array([[True, False], [False, True]]) # 2x2 mask
image[mask]
array([[ 79, 192, 241], [149, 200, 89]])
y = np.array([[1, 2, 3], [4, 5, 6]])
py_mask = [[True, True, False], [False, False, True]]
np_mask = np.array(py_mask)
print(y[np_mask])
print(y[py_mask])
[1 2 6] [4 4 2]
/home/hannes/miniconda/envs/pycourse/lib/python3.5/site-packages/ipykernel/__main__.py:2: FutureWarning: in the future, boolean array-likes will be handled as a boolean array index from ipykernel import kernelapp as app
What happened?
Most functions/methods return views of an array (sharing data)
A = np.array([[1, 2, 3], [4, 5, 6]])
middle_col = A[:, 1]
middle_col[:] = 99
A
array([[ 1, 99, 3], [ 4, 99, 6]])
Use .copy()
to get a copy
A = np.array([[1, 2, 3], [4, 5, 6]])
middle_col = A[:, 1].copy()
middle_col[:] = 99
A
array([[1, 2, 3], [4, 5, 6]])
Example: We want a function that gives a mean-removed point set
def normalized(pts):
    """Subtract the per-row mean from a point set and return it.

    WARNING: the subtraction is done in place (``pts -= m``), so the
    caller's array is modified and the returned object is that same array.
    Pass ``pts.copy()`` if the original values must be kept.

    Parameters
    ----------
    pts : numpy.ndarray
        2-D array; the mean is taken along axis 1, i.e. over each row.
        (The example passes a float array of shape (2, 4).)

    Returns
    -------
    numpy.ndarray
        The same array object as ``pts``, now with zero mean along axis 1.
    """
    # Reshape the row means into a column so they broadcast across each row.
    m = np.mean(pts, axis=1).reshape(-1, 1)
    pts -= m
    return pts
orig = np.random.normal(10., 1.0, size=(2, 4))
orig
array([[ 8.55295309, 9.59138014, 10.27300416, 10.50291701], [ 11.4171007 , 11.41576679, 9.91223316, 10.06468581]])
normalized(orig)
array([[-1.17711051, -0.13868346, 0.54294056, 0.77285341], [ 0.71465408, 0.71332017, -0.79021345, -0.6377608 ]])
orig
array([[-1.17711051, -0.13868346, 0.54294056, 0.77285341], [ 0.71465408, 0.71332017, -0.79021345, -0.6377608 ]])
A = np.array([[1, 2], [3, 4]])
B = np.ones((2, 2)) # 2x2 of ones
print(A)
print(B)
[[1 2] [3 4]] [[ 1. 1.] [ 1. 1.]]
Addition and subtraction
A + B
array([[ 2., 3.], [ 4., 5.]])
A - B
array([[ 0., 1.], [ 2., 3.]])
What about multiplication?
print(A)
print(B)
[[1 2] [3 4]] [[ 1. 1.] [ 1. 1.]]
A * B
array([[ 1., 2.], [ 3., 4.]])
numpy.ndarray
is an array, not a matrix.
For matrix multiplications use np.dot
or np.ndarray.dot
np.dot(A, B)
array([[ 3., 3.], [ 7., 7.]])
A.dot(B)
array([[ 3., 3.], [ 7., 7.]])
Python 3.5 + NumPy 1.10: @
-operator
A @ B
array([[ 3., 3.], [ 7., 7.]])
A = np.arange(10)
print(A, A.shape)
[0 1 2 3 4 5 6 7 8 9] (10,)
B = A.reshape((2, 5))
print(B, B.shape)
[[0 1 2 3 4] [5 6 7 8 9]] (2, 5)
A and B point at the same data!
B[0,0] = 100
print(A)
[100 1 2 3 4 5 6 7 8 9]
Automatic calculation of one dimension by setting it to -1
A = np.arange(24)
B = A.reshape(2, -1, 4)
print(B.shape)
(2, 3, 4)
Illegal shapes are... illegal :)
C = A.reshape(2, 4, 4)
--------------------------------------------------------------------------- ValueError Traceback (most recent call last) <ipython-input-129-c08763be3d59> in <module>() ----> 1 C = A.reshape(2, 4, 4) ValueError: total size of new array must be unchanged
A = np.arange(10).reshape(2,5)
B = A.ravel()
B
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
C = A.flatten() # Copies!
C
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
B[0] = 99
C[1] = 100
A
array([[99, 1, 2, 3, 4], [ 5, 6, 7, 8, 9]])
Linear indexing with ndarray.flat
iterator (no copy)
A.flat[::2] = 0
A
array([[0, 1, 0, 3, 0], [5, 0, 7, 0, 9]])
hstack
, vstack
, dstack
a = np.array([1, 2])
b = np.array([3, 4])
c = np.array([5, 6])
np.hstack((a, b, c))
array([1, 2, 3, 4, 5, 6])
np.vstack((a, b, c))
array([[1, 2], [3, 4], [5, 6]])
concatenate
np.concatenate((a, b, c), axis=0)
array([1, 2, 3, 4, 5, 6])
A = np.array([[1, 2]])
B = np.array([[3],[4]])
print(A)
print('+')
print(B)
[[1 2]] + [[3] [4]]
A + B
array([[4, 5], [5, 6]])
We just witnessed the numpy broadcasting system
# 256x256 RGB image with random channel values.
# randint's upper bound is exclusive, so 256 is needed to cover 0..255.
img = np.random.randint(0, 256, size=(256, 256, 3))
# The length-3 weight vector is broadcast over the last (channel) axis.
scaled = img * np.array([0.2, 0.5, 0.3])
print(scaled.shape)
(256, 256, 3)
# 100 random 3D points, stored one point per column (array shape 3 x 100).
points = np.random.normal(loc=3, size=(3, 100))
m = points.mean(axis=1)
print('Original mean: ', m)
# Make the mean a 3x1 column so broadcasting subtracts it from every point.
shifted = points - m[:, np.newaxis]
print('Shifted mean:', shifted.mean(axis=1))
Original mean: [ 2.86716784 2.93001807 2.98902105] Shifted mean: [ -5.32907052e-16 4.26325641e-16 -3.99680289e-17]
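Dimensions are compared pairwise, starting from the last one
Each pair of dimensions must be equal, or one of them must be 1 (else fail!)
1 dimensions are "stretched" to match
A (3d array): 15 x 3 x 5 B (3d array): 15 x 1 x 5 Result (3d array): 15 x 3 x 5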
A (4d array): 8 x 1 x 6 x 1 B (3d array): 7 x 1 x 5 Result (4d array): 8 x 7 x 6 x 5
np.linspace(-2, 2, num=10)
array([-2. , -1.55555556, -1.11111111, -0.66666667, -0.22222222, 0.22222222, 0.66666667, 1.11111111, 1.55555556, 2. ])
np.arange(10)
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
zeros
, ones
, eye
np.zeros((2, 5))
array([[ 0., 0., 0., 0., 0.], [ 0., 0., 0., 0., 0.]])
np.eye(3)
array([[ 1., 0., 0.], [ 0., 1., 0.], [ 0., 0., 1.]])
ones_like
, zeros_like
A = np.eye(4)
A
array([[ 1., 0., 0., 0.], [ 0., 1., 0., 0.], [ 0., 0., 1., 0.], [ 0., 0., 0., 1.]])
np.ones_like(A)
array([[ 1., 1., 1., 1.], [ 1., 1., 1., 1.], [ 1., 1., 1., 1.], [ 1., 1., 1., 1.]])
empty, only allocates memory (very fast). Array items are whatever was in memory.
np.empty((2, 5))
array([[ 0., 0., 0., 0., 0.], [ 0., 0., 0., 0., 0.]])
Use the ndarray.astype()
method:
A = np.array([[1.5, 2.0, 3.7],
[1.0, 2.0, 9.99]])
B = A.astype('uint8')
B
array([[1, 2, 3], [1, 2, 9]], dtype=uint8)
This produces a copy of the array.
It is possible to change the memory layout if required (when do you need to?)
np.ascontiguousarray()
np.asfortranarray()
or A = np.array([1,2], order='F')
np.require()
-- The most general way
ndarray.flags
attribute
A = np.array([[1,2], [3, 4]])
b = A[:, 1]                  # column slice of A
c = np.ascontiguousarray(b)  # contiguous version of that column
# One list of "NAME : value" text lines per array, from the printed flags.
lines = [str(x.flags).splitlines() for x in (A, b, c)]
print('{:<16s} {:^8s} {:^8s} {:^8s}'.format('', 'A', 'b','c'))
# zip(*lines) yields, for each flag, its text line from A, b and c together.
for flag_rows in zip(*lines):
    vals = [s.split(':')[-1].strip() for s in flag_rows]
    # Append '*' to every value that differs from A's value for this flag.
    vals = [v + '*' if not v == vals[0] else v for v in vals]
    flagname = flag_rows[0].split(':')[0].strip()
    print('{:<16s} {:^8s} {:^8s} {:^8s}'.format(flagname, *vals))
A b c C_CONTIGUOUS True False* True F_CONTIGUOUS False False True* OWNDATA True False* True WRITEABLE True True True ALIGNED True True True UPDATEIFCOPY False False False
Module numpy.random
has most distributions available.
points = np.random.randint(-3, 3, size=(3, 10))
sigma = 0.2
noisy_points = points + np.random.normal(0, sigma, size=points.shape)
print(noisy_points)
[[ 1.29122068 -0.82561563 0.79110243 2.17405314 1.6519741 0.6800585 -1.83964693 0.99466687 -0.4484195 -0.8430179 ] [ 0.20428938 -1.89918263 -3.13698162 -2.35535725 -3.05498032 -0.79690424 -1.87530157 -1.11215585 0.87633914 1.15527529] [ 0.15777087 -2.83102468 1.82822541 -0.06375282 0.03015451 -2.13302273 -1.22664891 -1.40695077 1.75335996 2.23809561]]
Use np.random.shuffle()
or np.random.choice()
A = np.arange(10)
Randomly select 3 items from A
without replacement
np.random.shuffle(A) # shuffle inplace
A[:3]
array([0, 2, 6])
np.random.choice(A, 3, replace=False)
array([0, 2, 8])
numpy.nonzero
or numpy.flatnonzero
A = np.random.randint(0, 20, size=10)
indices = np.flatnonzero(A % 3 == 0)
A
array([14, 14, 14, 5, 1, 5, 17, 10, 5, 3])
indices
array([9])
A[indices]
array([3])
np.sum()
np.mean()
np.max()
np.argmax()
We can use either max()
or numpy.max()
, the latter is faster!
N = 10000
%timeit max(np.random.normal(size=N))
1000 loops, best of 3: 1.15 ms per loop
%timeit np.max(np.random.normal(size=N))
1000 loops, best of 3: 456 µs per loop
matplotlib: Object-oriented interface
matplotlib.pyplot: MATLAB-like interface
pyplot example
import matplotlib.pyplot as plt
x = np.linspace(0, 4*np.pi)
y = np.sin(x)
plt.figure()
plt.plot(x, y, '--')
plt.show()
There is no need for MATLAB's hold on.
plt.figure()
plt.plot(x, np.sin(x))
plt.plot(x, np.sin(2*x+0.5))
plt.show()
Create automatically using label=
keyword and legend()
# Model curve and noisy "measurements" of that same curve.
y1 = np.sin(x)
# Build the noise on y1 itself rather than on whatever the global y holds.
y2 = y1 + np.random.normal(scale=0.1, size=y1.shape)
plt.figure()
plt.plot(x, y1, label='Model')
plt.scatter(x, y2, c='red', marker='x', label='Measurements')
# legend() collects the label= texts given above; ncol=2 sets two columns.
plt.legend(ncol=2)
plt.show()
plot() command
plot(x, y) # default line style and color
plot(x, y, 'bo') # blue circle markers
plot(y) # x is index array 0..N-1
plot(y, 'r+') # ditto, but with red plusses
Some useful keyword arguments
plt.figure()
plt.subplot(2, 1, 1)
plt.plot(x, np.sin(x))
plt.subplot(2, 1, 2)
plt.plot(x, np.sin(2 * x))
plt.show()
plt.figure(figsize=(10, 3))
ax1 = plt.subplot(1, 2, 1)
ax2 = plt.subplot(1, 2, 2, sharex=ax1)
ax1.plot(x, np.sin(x))
ax2.plot(x, 5 * np.sin(2 * (x - np.pi / 2)))
plt.show()
plt.figure()
y = np.pi * np.sin(3 * x)
# Raw string: keeps the LaTeX backslashes (\pi, \cdot, \sin) literal instead
# of relying on Python passing unknown escape sequences through unchanged.
plt.plot(x, y, label=r'$\pi \cdot \sin(3x)$')
plt.legend()
plt.show()
Press the "save" button :)
show()
with savefig(filename)
dpi=
option
imshow()
defaults to interpolation, but this can be turned off using interpolation='none'
.
# 8x8 random single-channel image.
# randint's upper bound is exclusive, so 256 is needed to cover 0..255.
image = np.random.randint(0, 256, size=(8, 8))
plt.figure(figsize=(6,3))
plt.subplot(1,2,1)
plt.imshow(image)  # default interpolation setting
plt.subplot(1,2,2)
plt.imshow(image, interpolation='none')  # interpolation turned off
plt.show()
See documentation for full list
DataFrame
table-like object
import pandas as pd
df = pd.read_csv('pojknamn.csv', skiprows=2, encoding='latin1',
index_col='tilltalsnamn', na_values=['..']).transpose()
df.head()
tilltalsnamn | Aaron | Abbas | Abbe | Abdallah | Abdirahim | Abdirahman | Abdulahi | Abdullahi | Abdullah | Abdulrahman | ... | Zakarias | Zakariya | Zakk | Zander | Zebastian | Zeb | Zeke | Zion | Åke | Ömer |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
1998 | NaN | NaN | NaN | NaN | NaN | 15 | NaN | 12 | NaN | NaN | ... | NaN | NaN | NaN | NaN | 23 | NaN | NaN | NaN | NaN | NaN |
1999 | NaN | NaN | NaN | NaN | NaN | 15 | NaN | NaN | 12 | NaN | ... | 10 | NaN | NaN | NaN | 22 | NaN | NaN | NaN | NaN | NaN |
2000 | 12 | NaN | NaN | NaN | NaN | 21 | NaN | 17 | 10 | NaN | ... | NaN | NaN | NaN | NaN | 26 | 11 | NaN | NaN | 13 | NaN |
2001 | NaN | NaN | NaN | NaN | NaN | 20 | NaN | 12 | NaN | NaN | ... | 12 | NaN | NaN | NaN | 29 | NaN | NaN | NaN | NaN | NaN |
2002 | 15 | NaN | 11 | NaN | NaN | 16 | NaN | NaN | NaN | NaN | ... | 14 | NaN | NaN | NaN | 26 | 11 | NaN | NaN | 10 | NaN |
5 rows × 824 columns
subset = df[['Hannes', 'Adam']]
subset.sum()
tilltalsnamn Hannes 3084 Adam 9735 dtype: float64
df[['Hannes', 'Andreas', 'Mikael', 'Marcus']].plot()
plt.title('Newly born male names')
<matplotlib.text.Text at 0x7f3f7f299f28>
Hierarchical Data Format v5
/camera1/rgb/frame1
import h5py
# Open the HDF5 file read-only; it is closed again when the block exits,
# so every access to the dataset happens inside the with-body.
with h5py.File('hero3_atan.hdf', 'r') as f:
    print('Keys:', list(f.keys()))
    dataset = f['K']
    print(dataset)
    # dataset[()] reads the whole dataset; it replaces the deprecated
    # Dataset.value attribute.
    print(dataset[()])
Keys: ['K', 'fps', 'lgamma', 'opt_residual_mean', 'opt_residual_std', 'readout', 'size', 'wc'] <HDF5 dataset "K": shape (3, 3), type "<f8"> [[ 853.12703455 0. 988.06311256] [ 0. 873.54956631 525.71056312] [ 0. 0. 1. ]]
Write math $x = \frac{1}{2}$ inline or as blocks $$ y = \sum_{i=3} x_i^2 $$
Mix text using markdown or HTML with code
Example: Derivative of $\sin(x) e^x$
from sympy import init_printing, symbols, exp, sin, diff
init_printing()
x = symbols('x')
diff(sin(x)*exp(x), x)
cv::Mat
represented by numpy.ndarray
cv2.namedWindow()
, cv2.imshow()
, ...
plt.imshow()
, ... (highly recommended)
import cv2
# White square on black background
image = np.zeros((128, 128), dtype='uint8') # CV_8U
image[50:80, 50:80] = 255
image_blurred = cv2.blur(image, ksize=(11, 11))
plt.figure(figsize=(8, 3))
plt.gray()
plt.subplot(1, 2, 1)
plt.imshow(image, interpolation='none')
plt.subplot(1, 2, 2)
plt.imshow(image_blurred, interpolation='none')
plt.show()
dtype
# Structured dtype: each record has a text field and an unsigned 32-bit count.
dt = np.dtype([
    ('name', np.str_, 16),        # unicode string of up to 16 characters
    ('population', np.uint32)
])
print(dt)
[('name', '<U16'), ('population', '<u4')]
cities = np.array([
('Stockholm', 851155),
('Göteborg', 516532),
('Malmö', 293909),
('Linköping', 97428)
],dtype=dt)
np.sum(cities['population'])
1759024
big_cities = cities[cities['population'] > 500000]
print(big_cities['name'])
['Stockholm' 'Göteborg']