ndarray
int matrix[3][5];
matrix = [
[1, 2, 3, 4, 5],
[6, 7, 8, 9, 10],
[11, 12, 13, 14, 15]
]
來寫個 reshape 吧!
def reshape_(in_array: list, new_shape):
    """Placeholder for a hand-written reshape; the body is just ``...``.

    The comments below list the open questions an implementation has to answer.
    """
    # How do we determine the shape of in_array?
    # How do we check the data types of the elements inside it?
    # How do we check that new_shape is valid?
    ...
matrix = [
[1, 2, 3, 4],
[5, 6, 7, 8, 9],
[10, 11, 12, '13']
]
import numpy as np
array = np.arange(16, dtype=np.int8).reshape(4, 4).copy()
array
array([[ 0, 1, 2, 3], [ 4, 5, 6, 7], [ 8, 9, 10, 11], [12, 13, 14, 15]], dtype=int8)
array.shape
(4, 4)
array.strides
(4, 1)
array.data
<memory at 0x109d8d220>
array.base is None
True
arr_flatten = array.ravel()
arr_flatten
array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], dtype=int8)
# Dark Magic
np.lib.stride_tricks
那我們先來看看用這個 Dark Magic 可以怎麼做一個自己的 reshape
def flat_list(ll, acc=None):
    """Flatten arbitrarily nested lists into a single flat list.

    Only ``list`` instances are descended into; every other object
    (including tuples and strings found inside ``ll``) is kept as a leaf.
    Elements are emitted in depth-first, left-to-right order.

    The traversal uses an explicit stack instead of recursion, so nesting
    deeper than the interpreter's recursion limit is handled.

    Args:
        ll: Iterable whose items are leaves or (nested) lists.
        acc: Optional list to append the leaves to. It is extended in place
            and returned; a fresh list is created when omitted.

    Returns:
        The list holding all leaves (``acc`` itself when one was given).

    Raises:
        ValueError: If a list contains itself, directly or indirectly.
    """
    if acc is None:
        acc = []
    # ids of the lists currently being traversed, used to detect cycles.
    active = {id(ll)}
    # Stack of (id of container, iterator over that container).
    pending = [(id(ll), iter(ll))]
    while pending:
        owner, items = pending[-1]
        for item in items:
            if isinstance(item, list):
                if id(item) in active:
                    raise ValueError("flat_list: self-referential list")
                active.add(id(item))
                # Descend; the outer iterator resumes where it stopped later.
                pending.append((id(item), iter(item)))
                break
            acc.append(item)
        else:
            # Iterator exhausted: this container is done.
            active.discard(owner)
            pending.pop()
    return acc
flat_list([[1, 2, 3], [4, 5, 6]])
[1, 2, 3, 4, 5, 6]
flat_list([[1, 2, 3], [4, 5, 6], [1, 2, [3, 4]]])
[1, 2, 3, 4, 5, 6, 1, 2, 3, 4]
def reshape_(in_array, new_shape):
    """Reshape a nested list into ``new_shape`` using ``as_strided``.

    The input is flattened with ``flat_list``, converted to a 1-D ndarray and
    re-viewed with row-major (C-order) byte strides derived from
    ``new_shape`` and the array's actual element size.

    Args:
        in_array: Arbitrarily nested list of scalars.
        new_shape: Sequence of ints giving the target shape.

    Returns:
        Nested Python lists with the requested shape.

    Warning:
        The element count is NOT checked against ``new_shape``.
        ``as_strided`` performs no bounds checking, so a shape that asks for
        more elements than ``in_array`` holds reads whatever memory follows
        the buffer.
    """
    flat_array = np.asarray(flat_list(in_array))
    # Use the real element width instead of assuming 8-byte items.
    itemsize = flat_array.itemsize
    new_strides = []
    acc = 1
    # Walk the axes from last to first: the last axis steps one element,
    # each earlier axis steps over the product of all later axis lengths.
    for s in reversed(new_shape):
        new_strides.append(acc * itemsize)
        acc *= s
    new_strides.reverse()
    return np.lib.stride_tricks.as_strided(
        flat_array,
        shape=new_shape,
        strides=new_strides,
    ).tolist()
reshape_(
[
[1, 2, 3, 4, 5],
[6, 7, 8, 9, 10],
[11, 12, 13, 14, 15]
],
(5, 3)
)
[[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12], [13, 14, 15]]
reshape_(
[
[1, 2, 3, 4, 5],
[6, 7, 8, 9, 10],
[11, 12, 13, 14, 15]
],
(5, 1, 1, 3)
)
[[[[1, 2, 3]]], [[[4, 5, 6]]], [[[7, 8, 9]]], [[[10, 11, 12]]], [[[13, 14, 15]]]]
reshape_(
[
[1, 2, 3, 4, 5],
[6, 7, 8, 9, 10],
[11, 12, 13, 14, 15]
],
(5, 1, 3, 2)
)
[[[[1, 2], [3, 4], [5, 6]]], [[[7, 8], [9, 10], [11, 12]]], [[[13, 14], [15, 2251799813685248], [0, 0]]], [[[0, 4415747056], [21, 140380210676528], [4437478368, 4409815040]]], [[[4437378560, 0], [140380210604576, 0], [4294967296, 0]]]]
print("1 == 4 * 0 + 1 * 1:", 1 == 4*0 + 1*1)
print("6 == 4 * 1 + 1 * 2:", 6 == 4*1 + 1*2)
1 == 4 * 0 + 1 * 1: True 6 == 4 * 1 + 1 * 2: True
print("arr_flatten[1] == array[0, 1]:", arr_flatten[1] == array[0, 1])
print("arr_flatten[6] == array[1, 2]:", arr_flatten[6] == array[1, 2])
arr_flatten[1] == array[0, 1]: True arr_flatten[6] == array[1, 2]: True
array.strides
(4, 1)
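Then, for the element at index $(i_0, \dots, i_{m-1})$ of an $m$-dimensional array whose strides (in bytes) are $(s_0, \dots, s_{m-1})$, we have:
$$ \text{offset}_e = \sum\limits_{j=0}^{m-1} s_j \cdot i_j $$
cube = np.arange(3*3*3).reshape((3, 3, 3)).copy()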
cube
array([[[ 0, 1, 2], [ 3, 4, 5], [ 6, 7, 8]], [[ 9, 10, 11], [12, 13, 14], [15, 16, 17]], [[18, 19, 20], [21, 22, 23], [24, 25, 26]]])
strided_cube = cube[::2, ::2, ::2]
strided_cube
array([[[ 0, 2], [ 6, 8]], [[18, 20], [24, 26]]])
# strided_cube is a view of cube
cube[0, 0, 0] = 3
strided_cube
array([[[ 3, 2], [ 6, 8]], [[18, 20], [24, 26]]])
Then we have:
$$ \text{offset}_e = \sum\limits_{j=0}^{m-1} s_j \cdot i_j $$
cube.ravel()
array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26])
cube.shape
(3, 3, 3)
cube.strides # number of bytes
(72, 24, 8)
Then we have:
$$ \text{offset}_e = 72 \cdot i_0 + 24 \cdot i_1 + 8 \cdot i_2 $$
strided_cube.ravel()
array([ 0, 2, 6, 8, 18, 20, 24, 26])
strided_cube.shape
(2, 2, 2)
strided_cube.strides
(144, 48, 16)
Then we have:
$$ \begin{align*} \text{offset}_e &= 144 \cdot i_0^{\prime} + 48 \cdot i_1^{\prime} + 16 \cdot i_2^{\prime} \\ &= 72 \cdot (2 \cdot i_0^{\prime}) + 24 \cdot (2 \cdot i_1^{\prime}) + 8 \cdot (2 \cdot i_2^{\prime}) \\ &= 72 \cdot i_0 + 24 \cdot i_1 + 8 \cdot i_2 \end{align*} $$
strided_cube.data, type(strided_cube.data)
(<memory at 0x1100ef1f0>, memoryview)
import struct
# Decode the view's raw bytes one element at a time with struct
# ('q' = signed 8-byte integer) and print the 8 decoded values.
data_bytes = strided_cube.data.tobytes()
item_width = strided_cube.itemsize
decoded = [
    struct.unpack('q', data_bytes[start:start + item_width])[0]
    for start in range(0, 8 * item_width, item_width)
]
print(*decoded, end=" ")
0 2 6 8 18 20 24 26
np.lib.stride_tricks.as_strided(
cube,
strides=(144, 48, 16),
shape=(2, 2, 2)
)
array([[[ 0, 2], [ 6, 8]], [[18, 20], [24, 26]]])
strided_cube.base is cube
True
cube.base is None
True
# advanced indexing
random_cube = cube[
[0, 0, 0, 0, 2, 2, 2, 2],
[0, 0, 2, 2, 0, 0, 2, 2],
[0, 2, 1, 2, 0, 1, 0, 2]
]
random_cube.reshape((2, 2, 2))
array([[[ 0, 2], [ 7, 8]], [[18, 19], [24, 26]]])
random_cube.base is None
True
cube.ravel().base is cube
True
cube.flatten().base is cube
False
data = np.arange(20, dtype=np.int8)
data
array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19], dtype=int8)
# Build the sliding windows the explicit way: copy each 5-element slice of
# `data` into its own row of a freshly allocated (16, 5) array.
n_windows, window_len = 16, 5
windows_ = np.empty_like(data, shape=(n_windows, window_len))
for start, row in enumerate(windows_):
    # `row` is a view into windows_, so the slice assignment fills it in place.
    row[:] = data[start:start + window_len]
windows_
array([[ 0, 1, 2, 3, 4], [ 1, 2, 3, 4, 5], [ 2, 3, 4, 5, 6], [ 3, 4, 5, 6, 7], [ 4, 5, 6, 7, 8], [ 5, 6, 7, 8, 9], [ 6, 7, 8, 9, 10], [ 7, 8, 9, 10, 11], [ 8, 9, 10, 11, 12], [ 9, 10, 11, 12, 13], [10, 11, 12, 13, 14], [11, 12, 13, 14, 15], [12, 13, 14, 15, 16], [13, 14, 15, 16, 17], [14, 15, 16, 17, 18], [15, 16, 17, 18, 19]], dtype=int8)
# Same (16, 5) sliding windows, built with as_strided instead of a copy loop.
# `data` is int8 (1 byte per element), so a 1-byte stride on both axes makes
# element [i, j] land on data[i + j].
windows = np.lib.stride_tricks.as_strided(
    data,
    shape=(16, 5),
    strides=(1, 1)
)
windows
array([[ 0, 1, 2, 3, 4], [ 1, 2, 3, 4, 5], [ 2, 3, 4, 5, 6], [ 3, 4, 5, 6, 7], [ 4, 5, 6, 7, 8], [ 5, 6, 7, 8, 9], [ 6, 7, 8, 9, 10], [ 7, 8, 9, 10, 11], [ 8, 9, 10, 11, 12], [ 9, 10, 11, 12, 13], [10, 11, 12, 13, 14], [11, 12, 13, 14, 15], [12, 13, 14, 15, 16], [13, 14, 15, 16, 17], [14, 15, 16, 17, 18], [15, 16, 17, 18, 19]], dtype=int8)
X = windows[:, :4]
Y = windows[:, 4]
X
array([[ 0, 1, 2, 3], [ 1, 2, 3, 4], [ 2, 3, 4, 5], [ 3, 4, 5, 6], [ 4, 5, 6, 7], [ 5, 6, 7, 8], [ 6, 7, 8, 9], [ 7, 8, 9, 10], [ 8, 9, 10, 11], [ 9, 10, 11, 12], [10, 11, 12, 13], [11, 12, 13, 14], [12, 13, 14, 15], [13, 14, 15, 16], [14, 15, 16, 17], [15, 16, 17, 18]], dtype=int8)
Y
array([ 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19], dtype=int8)
X.base is windows, Y.base is windows
(True, True)
X_ = windows[:, [0, 1, 2, 3]] # X = windows[:, :4]
X_.base is windows
False
basic indexing vs. advanced indexing
Given `a` of shape `(20, 5)`, `b` of shape `(30, 5)`, and `c` of shape `(50, 1)`, create an array `out` of shape `(20, 30, 50)`, where `out[i, j, k]` is given as follows:
out[i, j, k] = sum([c[k] if a_ > b_ else 0.0 for a_, b_ in zip(a[i], b[j])])
Na, Nb, Nc = 20, 30, 50
a = np.random.rand(Na, 5)
b = np.random.rand(Nb, 5)
c = np.random.rand(Nc, 1)
%%time
# Reference implementation: fill out[i, j, k] one element at a time.
out = np.zeros((Na, Nb, Nc), dtype=float)
for i in range(Na):
    for j in range(Nb):
        for k in range(Nc):
            # (a[i] > b[j]) is a length-5 boolean mask; multiplying by c[k]
            # (shape (1,)) gives c[k] where a > b and 0 elsewhere, then sum.
            out[i, j, k] = ((a[i] > b[j]) * c[k]).sum()
CPU times: user 198 ms, sys: 28.7 ms, total: 226 ms Wall time: 199 ms
%%time
# Vectorized version: insert length-1 axes so the three operands broadcast.
#   a[:, np.newaxis, np.newaxis, :] -> (Na, 1, 1, 5)
#   b[np.newaxis, :, np.newaxis, :] -> (1, Nb, 1, 5)
#   c[np.newaxis, np.newaxis, :, :] -> (1, 1, Nc, 1)
# The comparison broadcasts to (Na, Nb, 1, 5), the product to (Na, Nb, Nc, 5),
# and summing over the last axis leaves (Na, Nb, Nc).
out_ = (
    (
        a[:, np.newaxis, np.newaxis, :] > b[np.newaxis, :, np.newaxis, :]
    ) * c[np.newaxis, np.newaxis, :, :]
).sum(axis=-1)
out_.shape
CPU times: user 2.2 ms, sys: 1.44 ms, total: 3.63 ms Wall time: 1.93 ms
(20, 30, 50)
np.allclose(
out,
out_
)
True
print(a.shape, a[:, np.newaxis, np.newaxis, :].shape)
(20, 5) (20, 1, 1, 5)
a[:, np.newaxis, np.newaxis, :].strides
(40, 0, 0, 8)
# Recap, side by side.
# 1) Triple nested loop: one Python-level iteration per output element.
out = np.zeros((Na, Nb, Nc), dtype=float)
for i in range(Na):
    for j in range(Nb):
        for k in range(Nc):
            out[i, j, k] = ((a[i] > b[j]) * c[k]).sum()
# 2) Broadcasting: the same computation as a single array expression.
out_ = (
    (
        a[:, np.newaxis, np.newaxis, :] > b[np.newaxis, :, np.newaxis, :]
    ) * c[np.newaxis, np.newaxis, :, :]
).sum(axis=-1)
numpy: ndarray
NEVER write nested loops ever again
At any time, think of this picture and this talk when you want to apply nested loops on ndarrays:
ndarray.flags and other interesting attributes
numpy compliant implementation
numpy