# Import the NumPy library
import numpy as np

Z = np.ones((16,16))
k = 4
blocks = Z.reshape((Z.shape[0]//k, k, Z.shape[1]//k, k))
S = blocks.sum(axis=(1,3))

print(S)