Model

NeuralNet(
  (l0): Linear(in_features=6, out_features=256, bias=True)
  (relu): ReLU()
  (bn0): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (l00): Linear(in_features=256, out_features=1, bias=True)
)

Extract layers

# Put the model in inference mode so BatchNorm exposes/uses its running
# statistics and any Dropout is disabled; the NumPy re-implementation below
# reads those running statistics directly.
model.eval()


def ReLU(x):
    """Element-wise rectified linear unit, max(0, x), for NumPy arrays."""
    return np.maximum(0.0, x)


def _to_numpy(tensor):
    """Return a CPU NumPy copy/view of a torch tensor, detached from autograd."""
    return tensor.detach().cpu().numpy()


def _as_column(vector):
    """Reshape a per-feature vector to (features, 1) so it broadcasts over the batch."""
    # -1 lets NumPy infer the feature count instead of hard-coding 256.
    return np.reshape(vector, (-1, 1))


# Validation inputs: (possibly GPU) torch.Tensor -> CPU numpy ndarray
X_data = _to_numpy(X_valid)

# First fully-connected layer (l0)
W0 = _to_numpy(model.l0.weight)  # Weights W
b0 = _to_numpy(model.l0.bias)  # Bias b

# Batch Normalization layer (bn0): running statistics plus the learned affine
# scale (gamma = weight) and shift (beta = bias), each as a column vector.
bn_mean = _as_column(_to_numpy(model.bn0.running_mean))
bn_var = _as_column(_to_numpy(model.bn0.running_var))
bn_gamma = _as_column(_to_numpy(model.bn0.weight))
bn_beta = _as_column(_to_numpy(model.bn0.bias))
# Small constant added to the variance for numerical stability.
bn_epsilon = model.bn0.eps

# Final output layer (l00)
W00 = _to_numpy(model.l00.weight)
b00 = _to_numpy(model.l00.bias)

Feed-forward calculation

# NumPy re-implementation of the network's forward pass.
# Activations are kept as (features, batch): the weight matrices multiply the
# transposed input, and the (features, 1) bias / BatchNorm column vectors
# broadcast across the batch axis.
# NOTE(review): assumes X_data is 2-D with one sample per row -- confirm.

# First fully-connected layer followed by ReLU
out = np.dot(W0, X_data.T) + np.reshape(b0, (-1, 1))
out = ReLU(out)

# BatchNorm layer (inference mode). The epsilon must be added to the running
# variance *inside* the square root to match torch.nn.BatchNorm1d:
#   y = (x - mean) / sqrt(var + eps) * gamma + beta
out = (out - bn_mean) / np.sqrt(bn_var + bn_epsilon) * bn_gamma + bn_beta

# Final output layer: consumes the BatchNorm output computed above.
out = np.dot(W00, out) + np.reshape(b00, (-1, 1))
# NOTE(review): the printed model lists no activation after l00; this final
# ReLU is kept from the original snippet -- confirm against model.forward().
out = ReLU(out)