Getting Started with TensorFlow 2.0
- 1.1 Linear function
- 1.2 Computing the sigmoid
- 1.3 Computing the cost
- 1.4 Using one-hot encoding
- 1.5 Initializing with zeros and ones
- 2.0 Data preprocessing
- 2.1 No need to create placeholders
- 2.2 Initializing parameters
- 2.3 Forward propagation
- 2.4 Computing the cost
- 2.5 Backward propagation & updating parameters
- Defining the optimizer
- Computing the gradient of each parameter and updating the parameters
- 2.6 Building the model
- Prediction function
- Model
- Testing
- epoch = 1501, mini_batch_size = 32
- epoch = 2500, mini_batch_size = 32
- epoch = 1500, mini_batch_size = 64
- Modified supporting code
- tf_utils.py
- Other questions
- Differences between TensorFlow 1 and TensorFlow 2
- TensorFlow 1
- How do you use session.run?
- TensorFlow 2
- Tensor and NumPy
- Converting NumPy to Tensor
- Converting Tensor to NumPy
- tf.nn.softmax
- tf.matmul
- tf.random.uniform
- tf.random.truncated_normal
- tf.reduce_mean
- np.reshape
- zip(a, b, ...)
- tf.argmax()
- tf.one_hot()
- axis
Goal:
Get started with TensorFlow.
Rewrite the code from the [reference article] using TensorFlow 2.
1.1 Linear function
def linear_function():
    """
    Implements a linear function:
        Initialize W, a random tensor of shape (4,3)
        Initialize X, a random tensor of shape (3,1)
        Initialize b, a random tensor of shape (4,1)
    Returns:
        result - the tensor Y = WX + b (no session is needed in TF2)
    """
    np.random.seed(1)
    X = np.random.randn(3, 1)
    W = np.random.randn(4, 3)
    b = np.random.randn(4, 1)
    print("X:", X)
    print("X.shape:", X.shape)
    print("X.type", type(X))
    # tf.matmul (matrix multiplication) accepts two NumPy arrays and returns a tf tensor
    Y = tf.matmul(W, X) + b
    return Y
Test:
result = linear_function()
print("result = " + str(result))
print("result.type", type(result))
Output:
result = tf.Tensor(
[[-2.15657382]
 [ 2.95891446]
 [-1.08926781]
 [-0.84538042]], shape=(4, 1), dtype=float64)
result.type <class 'tensorflow.python.framework.ops.EagerTensor'>
1.2 Computing the sigmoid
def sigmoid(z):
    """
    Computes the sigmoid of z
    Arguments:
        z - the input value, a scalar or a vector
    Returns:
        result - the sigmoid of z
    """
    result = tf.sigmoid(z)
    return result
Test:
# The tensor must be one of the following types: `float16`, `float32`, `float64`, `complex64`, or `complex128`
print("sigmoid(0) = " + str(sigmoid(0.)))
print("sigmoid(12) = " + str(sigmoid(12.)))
Output:
sigmoid(0) = tf.Tensor(0.5, shape=(), dtype=float32)
sigmoid(12) = tf.Tensor(0.9999938, shape=(), dtype=float32)
1.3 Computing the cost
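In TF2 the cost can be computed directly in eager mode with tf.nn.softmax_cross_entropy_with_logits and tf.reduce_mean. A minimal sketch with made-up logits and one-hot labels (the real version for this network is compute_cost in 2.4):
import tensorflow as tf

# Minimal sketch: softmax cross-entropy cost (illustrative values, 2 examples, 3 classes)
logits = tf.constant([[2.0, 1.0, 0.1],
                      [0.5, 2.5, 0.2]])   # one row per example
labels = tf.constant([[1.0, 0.0, 0.0],
                      [0.0, 1.0, 0.0]])   # one-hot labels, same shape
losses = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=labels)
cost = tf.reduce_mean(losses)             # average the per-example losses
print(cost)                               # a scalar float32 tensor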
1.4 Using one-hot encoding
# Takes a vector of labels and the number of classes C, and returns the one-hot encoding
def one_hot_matrix(labels, C):
    """
    Creates a matrix where the i-th row corresponds to the i-th class and the j-th column
    corresponds to the j-th training example, so if example j has label i, entry (i,j) is 1
    Arguments:
        labels - vector of labels
        C - number of classes
    Returns:
        one_hot - the one-hot matrix
    """
    C = tf.constant(C)
    one_hot = tf.one_hot(indices=labels, depth=C, axis=0)
    return one_hot
Test:
labels = np.array([1,2,3,0,2,1])
one_hot = one_hot_matrix(labels, C=4)
print(str(one_hot))
Output:
tf.Tensor(
[[0. 0. 0. 1. 0. 0.]
 [1. 0. 0. 0. 0. 1.]
 [0. 1. 0. 0. 1. 0.]
 [0. 0. 1. 0. 0. 0.]], shape=(4, 6), dtype=float32)
1.5 Initializing with zeros and ones
def ones(shape):
    """
    Creates an array of the given shape filled with ones
    Arguments:
        shape - the shape of the array you want to create
    Returns:
        ones - an array containing only ones
    """
    rst = tf.ones(shape)
    return rst
Test:
print("ones = " + str(ones([3])))
Output:
ones = tf.Tensor([1. 1. 1.], shape=(3,), dtype=float32)
2.0 Data preprocessing
Flatten the data, normalize it by dividing by 255, and convert each label to its one-hot form.
No changes are needed compared to the TF1 version.
# Load the data
def get_data():
    X_train_orig, Y_train_orig, X_test_orig, Y_test_orig, classes = tf_utils.load_dataset()
    # index = 11
    # plt.imshow(X_train_orig[index])
    # print("Y = " + str(np.squeeze(Y_train_orig[:, index])))
    # print("Y with no squeeze:", Y_train_orig[:, index])
    # plt.show()
    # Flatten the data, normalize by dividing by 255, then convert each label to one-hot form
    # Flatten the data
    # X_train_orig is a 1080*64*64*3 array
    # print("X_train_orig.shape:", X_train_orig.shape)
    # Each column should be one example:
    # after reshape the shape is 1080*12288 (one row per example), so transpose it
    X_train_flatten = X_train_orig.reshape(X_train_orig.shape[0], -1).T
    X_test_flatten = X_test_orig.reshape(X_test_orig.shape[0], -1).T
    # Normalize the data
    X_train = X_train_flatten / 255
    X_test = X_test_flatten / 255
    # Convert Y to one-hot form
    Y_train = tf_utils.convert_to_one_hot(Y_train_orig, 6)
    Y_test = tf_utils.convert_to_one_hot(Y_test_orig, 6)
    # print("Number of training examples = " + str(X_train.shape[1]))
    # print("Number of test examples = " + str(X_test.shape[1]))
    # print("X_train.shape: " + str(X_train.shape))
    # print("Y_train.shape: " + str(Y_train.shape))
    # print("X_test.shape: " + str(X_test.shape))
    # print("Y_test.shape: " + str(Y_test.shape))
    return X_train, Y_train, X_test, Y_test, classes
2.1 No need to create placeholders
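In TF1 you had to create tf.placeholder tensors for X and Y and feed values in through feed_dict; in TF2's eager mode you simply pass the NumPy arrays (or tensors) straight into your functions. A minimal sketch, using made-up shapes:
import numpy as np
import tensorflow as tf

# No placeholders: build the data and call the ops directly
X = np.random.randn(12288, 2).astype(np.float32)  # two fake examples, one per column
W = tf.Variable(tf.random.normal([25, 12288]))
b = tf.Variable(tf.zeros([25, 1]))
Z = tf.matmul(W, X) + b                            # runs immediately in eager mode
print(Z.shape)                                     # (25, 2)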
2.2 Initializing parameters
def initialize_parameters():
    """
    Initializes the parameters of the network, with the following shapes:
        W1 : [25, 12288]
        b1 : [25, 1]
        W2 : [12, 25]
        b2 : [12, 1]
        W3 : [6, 12]
        b3 : [6, 1]
    Returns:
        parameters - a dictionary containing W and b
    """
    # Set the random seed
    tf.random.set_seed(1)
    # There is no xavier initializer in TF2; glorot_normal (Glorot/Xavier initialization
    # with a normal distribution) is the closest replacement
    initializer = tf.initializers.glorot_normal(seed=1)
    W1 = tf.Variable(initializer([25, 12288]), name="W1")
    b1 = tf.Variable(tf.zeros([25, 1]), name="b1")
    W2 = tf.Variable(initializer([12, 25]), name="W2")
    b2 = tf.Variable(tf.zeros([12, 1]), name="b2")
    W3 = tf.Variable(initializer([6, 12]), name="W3")
    b3 = tf.Variable(tf.zeros([6, 1]), name="b3")
    parameters = {"W1": W1,
                  "b1": b1,
                  "W2": W2,
                  "b2": b2,
                  "W3": W3,
                  "b3": b3}
    return parameters
Test:
parameters = initialize_parameters()
print("W1 = " + str(parameters["W1"]))
print("b1 = " + str(parameters["b1"]))
print("W2 = " + str(parameters["W2"]))
print("b2 = " + str(parameters["b2"]))
Output:
The output is long, so it is abbreviated here:
W1 = <tf.Variable 'W1:0' shape=(25, 12288) dtype=float32, numpy=
array([[..],..,[..]], dtype=float32)>
b1 = <tf.Variable 'b1:0' shape=(25, 1) dtype=float32, numpy=
array([[0.],
...
[0.]], dtype=float32)>
W2 = <tf.Variable 'W2:0' shape=(12, 25) dtype=float32, numpy=
array([[..],..,[..]],
dtype=float32)>
b2 = <tf.Variable 'b2:0' shape=(12, 1) dtype=float32, numpy=
array([[0.],
...
[0.]], dtype=float32)>
2.3 Forward propagation
# Three-layer multi-class model
def forward_propagation(X, parameters):
    """
    Implements the forward propagation for the model:
    LINEAR -> RELU -> LINEAR -> RELU -> LINEAR -> SOFTMAX
    Arguments:
        X - input of shape (number of input features, number of examples)
        parameters - dictionary containing the W and b parameters
    Returns:
        Z3 - the output of the last LINEAR unit
    """
    W1 = parameters['W1']
    b1 = parameters['b1']
    W2 = parameters['W2']
    b2 = parameters['b2']
    W3 = parameters['W3']
    b3 = parameters['b3']
    Z1 = tf.matmul(W1, X) + b1
    A1 = tf.nn.relu(Z1)
    Z2 = tf.matmul(W2, A1) + b2
    A2 = tf.nn.relu(Z2)
    Z3 = tf.matmul(W3, A2) + b3
    return Z3
Test:
X must be a float type.
X = tf.constant(1., shape=(12288, 1))
parameters = initialize_parameters()
Z3 = forward_propagation(X, parameters)
print("Z3 = " + str(Z3))
Output:
Z3 = tf.Tensor(
[[-0.756837  ]
 [ 0.8378385 ]
 [ 0.04015765]
 [-0.64093757]
 [ 0.36442205]
 [ 0.880596  ]], shape=(6, 1), dtype=float32)
2.4 Computing the cost
# Compute the cost
def compute_cost(Z3, Y):
    """
    Computes the cost
    Arguments:
        Z3 - the output of forward propagation
        Y - the labels, with the same shape as Z3
    Returns:
        cost - the cost value
    """
    # Transpose so that each row is one example
    logits = tf.transpose(Z3)
    labels = tf.transpose(Y)
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=labels))
    return cost
Test:
X = tf.constant(1., shape=([12288, 1]))
Y = tf.constant(1., shape=([6, 1]))
parameters = initialize_parameters()
Z3 = forward_propagation(X, parameters)
cost = compute_cost(Z3, Y)
print("cost = " + str(cost))
Output:
cost = tf.Tensor(11.899788, shape=(), dtype=float32)
2.5 Backward propagation & updating parameters
Defining the optimizer
# Define the optimizer
optimizer = tf.optimizers.Adam(learning_rate=learning_rate)
Computing the gradient of each parameter and updating the parameters
Then, with the help of automatic differentiation (tf.GradientTape), compute the gradient of each parameter and update the parameters (see the full sketch below):
# Get the gradients from automatic differentiation
grads = tape.gradient(mini_batch_cost, list(parameters.values()))
# Update the parameters
optimizer.apply_gradients(grads_and_vars=zip(grads, list(parameters.values())))
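Putting the two pieces together, one training step with tf.GradientTape looks roughly like the sketch below; it uses X, Y and parameters as defined elsewhere in this post, and the full loop lives in model() in 2.6:
optimizer = tf.optimizers.Adam(learning_rate=0.0001)

with tf.GradientTape() as tape:
    # The forward pass and the cost are recorded on the tape
    Z3 = forward_propagation(X, parameters)
    mini_batch_cost = compute_cost(Z3, Y)
# Gradients of the cost with respect to every trainable Variable in `parameters`
grads = tape.gradient(mini_batch_cost, list(parameters.values()))
# Apply one Adam update step
optimizer.apply_gradients(zip(grads, list(parameters.values())))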
2.6 Building the model
Prediction function
def predict(X_train, Y_train, X_test, Y_test, parameters):
    ##################################### Training set
    # Predictions
    Z = forward_propagation(X_train, parameters)
    # Find the index of the maximum value in each column (axis=0)
    Z = tf.argmax(Z, axis=0)
    # Convert back to one-hot form
    # axis gives the fill direction: fill along the columns
    Z = tf.one_hot(Z, depth=Y_train.shape[0], axis=0)
    # Cast to the same type as the labels we compare against
    Z = tf.cast(Z, dtype=Y_train.dtype)
    # Compare the predictions with the true labels
    # This gives an array of True/False values
    correct = tf.equal(Z, Y_train)
    # Cast the bool array to an int array (int64 here)
    correct = tf.cast(correct, dtype=tf.int64)
    # Take the mean of each column
    correct = tf.reduce_mean(correct, axis=0)
    # Cast again to avoid fractional values
    correct = tf.cast(correct, dtype=tf.int64)
    # Count the correct predictions by summing all elements
    total_correct = tf.reduce_sum(correct)
    # Number of examples, m
    total_number = X_train.shape[1]
    # Accuracy = number correct / total number of examples
    train_acc = total_correct / total_number
    print("Training set accuracy:", train_acc.numpy())
    ################################## Test set
    # Predictions
    Z = forward_propagation(X_test, parameters)
    # Find the index of the maximum value in each column (axis=0)
    Z = tf.argmax(Z, axis=0)
    # Convert back to one-hot form
    # axis gives the fill direction: fill along the columns
    Z = tf.one_hot(Z, depth=Y_test.shape[0], axis=0)
    # Cast to the same type as the labels we compare against
    Z = tf.cast(Z, dtype=Y_test.dtype)
    # Compare the predictions with the true labels
    # This gives an array of True/False values
    correct = tf.equal(Z, Y_test)
    # Cast the bool array to an int array (int64 here)
    correct = tf.cast(correct, dtype=tf.int64)
    # Take the mean of each column
    correct = tf.reduce_mean(correct, axis=0)
    # Cast again to avoid fractional values
    correct = tf.cast(correct, dtype=tf.int64)
    # Count the correct predictions by summing all elements
    total_correct = tf.reduce_sum(correct)
    # Number of examples, m
    total_number = X_test.shape[1]
    # Accuracy = number correct / total number of examples
    test_acc = total_correct / total_number
    print("Test set accuracy:", test_acc.numpy())
    return train_acc, test_acc
Model
def model(X_train, Y_train, X_test, Y_test, learning_rate=0.0001, num_epochs=1500, mini_batch_size=32,
          print_cost=True, is_plot=True):
    """
    Implements a three-layer TensorFlow neural network:
    LINEAR->RELU->LINEAR->RELU->LINEAR->SOFTMAX
    Arguments:
        X_train - training set, of shape (input size = 12288, number of examples = 1080)
        Y_train - training labels, of shape (output size = 6, number of examples = 1080)
        X_test - test set, of shape (input size = 12288, number of examples = 120)
        Y_test - test labels, of shape (output size = 6, number of examples = 120)
        learning_rate - the learning rate
        num_epochs - number of passes over the whole training set
        mini_batch_size - size of each mini-batch
        print_cost - whether to print the cost every 100 epochs
        is_plot - whether to plot the cost curve
    Returns:
        parameters - the learned parameters
    """
    tf.random.set_seed(1)
    seed = 3
    # Number of input features & number of examples
    n_x, m = X_train.shape
    # n_y is not actually used
    n_y = Y_train.shape[0]
    costs = []
    # # Cast to float, otherwise the matrix multiplication below complains
    # ??? After casting, the dataset can no longer be shuffled; but after shuffling it also
    # works without the cast
    # X_train = tf.cast(X_train, tf.float32)
    # X_test = tf.cast(X_test, tf.float32)
    # Initialize the parameters
    parameters = initialize_parameters()
    # Optimizer
    optimizer = tf.optimizers.Adam(learning_rate=learning_rate)
    # Start training
    for epoch in range(num_epochs):
        # Cost for this epoch
        epoch_cost = 0
        # Number of mini-batches
        num_mini_batches = m // mini_batch_size
        # Reshuffle the batches every epoch, so each epoch visits them in a different order
        seed = seed + 1
        # Only int32/int64 can be split
        mini_batches = tf_utils.random_mini_batches(X_train, Y_train, mini_batch_size, seed)
        # For each mini-batch
        for step, (X, Y) in enumerate(mini_batches):
            with tf.GradientTape() as tape:
                # Forward propagation
                Z3 = forward_propagation(X, parameters)
                # Y is already in one-hot form
                # Y = tf.one_hot(Y_train)
                # Compute the cost
                mini_batch_cost = compute_cost(Z3, Y)
            # Add this mini-batch's contribution to the epoch cost
            epoch_cost = epoch_cost + mini_batch_cost / num_mini_batches
            # After one mini-batch:
            # compute the gradient of mini_batch_cost with respect to each parameter
            grads = tape.gradient(mini_batch_cost, list(parameters.values()))
            # Update the parameters
            optimizer.apply_gradients(grads_and_vars=zip(grads, list(parameters.values())))
        # Record and print the cost
        if epoch % 5 == 0:
            costs.append(epoch_cost)
        if print_cost and epoch % 100 == 0:
            print("epoch = " + str(epoch) + " epoch_cost = " + str(epoch_cost))
    if is_plot:
        plt.plot(np.squeeze(costs))
        plt.ylabel("cost")
        plt.xlabel("iterations (per 5 epochs)")
        plt.title("learning_rate = " + str(learning_rate))
        plt.show()
    # These two return values are not used here
    train_acc, test_acc = predict(X_train, Y_train, X_test, Y_test, parameters)
    return parameters
Testing
time.clock() is no longer available; use time.perf_counter() instead.
def main():
    X_train, Y_train, X_test, Y_test, classes = get_data()
    # Start time
    start_time = time.perf_counter()
    # Train
    parameters = model(X_train, Y_train, X_test, Y_test, num_epochs=1501, mini_batch_size=32)
    # End time
    end_time = time.perf_counter()
    # Elapsed time
    print("CPU execution time = " + str(end_time - start_time) + " seconds")
if __name__ == '__main__':
    main()
epoch = 1501, mini_batch_size = 32
epoch = 0 epoch_cost = tf.Tensor(1.8597277, shape=(), dtype=float32)
epoch = 100 epoch_cost = tf.Tensor(1.0261304, shape=(), dtype=float32)
epoch = 200 epoch_cost = tf.Tensor(0.8528452, shape=(), dtype=float32)
epoch = 300 epoch_cost = tf.Tensor(0.7303493, shape=(), dtype=float32)
epoch = 400 epoch_cost = tf.Tensor(0.64019406, shape=(), dtype=float32)
epoch = 500 epoch_cost = tf.Tensor(0.5496202, shape=(), dtype=float32)
epoch = 600 epoch_cost = tf.Tensor(0.50004256, shape=(), dtype=float32)
epoch = 700 epoch_cost = tf.Tensor(0.4075786, shape=(), dtype=float32)
epoch = 800 epoch_cost = tf.Tensor(0.35448566, shape=(), dtype=float32)
epoch = 900 epoch_cost = tf.Tensor(0.30879182, shape=(), dtype=float32)
epoch = 1000 epoch_cost = tf.Tensor(0.26076597, shape=(), dtype=float32)
epoch = 1100 epoch_cost = tf.Tensor(0.21424887, shape=(), dtype=float32)
epoch = 1200 epoch_cost = tf.Tensor(0.18786089, shape=(), dtype=float32)
epoch = 1300 epoch_cost = tf.Tensor(0.14649896, shape=(), dtype=float32)
epoch = 1400 epoch_cost = tf.Tensor(0.11711984, shape=(), dtype=float32)
epoch = 1500 epoch_cost = tf.Tensor(0.093680285, shape=(), dtype=float32)
Training set accuracy: 0.987037037037037
Test set accuracy: 0.7416666666666667
CPU execution time = 557.1234039 seconds
epoch = 2500, mini_batch_size = 32
epoch = 0 epoch_cost = tf.Tensor(1.8597277, shape=(), dtype=float32)
epoch = 100 epoch_cost = tf.Tensor(1.0261304, shape=(), dtype=float32)
epoch = 200 epoch_cost = tf.Tensor(0.8528452, shape=(), dtype=float32)
epoch = 300 epoch_cost = tf.Tensor(0.7303493, shape=(), dtype=float32)
epoch = 400 epoch_cost = tf.Tensor(0.64019406, shape=(), dtype=float32)
epoch = 500 epoch_cost = tf.Tensor(0.5496202, shape=(), dtype=float32)
epoch = 600 epoch_cost = tf.Tensor(0.50004256, shape=(), dtype=float32)
epoch = 700 epoch_cost = tf.Tensor(0.4075786, shape=(), dtype=float32)
epoch = 800 epoch_cost = tf.Tensor(0.35448566, shape=(), dtype=float32)
epoch = 900 epoch_cost = tf.Tensor(0.30879182, shape=(), dtype=float32)
epoch = 1000 epoch_cost = tf.Tensor(0.26076597, shape=(), dtype=float32)
epoch = 1100 epoch_cost = tf.Tensor(0.21424887, shape=(), dtype=float32)
epoch = 1200 epoch_cost = tf.Tensor(0.18786089, shape=(), dtype=float32)
epoch = 1300 epoch_cost = tf.Tensor(0.14649896, shape=(), dtype=float32)
epoch = 1400 epoch_cost = tf.Tensor(0.11711984, shape=(), dtype=float32)
epoch = 1500 epoch_cost = tf.Tensor(0.093680285, shape=(), dtype=float32)
epoch = 1600 epoch_cost = tf.Tensor(0.07261035, shape=(), dtype=float32)
epoch = 1700 epoch_cost = tf.Tensor(0.057198185, shape=(), dtype=float32)
epoch = 1800 epoch_cost = tf.Tensor(0.04466948, shape=(), dtype=float32)
epoch = 1900 epoch_cost = tf.Tensor(0.032870866, shape=(), dtype=float32)
epoch = 2000 epoch_cost = tf.Tensor(0.026318526, shape=(), dtype=float32)
epoch = 2100 epoch_cost = tf.Tensor(0.021889813, shape=(), dtype=float32)
epoch = 2200 epoch_cost = tf.Tensor(0.0192028, shape=(), dtype=float32)
epoch = 2300 epoch_cost = tf.Tensor(0.010663158, shape=(), dtype=float32)
epoch = 2400 epoch_cost = tf.Tensor(0.010280645, shape=(), dtype=float32)
Training set accuracy: 0.9990740740740741
Test set accuracy: 0.7583333333333333
CPU execution time = 859.9762284000001 seconds
epoch = 1500, mini_batch_size = 64
Also tried mini_batch_size = 64, as suggested in the comments on the reference article; the results are about the same.
epoch = 0 epoch_cost = tf.Tensor(1.910535, shape=(), dtype=float32)
epoch = 100 epoch_cost = tf.Tensor(1.0095037, shape=(), dtype=float32)
epoch = 200 epoch_cost = tf.Tensor(0.73019004, shape=(), dtype=float32)
epoch = 300 epoch_cost = tf.Tensor(0.5799414, shape=(), dtype=float32)
epoch = 400 epoch_cost = tf.Tensor(0.48410848, shape=(), dtype=float32)
epoch = 500 epoch_cost = tf.Tensor(0.39232883, shape=(), dtype=float32)
epoch = 600 epoch_cost = tf.Tensor(0.3267515, shape=(), dtype=float32)
epoch = 700 epoch_cost = tf.Tensor(0.27411878, shape=(), dtype=float32)
epoch = 800 epoch_cost = tf.Tensor(0.22287625, shape=(), dtype=float32)
epoch = 900 epoch_cost = tf.Tensor(0.18437535, shape=(), dtype=float32)
epoch = 1000 epoch_cost = tf.Tensor(0.15225355, shape=(), dtype=float32)
epoch = 1100 epoch_cost = tf.Tensor(0.11728267, shape=(), dtype=float32)
epoch = 1200 epoch_cost = tf.Tensor(0.09166909, shape=(), dtype=float32)
epoch = 1300 epoch_cost = tf.Tensor(0.08107512, shape=(), dtype=float32)
epoch = 1400 epoch_cost = tf.Tensor(0.05561387, shape=(), dtype=float32)
Training set accuracy: 0.9981481481481481
Test set accuracy: 0.7833333333333333
CPU execution time = 379.36660390000003 seconds
Modified supporting code
tf_utils.py
import h5py
import numpy as np
import tensorflow as tf
import math

def load_dataset():
    train_dataset = h5py.File('datasets/train_signs.h5', "r")
    train_set_x_orig = np.array(train_dataset["train_set_x"][:])  # your train set features
    train_set_y_orig = np.array(train_dataset["train_set_y"][:])  # your train set labels
    test_dataset = h5py.File('datasets/test_signs.h5', "r")
    test_set_x_orig = np.array(test_dataset["test_set_x"][:])  # your test set features
    test_set_y_orig = np.array(test_dataset["test_set_y"][:])  # your test set labels
    classes = np.array(test_dataset["list_classes"][:])  # the list of classes
    train_set_y_orig = train_set_y_orig.reshape((1, train_set_y_orig.shape[0]))
    test_set_y_orig = test_set_y_orig.reshape((1, test_set_y_orig.shape[0]))
    return train_set_x_orig, train_set_y_orig, test_set_x_orig, test_set_y_orig, classes
def random_mini_batches(X, Y, mini_batch_size=64, seed=0):
    """
    Creates a list of random mini_batches from (X, Y)
    Arguments:
    X -- input data, of shape (input size, number of examples)
    Y -- true "label" vector (containing 0 if cat, 1 if non-cat), of shape (1, number of examples)
    mini_batch_size -- size of the mini-batches, integer
    seed -- this is only for the purpose of grading, so that your "random" mini_batches are the same as ours.
    Returns:
    mini_batches -- list of synchronous (mini_batch_X, mini_batch_Y)
    """
    m = X.shape[1]  # number of training examples
    mini_batches = []
    np.random.seed(seed)
    # Step 1: Shuffle (X, Y)
    permutation = list(np.random.permutation(m))
    shuffled_X = X[:, permutation]
    shuffled_Y = Y[:, permutation].reshape((Y.shape[0], m))
    # Step 2: Partition (shuffled_X, shuffled_Y). Minus the end case.
    num_complete_mini_batches = math.floor(
        m / mini_batch_size)  # number of mini batches of size mini_batch_size in your partitioning
    for k in range(0, num_complete_mini_batches):
        mini_batch_X = shuffled_X[:, k * mini_batch_size: k * mini_batch_size + mini_batch_size]
        mini_batch_Y = shuffled_Y[:, k * mini_batch_size: k * mini_batch_size + mini_batch_size]
        mini_batch = (mini_batch_X, mini_batch_Y)
        mini_batches.append(mini_batch)
    # Handling the end case (last mini-batch < mini_batch_size)
    if m % mini_batch_size != 0:
        mini_batch_X = shuffled_X[:, num_complete_mini_batches * mini_batch_size: m]
        mini_batch_Y = shuffled_Y[:, num_complete_mini_batches * mini_batch_size: m]
        mini_batch = (mini_batch_X, mini_batch_Y)
        mini_batches.append(mini_batch)
    return mini_batches

def convert_to_one_hot(Y, C):
    Y = np.eye(C)[Y.reshape(-1)].T
    return Y
def predict(X, parameters):
    W1 = tf.convert_to_tensor(parameters["W1"])
    b1 = tf.convert_to_tensor(parameters["b1"])
    W2 = tf.convert_to_tensor(parameters["W2"])
    b2 = tf.convert_to_tensor(parameters["b2"])
    W3 = tf.convert_to_tensor(parameters["W3"])
    b3 = tf.convert_to_tensor(parameters["b3"])
    params = {"W1": W1,
              "b1": b1,
              "W2": W2,
              "b2": b2,
              "W3": W3,
              "b3": b3}
    z3 = forward_propagation_for_predict(X, params)
    prediction = tf.argmax(z3)
    return prediction

def forward_propagation_for_predict(X, parameters):
    """
    Implements the forward propagation for the model: LINEAR -> RELU -> LINEAR -> RELU -> LINEAR -> SOFTMAX
    Arguments:
    X -- input dataset, of shape (input size, number of examples)
    parameters -- python dictionary containing your parameters "W1", "b1", "W2", "b2", "W3", "b3"
                  the shapes are given in initialize_parameters
    Returns:
    Z3 -- the output of the last LINEAR unit
    """
    # Retrieve the parameters from the dictionary "parameters"
    W1 = parameters['W1']
    b1 = parameters['b1']
    W2 = parameters['W2']
    b2 = parameters['b2']
    W3 = parameters['W3']
    b3 = parameters['b3']
    # Numpy Equivalents:
    Z1 = tf.add(tf.matmul(W1, X), b1)   # Z1 = np.dot(W1, X) + b1
    A1 = tf.nn.relu(Z1)                 # A1 = relu(Z1)
    Z2 = tf.add(tf.matmul(W2, A1), b2)  # Z2 = np.dot(W2, A1) + b2
    A2 = tf.nn.relu(Z2)                 # A2 = relu(Z2)
    Z3 = tf.add(tf.matmul(W3, A2), b3)  # Z3 = np.dot(W3, A2) + b3
    return Z3
Other questions
Differences between TensorFlow 1 and TensorFlow 2
TensorFlow 1
How do you use session.run?
session.run(
fetches,
feed_dict=None,
options=None,
run_metadata=None
)
import tensorflow as tf
a = tf.add(1, 2)
b = tf.multiply(a, 2)
session = tf.Session()
v1 = session.run(b)
print(v1)
replace_dict = {a:20}
v2 = session.run(b, feed_dict = replace_dict)
print(v2)
Output:
6
40
TensorFlow 2
TensorFlow 2 removed the session mechanism; operations run eagerly as soon as they are called.
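The computation from the TF1 example above can be written in TF2 without a session; feed_dict is replaced by ordinary function arguments (a minimal sketch):
import tensorflow as tf

a = tf.add(1, 2)             # runs immediately, no session needed
b = tf.multiply(a, 2)
print(b.numpy())             # 6

# Instead of feed_dict, wrap the computation in a function and call it with new inputs
def compute_b(a):
    return tf.multiply(a, 2)
print(compute_b(20).numpy()) # 40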
Tensor and NumPy
Converting NumPy to Tensor
When NumPy data is fed into a TensorFlow network, it is automatically converted to a Tensor.
You can also convert explicitly with tf.convert_to_tensor:
a = np.arange(0, 5)
b = tf.convert_to_tensor(a, dtype=tf.int64)
print("a:", a)
print("b:", b)
Output:
a: [0 1 2 3 4]
b: tf.Tensor([0 1 2 3 4], shape=(5,), dtype=int64)
Converting Tensor to NumPy
# A is a Tensor, B is a NumPy array
A = tf.Variable(1, dtype=tf.int64)
B = A.numpy()
tf.nn.softmax
Given n numbers x_1, ..., x_n, the softmax of the i-th one is softmax(x_i) = exp(x_i) / (exp(x_1) + ... + exp(x_n)).
This maps every value into the range 0~1, and the results can be interpreted as probabilities.
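A quick check in eager mode:
import tensorflow as tf

x = tf.constant([2.0, 1.0, 0.1])
p = tf.nn.softmax(x)
print(p.numpy())                  # roughly [0.659 0.242 0.099]
print(tf.reduce_sum(p).numpy())   # ~1.0 -- the outputs sum to 1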
tf.matmul
Matrix multiplication.
tf.matmul accepts two NumPy arrays as input and returns a tf tensor.
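For example, multiplying a (2, 3) NumPy array by a (3, 1) NumPy array returns a (2, 1) EagerTensor:
import numpy as np
import tensorflow as tf

A = np.array([[1., 2., 3.],
              [4., 5., 6.]])       # (2, 3) NumPy array
x = np.array([[1.], [0.], [1.]])   # (3, 1) NumPy array
y = tf.matmul(A, x)                # NumPy in, tf.Tensor out
print(y)                           # tf.Tensor of shape (2, 1), dtype=float64: [[4.], [10.]]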
tf.random.uniform
Returns a tensor of the given shape whose values are drawn from a uniform distribution between minval and maxval.
tf.random.uniform(
    shape,   # shape of the output
    minval,  # lower bound
    maxval,  # upper bound
    dtype,   # data type
    seed,    # random seed
    name
)
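For example (only the shape and dtype matter here; the values depend on the seed):
import tensorflow as tf

r = tf.random.uniform(shape=(2, 3), minval=0, maxval=10, dtype=tf.float32, seed=1)
print(r.shape, r.dtype)   # (2, 3) <dtype: 'float32'>, values uniform in [0, 10)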
tf.random.truncated_normal
Reference: truncated normal distribution.
A truncated normal distribution has the usual normal-distribution parameters (mean and variance) plus two extra parameters:
- an upper bound
- a lower bound
Note that the area under any density curve must be 1, so truncation does not simply cut off both tails of the original density; the truncated density is rescaled so that the total area is still 1.
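In tf.random.truncated_normal the bounds are fixed at two standard deviations from the mean: samples falling outside mean ± 2*stddev are dropped and re-drawn. For example:
import tensorflow as tf

t = tf.random.truncated_normal(shape=(10000,), mean=0.0, stddev=1.0, seed=1)
# Every sample lies within mean ± 2*stddev, i.e. within [-2, 2] here
print(tf.reduce_min(t).numpy() >= -2.0, tf.reduce_max(t).numpy() <= 2.0)   # True True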
tf.reduce_mean
tf.reduce_mean computes the mean of a tensor along a given axis (one of the tensor's dimensions).
reduce_mean(
    input_tensor,
    axis=None,   # the axis to reduce; if not given, the mean of all elements is computed
    keep_dims=False,
    name=None,
    reduction_indices=None
)
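For example, with a (2, 3) tensor:
import tensorflow as tf

x = tf.constant([[1., 2., 3.],
                 [4., 5., 6.]])
print(tf.reduce_mean(x).numpy())           # 3.5           -- mean of all elements
print(tf.reduce_mean(x, axis=0).numpy())   # [2.5 3.5 4.5] -- mean of each column
print(tf.reduce_mean(x, axis=1).numpy())   # [2. 5.]       -- mean of each row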
np.reshape
array.reshape(shape)
np.reshape(array, shape)
shape = (1, -1)
-1 means that dimension is worked out by NumPy.
reshape returns a new array and does not modify the original one.
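For example:
import numpy as np

a = np.arange(6)               # [0 1 2 3 4 5]
b = a.reshape((1, -1))         # -1 lets NumPy work out the remaining dimension
print(b.shape)                 # (1, 6)
print(np.reshape(a, (2, 3)))   # [[0 1 2] [3 4 5]]
print(a.shape)                 # (6,) -- the original array is unchanged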
zip(a, b, ...)
Takes one element from each of a, b, ... at a time and packs them into tuples (calling list() on the result gives a list of tuples).
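For example:
a = [1, 2, 3]
b = ['x', 'y', 'z']
print(list(zip(a, b)))   # [(1, 'x'), (2, 'y'), (3, 'z')]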
tf.argmax()
Finds the index of the largest value in an array.
tf.argmax(
    array,
    axis=0  # find the index of the maximum value in each column
)
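For example, with a (3, 2) tensor, axis=0 returns the row index of the maximum in each column:
import tensorflow as tf

x = tf.constant([[1., 9.],
                 [7., 2.],
                 [3., 5.]])
print(tf.argmax(x, axis=0).numpy())   # [1 0] -- column 0 peaks in row 1, column 1 in row 0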
tf.one_hot()
One-hot encoding.
tf.one_hot(
    array,
    depth,   # depth of the one-hot vectors
    axis=0   # fill along the columns
)
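For example, the same kind of call used in one_hot_matrix above:
import tensorflow as tf

labels = [1, 2, 3, 0]
one_hot = tf.one_hot(labels, depth=4, axis=0)
print(one_hot.shape)   # (4, 4): column j is the one-hot encoding of labels[j]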
axis
axis = 0: operate along the columns (down the rows).
axis = 1: operate along the rows (across the columns).
axis = None: operate over all elements.
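For example, with tf.reduce_sum on a (2, 3) tensor:
import tensorflow as tf

x = tf.constant([[1, 2, 3],
                 [4, 5, 6]])
print(tf.reduce_sum(x, axis=0).numpy())   # [5 7 9] -- collapse the rows, one result per column
print(tf.reduce_sum(x, axis=1).numpy())   # [ 6 15] -- collapse the columns, one result per row
print(tf.reduce_sum(x).numpy())           # 21      -- axis=None, sum over everything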