# 吴裕雄--天生自然TensorFlow2教程:多输出感知机及其梯度_分享

import tensorflow as tf

# Multi-output perceptron: a batch of 2 samples with 4 features each is mapped
# to 3 output classes by one dense layer followed by softmax.
x = tf.random.normal([2, 4])  # inputs, shape [2, 4]
w = tf.random.normal([4, 3])  # weights, shape [4, 3]
b = tf.zeros([3])  # bias, one entry per output class
y = tf.constant([2, 0])  # integer class label for each of the 2 samples

with tf.GradientTape() as tape:
    # w and b are plain tensors rather than tf.Variable objects, so the tape
    # has to be told explicitly to track them.
    tape.watch([w, b])
    # axis=1 normalizes over the class dimension of the [batch, 3] result,
    # turning each row into a probability distribution.
    prob = tf.nn.softmax(x @ w + b, axis=1)
    # One-hot targets: label 2 -> [0, 0, 1]; label 0 -> [1, 0, 0].
    # MSE gives one value per sample; reduce_mean averages them to a scalar.
    loss = tf.reduce_mean(tf.losses.MSE(tf.one_hot(y, depth=3), prob))

# Gradients are returned in the same order as the sources: [dloss/dw, dloss/db].
grads = tape.gradient(loss, [w, b])
# A bare `grads[0]` expression only displays a value in a REPL or notebook;
# print explicitly so the gradients are visible when run as a script.
print(grads[0])
print(grads[1])