7.1 准备性认识
1.卷积网络的神经科学基础
方向选择性细胞:处于视觉系统较为前面的神经元细胞会在瞳孔瞥见眼前物体的边缘,并且这个边缘指向某个方向时呈现出活跃的状态。
图像通过视网膜刺激,传递到头后部的V1区[初级视觉皮层]:初级视觉皮层可以分为简单细胞和复杂细胞。
V1区的简单细胞感受野较小,呈狭长形,对小光点有反应而对于弥散光无反应。
2.神经认知机
神经认知机中包含两类神经元:S-cells(S 细胞),承担特征抽取;C-cells(C 细胞),抵抗变形。
S-cell中有两个重要的参数,感受野和阈值
感受野:确定输入连接的数目
阈值:用于控制对子特征的反应程度
C-cells 会对每个 S-cells 的感光区施加正态分布的视觉模糊量[类似于激活函数+最大池化]
7.2卷积
卷积神经网络(CNN)
1.卷积运算
卷积运算具有3个重要特性:稀疏连接,参数共享,等变表示
<1>稀疏连接:减少权重参数,降低过拟合,提升模型的泛化能力。
<2>参数共享:
2.多卷积核
3.卷积层的代码实现
# Conv2D demo: apply one fixed 2x2 kernel to a 4x4 single-channel input.
import tensorflow as tf
import numpy as np  # original `from numpy as np` was a SyntaxError

# Input image, shape (batch=1, height=4, width=4, channels=1).
# The original call passed four separate lists to np.array (a TypeError);
# they must be wrapped in one outer list.
I = np.array([[[2], [1], [2], [-1]],
              [[0], [-1], [3], [0]],
              [[2], [1], [-1], [4]],
              [[-2], [0], [-3], [4]]], dtype="float32").reshape(1, 4, 4, 1)

model = tf.keras.Sequential()
# Keyword names fixed: kernel_size / kernel_initializer (not "kernal_...").
model.add(tf.keras.layers.Conv2D(input_shape=(4, 4, 1),
                                 filters=1, kernel_size=(2, 2), strides=(1, 1),
                                 padding='same', use_bias=True,
                                 kernel_initializer=tf.constant_initializer([[-1, 4], [2, 1]]),
                                 bias_initializer='ones'))
output = model(I, training=True)
print(output)
7.3 池化
## TensorFlow 2.0 model: convolution followed by max pooling.
import tensorflow as tf
import numpy as np  # original `from numpy as np` was a SyntaxError

# Input image, shape (batch=1, height=4, width=4, channels=1); the four row
# lists must be wrapped in one outer list for np.array.
I = np.array([[[2], [1], [2], [-1]],
              [[0], [-1], [3], [0]],
              [[2], [1], [-1], [4]],
              [[-2], [0], [-3], [4]]], dtype="float32").reshape(1, 4, 4, 1)

model = tf.keras.Sequential()
# Keyword names fixed: kernel_size / kernel_initializer (not "kernal_...").
model.add(tf.keras.layers.Conv2D(input_shape=(4, 4, 1),
                                 filters=1, kernel_size=(2, 2), strides=(1, 1),
                                 padding='same', use_bias=True,
                                 kernel_initializer=tf.constant_initializer([[-1, 4], [2, 1]]),
                                 bias_initializer='ones'))
# 2x2 max pooling with stride 2 halves each spatial dimension.
model.add(tf.keras.layers.MaxPool2D(input_shape=(4, 4, 1), pool_size=(2, 2),
                                    strides=(2, 2), padding="valid"))
output = model(I, training=True)
print(output)
7.4 实现卷积神经网络的实例
1.一般框架
输入层,卷积层,池化层,全连接层,softmax层
2.基于Cifar-10数据集使用简单卷积神经网络实现分类
[1]导入类库
[2]定义一个用于读取文件列表中每个文件的数据的函数,FixedLengthRecordDataset()函数可以从二进制文件中读取固定长度的数据。
decode_raw()函数可以将二进制串解析为图像对应的像素
strided_slice()函数截取[0,1)区间数据作为label,其他数据作为图像数据。
reshape()函数将1维数据转化为3维数据
transpose()函数将[depth,height,width]转换为[height,width,depth]
[3]定义一个数据读取和可以选择图像增强的函数
join()函数可以拼接文件完整的路径
distorted 参数——判断是否进行数据增强
数据增强:
- 使用 TensorFlow 的 image.random_crop() 函数将 [32×32×3] 图片随机裁剪为 [24×24×3]
- 使用random_flip_left_right()函数进行左右翻转功能
- 使用image.random_brightness()函数对图像进行随机亮度调整
- 使用image.per_image_standardization()函数进行归一化操作
- 使用shuffle()函数打乱元素数据
- 使用batch()函数组织数据batch
综上Cifar10_data.py
## CIFAR-10 dataset reading and preprocessing (Cifar10_data.py).
## os is used below to join file-system paths.
import os
import tensorflow as tf
num_classes=10
# Total number of training / evaluation examples in CIFAR-10.
# NOTE(review): "pre_epoh" / "pre_epoch" look like typos for "per_epoch";
# only the eval constant is actually referenced below (in inputs()).
num_examples_pre_epoh_for_train=50000
num_examples_pre_epoch_for_eval=10000
# Record container returned by read_cifar10() (defined below).
class CIFAR10Record(object):
    """Container for decoded CIFAR-10 samples.

    Attributes:
        labels: list of int32 label tensors, one per sample.
        images: list of [height, width, depth] image tensors.
    """

    def __init__(self):
        # Instance-level lists. The original class-level lists were shared
        # by every instance, so each read_cifar10() call appended into the
        # same lists and results accumulated across reads.
        self.labels = []
        self.images = []
def read_cifar10(filename):
    """Read and decode CIFAR-10 binary record files.

    Args:
        filename: path (or list of paths) to CIFAR-10 ``.bin`` files.

    Returns:
        A CIFAR10Record with height/width/depth set and one decoded
        label / image appended per record in the file(s).
    """
    result = CIFAR10Record()
    # CIFAR-10 binary layout: 1 label byte followed by 32*32*3 image bytes.
    label_bytes = 1
    result.height = 32
    result.width = 32
    result.depth = 3
    image_bytes = result.height * result.width * result.depth  # 3072
    record_bytes = label_bytes + image_bytes                   # 3073
    dataset = tf.data.FixedLengthRecordDataset(filenames=filename,
                                               record_bytes=record_bytes)
    for sample in dataset:
        # decode_raw turns the raw byte string into a uint8 pixel vector.
        raw = tf.io.decode_raw(sample, tf.uint8)
        # First byte is the class label.
        label = tf.cast(tf.strided_slice(raw, [0], [label_bytes]), tf.int32)
        # Remaining bytes are the image, stored depth-major: [depth, height, width].
        depth_major = tf.reshape(
            tf.strided_slice(raw, [label_bytes], [label_bytes + image_bytes]),
            [result.depth, result.height, result.width])
        result.labels.append(label)
        # Transpose to the conventional [height, width, depth] layout.
        result.images.append(tf.transpose(depth_major, [1, 2, 0]))
    return result
####fixedLengthRecordReader///read()
def inputs(data_dir, batch_size, distorted):
    """Build a batched tf.data pipeline over CIFAR-10.

    Args:
        data_dir: directory containing data_batch_*.bin (training mode),
            or a single .bin file path (evaluation mode).
        batch_size: batch size of the returned dataset.
        distorted: if not None, read the five training batches and apply
            random data augmentation; otherwise read data_dir as-is with
            only a center crop and standardization.

    Returns:
        A tf.data.Dataset of (image, label) batches.
    """
    # Local import: flattens the per-sample label lists produced by
    # read_cifar10(). The original code used `chain` without importing it.
    from itertools import chain

    if distorted is not None:
        # Training: read all five CIFAR-10 training batch files.
        filenames = [os.path.join(data_dir, "data_batch_%d.bin" % i)
                     for i in range(1, 6)]
        read_input = read_cifar10(filenames)
        reshaped_image = tf.cast(read_input.images, tf.float32)
        # Random crop [32,32,3] -> [24,24,3]. (Was tf.images.*: no such module.)
        # NOTE(review): 50000 hard-codes the training-set size; it must match
        # the number of samples actually read above.
        cropped_image = tf.image.random_crop(reshaped_image, [50000, 24, 24, 3])
        # Random left-right flip.
        flipped_image = tf.image.random_flip_left_right(cropped_image)
        # Random brightness adjustment.
        adjusted_brightness = tf.image.random_brightness(flipped_image, max_delta=0.8)
        # Random contrast adjustment. (Was td.image.*: typo for tf.)
        adjusted_contrast = tf.image.random_contrast(adjusted_brightness,
                                                     lower=0.2, upper=1.8)
        # Per-image standardization (zero mean, unit variance).
        float_image = tf.image.per_image_standardization(adjusted_contrast)
        read_input.labels = list(chain.from_iterable(read_input.labels))
        min_queue_examples = int(num_examples_pre_epoch_for_eval * 0.4)
        print('Filling queue with %d CIFAR images before starting to train. '
              'This will take a few minutes.' % min_queue_examples)
        train_dataset = tf.data.Dataset.from_tensor_slices(
            (float_image, read_input.labels))
        train_dataset = train_dataset.shuffle(
            buffer_size=min_queue_examples + 3 * batch_size).batch(batch_size=batch_size)
        return train_dataset
    else:
        # Evaluation: deterministic center crop, no augmentation.
        read_input = read_cifar10(data_dir)
        reshaped_image = tf.cast(read_input.images, tf.float32)
        # (Was tf.image.resized_image_with_crop_or_pad: no such function.)
        resized_image = tf.image.resize_with_crop_or_pad(reshaped_image, 24, 24)
        float_image = tf.image.per_image_standardization(resized_image)
        read_input.labels = list(chain.from_iterable(read_input.labels))
        # (Was td.data.Dataset: typo for tf.)
        test_dataset = tf.data.Dataset.from_tensor_slices(
            (float_image, read_input.labels))
        test_dataset = test_dataset.batch(batch_size=batch_size)
        return test_dataset
[4]卷积神经网络的设计
池化操作的尺寸大于步长的做法可以增加数据的丰富性
# Training script for the simple CNN on CIFAR-10.
import tensorflow as tf
import time
import math
import Cifar10_data

num_examples_for_eval = 10000
data_dir = "../Cifar10_data/cifar-10-batches-bin"
test_data = "../Cifar10_data/cifar-10-batches-bin/test_batch.bin"
# The reader module defines inputs(), not input().
dataset_for_train = Cifar10_data.inputs(data_dir=data_dir, batch_size=100, distorted=True)
dataset_for_test = Cifar10_data.inputs(data_dir=test_data, batch_size=100, distorted=None)
class Sample_CNN(tf.keras.Model):
    """Simple CNN: two conv + max-pool stages, then three dense layers.

    Expects 24x24x3 inputs and produces a 10-way softmax over the
    CIFAR-10 classes.
    """

    def __init__(self):
        # Original code had `delf` for self and subclassed `self.keras.Model`.
        super(Sample_CNN, self).__init__()
        layers = tf.keras.layers  # original referenced an undefined bare `layers`
        self.conv1 = layers.Conv2D(filters=64, kernel_size=(5, 5), padding="SAME",
                                   activation='relu', use_bias=True,
                                   bias_initializer='zeros', input_shape=(24, 24, 3))
        self.maxpool1 = layers.MaxPool2D(pool_size=(3, 3), strides=(2, 2), padding='same')
        self.conv2 = layers.Conv2D(filters=64, kernel_size=(5, 5), strides=(1, 1),
                                   padding="SAME", activation='relu', use_bias=True,
                                   bias_initializer='zeros')
        self.maxpool2 = layers.MaxPool2D(pool_size=(3, 3), strides=(2, 2), padding='same')
        self.flatten = layers.Flatten()
        # For 24x24x3 inputs, two stride-2 poolings give 6x6x64 = 2304
        # flattened features (the commented-out shape check showed (None, 2304)).
        self.f1 = layers.Dense(units=384, activation='relu', use_bias=True,
                               bias_initializer='zeros')
        self.f2 = layers.Dense(units=192, activation='relu', use_bias=True,
                               bias_initializer='zeros')
        self.f3 = layers.Dense(units=10, activation='softmax', use_bias=True,
                               bias_initializer='zeros')

    def call(self, inputs):
        """Forward pass: conv1 -> pool -> conv2 -> pool -> flatten -> 3 dense."""
        x = self.conv1(inputs)
        x = self.maxpool1(x)
        x = self.conv2(x)
        x = self.maxpool2(x)
        x = self.flatten(x)
        x = self.f1(x)
        x = self.f2(x)
        return self.f3(x)
sample_CNN = Sample_CNN()
# Adam optimizer and sparse categorical cross-entropy (integer labels vs.
# softmax output). The variable keeps its original (misspelled) name
# `optimzer` because train_step below references it.
optimzer = tf.keras.optimizers.Adam(1e-3)  # was tf.keras.optimzer: no such module
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy()
# Streaming metrics, accumulated per batch and read out each epoch.
train_loss = tf.keras.metrics.Mean(name='train_loss')  # name was 'traian_loss'
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')
test_loss = tf.keras.metrics.Mean(name='test_loss')  # was metrics.Means: no such class
test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='test_accuracy')
@tf.function
def train_step(train_images, train_labels):
    """Run one optimization step and update the training metrics."""
    with tf.GradientTape() as tape:
        logits = sample_CNN(train_images)
        batch_loss = loss_fn(train_labels, logits)
    grads = tape.gradient(batch_loss, sample_CNN.trainable_variables)
    optimzer.apply_gradients(zip(grads, sample_CNN.trainable_variables))
    train_loss(batch_loss)
    train_accuracy(train_labels, logits)
@tf.function
def test_step(test_images, test_labels):
    """Evaluate one batch and accumulate the test metrics."""
    logits = sample_CNN(test_images)
    batch_loss = loss_fn(test_labels, logits)
    test_loss(batch_loss)
    test_accuracy(test_labels, logits)
EPOCHS = 10
for epoch in range(EPOCHS):
    # Reset the streaming metrics so each epoch reports its own averages
    # instead of accumulating over all previous epochs.
    train_loss.reset_states()
    train_accuracy.reset_states()
    test_loss.reset_states()
    test_accuracy.reset_states()
    for train_images, train_labels in dataset_for_train:
        train_step(train_images, train_labels)
    for test_images, test_labels in dataset_for_test:
        test_step(test_images, test_labels)
    template = 'Epoch {},Loss {},Accuracy{}, Test Loss: {},Test Accuracy: {}'
    # Original had a broken nested `epoch.format(...)` call and an
    # unbalanced parenthesis (SyntaxError); format the template directly.
    print(template.format(epoch + 1, train_loss.result(),
                          train_accuracy.result() * 100,
                          test_loss.result(), test_accuracy.result() * 100))