# To add a new cell, type '# %%'
# To add a new markdown cell, type '# %% [markdown]'
# %%
from IPython import get_ipython
# %%
get_ipython().run_line_magic('matplotlib', 'inline')
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import sklearn
import sys
import tensorflow as tf
import time
from tensorflow import keras
# Record the interpreter and library versions this run was executed with.
print(tf.__version__)
print(sys.version_info)
for module in (mpl, np, pd, sklearn, tf, keras):
    print(module.__name__, module.__version__)
# %%
# CIFAR-10 category names. This list is passed to the data generators as
# ``classes`` and indexed by predicted class id when decoding predictions.
class_names = (
    'airplane automobile bird cat deer '
    'dog frog horse ship truck'
).split()

# CSV inputs and the image folders their ids are resolved against.
train_lables_file, test_csv_file = (
    './cifar10/trainLabels.csv',
    './cifar10/sampleSubmission.csv',
)
train_folder, test_folder = './cifar10/train/', './cifar10/test'
def parse_csv_file(filepath, folder):
    """Parse a two-column CSV into ``(image_path, label)`` tuples.

    The first line of the file is treated as a header and skipped. Every
    remaining non-blank line must have the form ``<image_id>,<label>``.

    Args:
        filepath: Path of the CSV file to read.
        folder: Directory that is joined with ``<image_id>.png`` to build
            each image path.

    Returns:
        A list of ``(image_full_path, label_str)`` tuples in file order.

    Raises:
        ValueError: If a non-blank data line does not contain exactly two
            comma-separated fields.
    """
    results = []
    with open(filepath, 'r') as f:
        next(f, None)  # skip the header row; tolerate an empty file
        for line in f:
            line = line.rstrip('\n')
            # Blank lines (e.g. a trailing empty line) carry no record and
            # would otherwise fail the two-field unpacking below.
            if not line.strip():
                continue
            image_id, label_str = line.split(',')
            image_full_path = os.path.join(folder, image_id + '.png')
            results.append((image_full_path, label_str))
    return results
import pprint

# Resolve every CSV row to an (image path, label) pair.
train_labels_info = parse_csv_file(train_lables_file, train_folder)
test_csv_info = parse_csv_file(test_csv_file, test_folder)

# Show the first few entries of each list and their total sizes.
pprint.pprint(train_labels_info[:5])
pprint.pprint(test_csv_info[:5])
print(len(train_labels_info), len(test_csv_info))
# %%
# train_df = pd.DataFrame(train_labels_info)
# The first 45,000 labelled rows are used for training and the remainder
# for validation. Column names are supplied at construction so that an
# empty slice still produces a well-formed zero-row frame; assigning
# ``.columns`` afterwards raises ValueError on a frame built from ``[]``.
train_df = pd.DataFrame(train_labels_info[:45000],
                        columns=['filepath', 'class'])
valid_df = pd.DataFrame(train_labels_info[45000:],
                        columns=['filepath', 'class'])
test_df = pd.DataFrame(test_csv_info, columns=['filepath', 'class'])
print(train_df.head())
print(valid_df.head())
print(test_df.head())
# %%
# Image geometry and batching parameters used by the generators and model.
height, width, channels = 32, 32, 3
batch_size = 32
num_classes = 10

# Arguments shared by the training and validation flows below.
_flow_kwargs = dict(
    directory='./',
    x_col='filepath',
    y_col='class',
    classes=class_names,
    target_size=(height, width),
    batch_size=batch_size,
    seed=7,
    class_mode='sparse',
)

# Training pipeline: scale pixel values by 1/255 and apply random
# rotation, shift, shear, zoom and horizontal-flip augmentation.
train_datagen = keras.preprocessing.image.ImageDataGenerator(
    rescale=1./255,
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
)
train_generator = train_datagen.flow_from_dataframe(
    train_df, shuffle=True, **_flow_kwargs)

# Validation pipeline: rescaling only, read in dataframe order.
valid_datagen = keras.preprocessing.image.ImageDataGenerator(rescale=1./255)
valid_generator = valid_datagen.flow_from_dataframe(
    valid_df, shuffle=False, **_flow_kwargs)

train_num, valid_num = train_generator.samples, valid_generator.samples
print(train_num, valid_num)
# %%
# Draw two batches from the training generator and show their shapes and
# labels as a quick sanity check.
for i in range(2):
    x, y = next(train_generator)
    print(x.shape, y.shape)
    print(y)
# %%
# CNN classifier: three stages of two 3x3 'same'-padded ReLU convolutions
# (128, 256, 512 filters), each convolution followed by batch
# normalization and each stage closed by 2x2 max pooling; then a 512-unit
# dense layer and a softmax over ``num_classes``.
model = keras.models.Sequential([
    # Channels-last input is (rows, cols, channels), i.e. height first,
    # matching ``target_size=(height, width)`` used by the generators.
    keras.layers.Conv2D(filters=128, kernel_size=3, padding='same',
                        activation='relu',
                        input_shape=[height, width, channels]),
    keras.layers.BatchNormalization(),
    keras.layers.Conv2D(filters=128, kernel_size=3, padding='same',
                        activation='relu'),
    keras.layers.BatchNormalization(),
    keras.layers.MaxPool2D(pool_size=2),
    keras.layers.Conv2D(filters=256, kernel_size=3, padding='same',
                        activation='relu'),
    keras.layers.BatchNormalization(),
    keras.layers.Conv2D(filters=256, kernel_size=3, padding='same',
                        activation='relu'),
    keras.layers.BatchNormalization(),
    keras.layers.MaxPool2D(pool_size=2),
    keras.layers.Conv2D(filters=512, kernel_size=3, padding='same',
                        activation='relu'),
    keras.layers.BatchNormalization(),
    keras.layers.Conv2D(filters=512, kernel_size=3, padding='same',
                        activation='relu'),
    keras.layers.BatchNormalization(),
    keras.layers.MaxPool2D(pool_size=2),
    keras.layers.Flatten(),
    keras.layers.Dense(512, activation='relu'),
    keras.layers.Dense(num_classes, activation='softmax'),
])
# Sparse loss because the generators emit integer class ids
# (class_mode='sparse').
model.compile(loss="sparse_categorical_crossentropy",
              optimizer="adam", metrics=['accuracy'])
model.summary()
# %%
epochs = 20
# Step counts use floor division, so only whole batches are counted per
# epoch for both training and validation.
history = model.fit_generator(
    train_generator,
    steps_per_epoch=train_num // batch_size,
    epochs=epochs,
    validation_data=valid_generator,
    validation_steps=valid_num // batch_size,
)
# %%
def plot_learning_curves(history, label, epcohs, min_value, max_value):
    """Plot a training metric together with its validation counterpart.

    Args:
        history: Object whose ``history`` attribute maps metric names to
            per-epoch sequences of values.
        label: Metric key to plot; ``'val_' + label`` is plotted with it.
        epcohs: Upper limit of the x axis (number of epochs). The misspelt
            name is kept so existing keyword callers keep working.
        min_value: Lower limit of the y axis.
        max_value: Upper limit of the y axis.
    """
    data = {key: history.history[key] for key in (label, 'val_' + label)}
    pd.DataFrame(data).plot(figsize=(8, 5))
    plt.grid(True)
    # Use the argument, not the module-level ``epochs``, so the x range
    # follows what the caller passed in.
    plt.axis([0, epcohs, min_value, max_value])
    plt.show()
# The history key for metrics=['accuracy'] is 'acc' in older tf.keras
# releases and 'accuracy' in newer ones; use whichever was recorded so the
# lookup does not raise KeyError.
acc_label = 'acc' if 'acc' in history.history else 'accuracy'
plot_learning_curves(history, acc_label, epochs, 0, 1)
plot_learning_curves(history, 'loss', epochs, 0, 2)
# %%
# Test images get 1/255 rescaling only, with no augmentation.
test_datagen = keras.preprocessing.image.ImageDataGenerator(
    rescale=1./255)
# Build the flow from ``test_datagen`` (it was previously created but left
# unused while ``valid_datagen`` was called instead). shuffle=False keeps
# batches in ``test_df`` row order, which the positional ids written to
# the submission file rely on.
test_generator = test_datagen.flow_from_dataframe(
    test_df,
    directory='./',
    x_col='filepath',
    y_col='class',
    classes=class_names,
    target_size=(height, width),
    batch_size=batch_size,
    seed=7,
    shuffle=False,
    class_mode="sparse")
test_num = test_generator.samples
print(test_num)
# %%
# Run the trained model over the whole test generator using ten worker
# processes.
test_predict = model.predict_generator(
    test_generator,
    workers=10,
    use_multiprocessing=True,
)
# %%
print(test_predict.shape)
# %%
print(test_predict[:5])
# %%
# Highest-scoring class id for each prediction row.
test_predict_class_indices = np.argmax(test_predict, axis=1)
# %%
print(test_predict_class_indices[:5])
# %%
# Translate class ids back to their category names.
test_predict_class = [class_names[class_id]
                      for class_id in test_predict_class_indices]
# %%
print(test_predict_class[:5])
# %%
def generate_submissions(filename, predict_class):
    """Write predictions to ``filename`` as an ``id,label`` CSV.

    A header line is written first, then one row per entry of
    ``predict_class``, numbered from 1 in the order given.
    """
    with open(filename, 'w') as out:
        out.write('id,label\n')
        out.writelines(
            '%d,%s\n' % (row_id, label)
            for row_id, label in enumerate(predict_class, start=1)
        )
# Save the decoded test predictions as the submission file.
output_file = './cifar10/submission.csv'
generate_submissions(output_file, test_predict_class)
# %%
[Tensorflow][转载]cifar10数据集模型搭建与训练
转载futureflsl 博主文章分类:tensorflow
文章标签 ci tensorflow 文章分类 Python 后端开发
-
4个大语言模型训练中的典型开源数据集
本篇文章将介绍大语言模型应用中的典型开源数据集集合。
数据集 数据 语言模型 Pile ROOTS -
搭建Vgg16训练CIFAR10数据集
今天我们来学习下经典网络VGG,并且模拟实现VGG16,且用来训练cifar10数据集
VGG 深度学习 pytorch 2d ide -
AlexNet训练CIFAR10数据集——pytorch实现
代码在kaggle上跑,精度最终为90%输入一张3*227*227的图片,每一层的输出:Co
python alexnet 2d ide 卷积 -
DenseNet训练CIFAR10数据集的pytorch实现
代码在kaggle上训练30分钟,精度80%Sequential output shape: torch.Size([1, 64, 24
densenet pytorch 2d 卷积 sed -
ResNet训练CIFAR10数据集的pytorch实现
代码在kaggle上跑了1个小时,精度最终达到90%Sequential output
resnet 2d ide sed -
数据集VOC/CIFAR10/CATDOG
目标检测
python pytorch 目标检测 数据集 -
python CIFAR10 Python cifar10数据集下载
一.百度网盘Cifar获取地址: 链接:https://pan.baidu.com/s/132yQGedau02Bw47fz75bYQ 提取码:bnvd 二.Tensorflow代码实现: 该程序部分分为两个文件,分别是:Ci
python CIFAR10 Cifar-10 人工智能 Tensorflow:卷积网络 数据