1. 程式人生 > 特徵提取:使用已有的卷積基(VGG16)訓練微型模型

特徵提取使用已有的卷積基(VGG16)訓練微型模型

程式碼出自《Python深度學習》一書,我自己動手敲了一遍。在我自己的聯想拯救者上跑起來比較慢,有 GPU 的話會快得多!

# -*- coding: utf-8 -*-
"""
Created on Tue Oct 30 22:04:30 2018

@author: Lxiao217
"""
import os
import numpy as np
from keras.preprocessing.image import ImageDataGenerator
from keras.applications import VGG16

# Pretrained VGG16 convolutional base: ImageNet weights, built without the
# top (include_top=False) and for 150x150x3 inputs.
conv_base = VGG16(
    weights='imagenet',
    include_top=False,
    input_shape=(150, 150, 3),
)

# Dataset root and its train / validation / test subdirectories.
base_dir = 'F:\\python\\DeepLearning\\cats_and_dogs_small'
train_dir, validation_dir, test_dir = (
    os.path.join(base_dir, split) for split in ('train', 'validation', 'test')
)

# Images are fed in batches of 20, with pixel values rescaled by 1/255.
batch_size = 20
datagen = ImageDataGenerator(rescale=1. / 255)

def extract_features(directory, sample_count):
    """Run images from ``directory`` through ``conv_base`` and collect the outputs.

    Images are read with ``datagen.flow_from_directory`` (resized to 150x150,
    binary labels, ``batch_size`` images per batch) and each batch is passed
    to ``conv_base.predict``.

    Args:
        directory: Image directory handed to ``datagen.flow_from_directory``.
        sample_count: Number of samples to extract.

    Returns:
        A ``(features, labels)`` tuple of NumPy arrays with shapes
        ``(sample_count, 4, 4, 512)`` and ``(sample_count,)``.

    Raises:
        ValueError: If the generator yields an empty batch, since the
            requested number of samples could then never be reached.
    """
    features = np.zeros(shape=(sample_count, 4, 4, 512))
    labels = np.zeros(shape=(sample_count,))
    if sample_count == 0:
        # Nothing to extract; avoid pulling a batch that cannot be stored.
        return features, labels

    generator = datagen.flow_from_directory(
        directory,
        target_size=(150, 150),
        batch_size=batch_size,
        class_mode='binary')

    # Track how many rows are filled instead of assuming every batch holds
    # exactly ``batch_size`` images: the last batch of a pass over the
    # directory can be shorter, and ``sample_count`` need not be a multiple
    # of ``batch_size``.
    filled = 0
    for input_batch, labels_batch in generator:
        features_batch = conv_base.predict(input_batch)
        take = min(len(features_batch), sample_count - filled)
        if take == 0:
            raise ValueError(
                'generator produced an empty batch for %r' % (directory,))
        features[filled:filled + take] = features_batch[:take]
        labels[filled:filled + take] = labels_batch[:take]
        filled += take

        # Note: the generator yields data indefinitely in a loop, so we must
        # break out once the requested number of samples has been collected.
        if filled >= sample_count:
            break
    return features, labels

# Extract convolutional-base features for each split:
# 2000 training, 1000 validation and 1000 test samples.
train_features, train_labels = extract_features(train_dir, 2000)
validation_features, validation_labels = extract_features(validation_dir, 1000)
test_features, test_labels = extract_features(test_dir, 1000)

# Flatten the extracted (4, 4, 512) features into vectors of length 4*4*512.
train_features = train_features.reshape((2000, 4 * 4 * 512))
validation_features = validation_features.reshape((1000, 4 * 4 * 512))
test_features = test_features.reshape((1000, 4 * 4 * 512))

from keras import models
from keras import layers
from keras import optimizers


# Small densely connected classifier trained on the flattened, pre-extracted
# features.
# Alternative (the second feature-extraction approach, not used here): add
# conv_base as the first layer of the model and run the whole model end to
# end on the input data, with conv_base.trainable = False to freeze the
# convolutional base (it has too many parameters to train).
model = models.Sequential([
    layers.Dense(256, activation='relu', input_dim=4 * 4 * 512),
    layers.Dropout(0.5),
    layers.Dense(1, activation='sigmoid'),
])

model.compile(
    optimizer=optimizers.RMSprop(lr=8e-4),
    loss='binary_crossentropy',
    metrics=['acc'],
)

# Train for 30 epochs in mini-batches of 20, validating on the held-out
# validation features after every epoch.
history = model.fit(
    train_features,
    train_labels,
    epochs=30,
    batch_size=20,
    validation_data=(validation_features, validation_labels),
)

import matplotlib.pyplot as plt
# Per-epoch metrics recorded by model.fit.
acc, val_acc, loss, val_loss = (
    history.history[key] for key in ('acc', 'val_acc', 'loss', 'val_loss')
)
epochs = range(1, len(acc) + 1)

# Accuracy curves: training as blue stars, validation as a blue line.
plt.plot(epochs, acc, 'b*', label='Train_acc')
plt.plot(epochs, val_acc, 'b', label='Validation_acc')
plt.title('Training and Validation Accuracy')
plt.legend()

# Loss curves go on a second, separate figure.
plt.figure()

plt.plot(epochs, loss, 'r*', label='Train_loss')
plt.plot(epochs, val_loss, 'r', label='Validation_loss')
plt.title('Training and Validation Loss')
plt.legend()

plt.show()

執行的結果如下:

 

驗證精度約為90%,基本上從第四輪(epoch)起就開始過擬合了;後面使用“微調模型”能達到96%的精度。