1. 程式人生 > caffe多標籤輸入和多工學習

caffe多標籤輸入和多工學習

拷貝convert_imageset,生成新工程convert_imageset_multi_label

image

修改原始碼

std::ifstream infile(argv[2]);
    std::vector<std::pair<std::string, std::vector<float>> > lines;
    std::string filename;
    std::string label_count_string = argv[5];
    int label_count = std::atoi(label_count_string.c_str());
    std::vector
<float> label(label_count); while (infile >> filename) { for (int i = 0; i < label_count; i++) infile >> label[i]; lines.push_back(std::make_pair(filename, label)); } if (FLAGS_shuffle) { // randomly shuffle data LOG(INFO) << "
Shuffling data"; shuffle(lines.begin(), lines.end()); } LOG(INFO) << "A total of " << lines.size() << " images."; if (encode_type.size() && !encoded) LOG(INFO) << "encode_type specified, assuming encoded=true."; int resize_height = std::max<int
>(0, FLAGS_resize_height); int resize_width = std::max<int>(0, FLAGS_resize_width); // Create new DB scoped_ptr<db::DB> db_image(db::GetDB(FLAGS_backend)); scoped_ptr<db::DB> db_label(db::GetDB(FLAGS_backend)); db_image->Open(argv[3], db::NEW); db_label->Open(argv[4], db::NEW); scoped_ptr<db::Transaction> txn_image(db_image->NewTransaction()); scoped_ptr<db::Transaction> txn_label(db_label->NewTransaction()); // // Create new DB // scoped_ptr<db::DB> db(db::GetDB(FLAGS_backend)); // db->Open(argv[3], db::NEW); // scoped_ptr<db::Transaction> txn(db->NewTransaction()); // Storing to db std::string root_folder(argv[1]); Datum datum_image; Datum datum_label; int count = 0; int data_size = 0; bool data_size_initialized = false; for (int line_id = 0; line_id < lines.size(); ++line_id) { bool status; std::string enc = encode_type; if (encoded && !enc.size()) { // Guess the encoding type from the file name string fn = lines[line_id].first; size_t p = fn.rfind('.'); if ( p == fn.npos ) LOG(WARNING) << "Failed to guess the encoding of '" << fn << "'"; enc = fn.substr(p); std::transform(enc.begin(), enc.end(), enc.begin(), ::tolower); } status = ReadImageToDatum(root_folder + lines[line_id].first, lines[line_id].second[0], resize_height, resize_width, is_color, enc, &datum_image); if (status == false) continue; if (check_size) { if (!data_size_initialized) { data_size = datum_image.channels() * datum_image.height() * datum_image.width(); data_size_initialized = true; } else { const std::string& data = datum_image.data(); CHECK_EQ(data.size(), data_size) << "Incorrect data field size " << data.size(); } } // sequential string key_str = caffe::format_int(line_id, 8) + "_" + lines[line_id].first; // Put in db string out; CHECK(datum_image.SerializeToString(&out)); txn_image->Put(key_str, out); ////////////////////////////////////////////////////////////////////////// datum_label.set_channels(label_count); datum_label.set_height(1); datum_label.set_width(1); 
datum_label.clear_data(); datum_label.clear_float_data(); datum_label.set_encoded(false); std::vector<float> label_vec = lines[line_id].second; for (int i = 0; i < label_vec.size();i++) { datum_label.add_float_data(label_vec[i]); } string out_label; CHECK(datum_label.SerializeToString(&out_label)); txn_label->Put(key_str, out_label); ////////////////////////////////////////////////////////////////////////// if (++count % 1000 == 0) { // Commit db txn_image->Commit(); txn_image.reset(db_image->NewTransaction()); txn_label->Commit(); txn_label.reset(db_label->NewTransaction()); LOG(INFO) << "Processed " << count << " files."; } } // write the last batch if (count % 1000 != 0) { txn_image->Commit(); txn_label->Commit(); LOG(INFO) << "Processed " << count << " files."; }

上述程式會分別生成影象與標籤二個lmdb,訓練時網路需對應使用二個data層來讀取。編譯之後,使用如下方式生成:

Build\x64\Release>convert_imageset_multi_label.exe ./ train.txt data/train_image_lmdb data/train_label_lmdb 4

train.txt檔案格式如下:

data/00A03AF5-41C7-4966-8EF3-8B2C90DCF75C_cgfn.jpg 1 2 3 6
data/00A15FBD-9637-44C5-B2E7-81611263C88C_tmph.jpg 2 5 6 4

網路配置檔案需要加入slice層將標籤分割開來

# Slice layer: splits the single "label" blob into four separate top blobs.
# NOTE(review): assumes "label" comes from the label LMDB with 4 channels
# per sample (N x 4 x 1 x 1), so each top holds one label -- confirm against
# the Data layer that produces "label".
layer {
  name: "slice"
  type: "Slice"
  bottom: "label"
  top: "label_1"
  top: "label_2"
  top: "label_3"
  top: "label_4"
  slice_param {
    # Slice along axis 1.
    axis: 1
    # Three slice points for the four tops declared above.
    slice_point: 1
    slice_point: 2
    slice_point: 3
  }
}

image

也可以通過python直接生成lmdb格式,其方式如下:

# -*- coding: utf-8 -*-
"""
Created on Sat Dec 24 20:57:28 2016

@author: zhouly
"""

import lmdb
from skimage import io
import numpy as np 
import sys
caffe_root = '../../'
sys.path.insert(0, caffe_root + '/python')
import caffe
import cv2
# Build two aligned LMDBs from data/train.txt: one with the images and one
# with the per-image label vectors. Both use the same zero-padded line index
# as key, so record i in the image DB matches record i in the label DB.
root = '../../'
NUM_LABELS = 4            # integer labels that follow the image path on each line
IMAGE_SIZE = (224, 224)   # size passed to cv2.resize

with open(root + 'data/train.txt', 'r') as file_input:
    in_image_db = lmdb.open(root + 'examples/99/train_image_lmdb', map_size=int(1e12))
    in_label_db = lmdb.open(root + 'examples/99/train_label_lmdb', map_size=int(1e12))
    try:
        in_image_txn = in_image_db.begin(write=True)
        in_label_txn = in_label_db.begin(write=True)
        for in_idx, in_ in enumerate(file_input):
            # split() with no argument tolerates tabs and repeated spaces.
            content = in_.split()
            if not content:
                continue  # blank line
            im_file = root + 'data/verification/' + content[0]
            try:
                im = io.imread(im_file)
            except Exception:
                # Unreadable image: report it and skip the whole record.
                # (Exception, not a bare except, so Ctrl-C still works.)
                print('------------------------- {}'.format(im_file))
                continue

            # Validate the labels before anything is written, so the image
            # and label databases can never get out of step.
            if len(content) < 1 + NUM_LABELS:
                raise ValueError('line {}: expected {} labels, got {}'.format(
                    in_idx + 1, NUM_LABELS, len(content) - 1))
            labels = [int(v) for v in content[1:1 + NUM_LABELS]]

            # py-lmdb requires bytes keys on Python 3; on Python 2 encode()
            # leaves the ASCII str unchanged.
            key = '{:0>10d}'.format(in_idx).encode('ascii')

            # NOTE(review): index 3 selects the fourth channel of the decoded
            # image (alpha for RGBA input) -- confirm this is intended; images
            # with fewer than four channels raise IndexError here.
            im = im[:, :, 3]
            im = cv2.resize(im, IMAGE_SIZE, interpolation=cv2.INTER_LINEAR)
            # Replicate the single plane into three identical channels (C x H x W).
            data = np.zeros((3, IMAGE_SIZE[1], IMAGE_SIZE[0]), np.uint8)
            data[...] = im
            im_dat = caffe.io.array_to_datum(data)
            in_image_txn.put(key, im_dat.SerializeToString())
            print('data train: {} [{}]'.format(content[0], in_idx + 1))

            # Labels are stored as a NUM_LABELS x 1 x 1 float array.
            target_label = np.array(labels, dtype=np.float64).reshape(NUM_LABELS, 1, 1)
            label_data = caffe.io.array_to_datum(target_label)
            in_label_txn.put(key, label_data.SerializeToString())

        # Commit only when every line was processed without error.
        in_image_txn.commit()
        in_label_txn.commit()
    finally:
        in_image_db.close()
        in_label_db.close()