1. 程式人生 > Windows Caffe中MNIST資料格式轉換實現

Windows Caffe中MNIST資料格式轉換實現

               

Caffe原始碼中的 src/caffe/caffe/examples/mnist/convert_mnist_data.cpp 所提供的實現程式碼無法直接在Windows下執行。這裡在原始碼的基礎上進行了改寫,使其可以在Windows 64位上直接執行,改寫後的程式碼如下:

#include "stdafx.h"

#include <gflags/gflags.h>
#include <glog/logging.h>
#include <google/protobuf/text_format.h>
#include <leveldb/db.h>
#include <leveldb/write_batch.h>
#include <lmdb.h>
#include <stdint.h>
#include <sys/stat.h>

#include <cstdio>
#include <cstdlib>
#include <fstream>  // NOLINT(readability/streams)
#include <iostream>
#include <string>
#include <vector>

#include "caffe/proto/caffe.pb.h"

using namespace caffe;  // NOLINT(build/namespaces)
using std::string;

// gflags string flag: selects whether the output database is lmdb or leveldb.
DEFINE_string(backend, "lmdb", "The backend for storing the result");uint32_t swap_endian(uint32_t val) { val = ((val << 8) & 0xFF00FF00) | ((val >> 8) & 0xFF00FF); return (val << 16) | (val >> 16);}void convert_dataset(const char* image_filename, const char* label_filename, const
char* db_path, const string& db_backend)
// Open files std::ifstream image_file(image_filename, std::ios::in | std::ios::binary)std::ifstream label_file(label_filename, std::ios::in | std::ios::binary); CHECK(image_file) << "Unable to open file " << image_filename; CHECK(label_file) << "Unable to open file " << label_filename; // Read the magic and the meta data uint32_t magic; uint32_t num_items; uint32_t num_labels; uint32_t rows; uint32_t cols; // 讀取檔案前n個位元組,獲取影象數量、影象寬、影象高 image_file.read(reinterpret_cast<char*>(&magic), 4); magic = swap_endian(magic); CHECK_EQ(magic, 2051) << "Incorrect image file magic."; label_file.read(reinterpret_cast<char*>(&magic), 4); magic = swap_endian(magic); CHECK_EQ(magic, 2049) << "Incorrect label file magic."; image_file.read(reinterpret_cast<char*>(&num_items), 4); num_items = swap_endian(num_items); label_file.read(reinterpret_cast<char*>(&num_labels), 4); num_labels = swap_endian(num_labels); CHECK_EQ(num_items, num_labels); image_file.read(reinterpret_cast<char*>(&rows), 4); rows = swap_endian(rows); image_file.read(reinterpret_cast<char*>(&cols), 4); cols = swap_endian(cols); // lmdb MDB_env *mdb_env; MDB_dbi mdb_dbi; MDB_val mdb_key, mdb_data; MDB_txn *mdb_txn; // leveldb leveldb::DB* db = NULL; leveldb::Options options; options.error_if_exists = true; options.create_if_missing = true; options.write_buffer_size = 268435456; leveldb::WriteBatch* batch = NULL// Open db if (db_backend == "leveldb") {  // leveldb  LOG(INFO) << "Opening leveldb " << db_path;  leveldb::Status status = leveldb::DB::Open(   options, db_path, &db);  CHECK(status.ok()) << "Failed to open leveldb " << db_path   << ". 
Is it already existing?";  batch = new leveldb::WriteBatch(); } else if (db_backend == "lmdb") {  // lmdb  int rc;  LOG(INFO) << "Opening lmdb " << db_path;  // 建立指定的存放目錄  //CHECK_EQ(mkdir(db_path, 0744), 0)  std::string strPath = std::string(db_path);  std::string delPath = "rmdir /s/q " + strPath;  system(delPath.c_str());  strPath = "mkdir " + strPath;  system(strPath.c_str());  //CHECK_EQ(system(strPath.c_str()), 0) << "mkdir " << db_path << "failed";  // 建立lmdb資料庫  CHECK_EQ(mdb_env_create(&mdb_env), MDB_SUCCESS) << "mdb_env_create failed";  //CHECK_EQ(mdb_env_set_mapsize(mdb_env, 1099511627776), MDB_SUCCESS) << "mdb_env_set_mapsize failed";//1TB  CHECK_EQ(mdb_env_set_mapsize(mdb_env, 107374182), MDB_SUCCESS) << "mdb_env_set_mapsize failed";//100MB  CHECK_EQ(mdb_env_open(mdb_env, db_path, 0, 0664), MDB_SUCCESS) << "mdb_env_open failed";  CHECK_EQ(mdb_txn_begin(mdb_env, NULL, 0, &mdb_txn), MDB_SUCCESS) << "mdb_txn_begin failed";  CHECK_EQ(mdb_open(mdb_txn, NULL, 0, &mdb_dbi), MDB_SUCCESS) << "mdb_open failed. Does the lmdb already exist? 
"; } else {  LOG(FATAL) << "Unknown db backend " << db_backend; } // Storing to db char label; char* pixels = new char[rows * cols]; int count = 0const int kMaxKeyLength = 10char key_cstr[kMaxKeyLength]; string value;  Datum datum; // Caffe資料類 datum.set_channels(1); datum.set_height(rows); datum.set_width(cols); LOG(INFO) << "A total of " << num_items << " items."; LOG(INFO) << "Rows: " << rows << " Cols: " << cols; // 將資料寫入lmdb或leveldb資料庫 for (int item_id = 0; item_id < num_items; ++item_id) {  image_file.read(pixels, rows * cols);  label_file.read(&label, 1);  datum.set_data(pixels, rows*cols);  datum.set_label(label);  //snprintf(key_cstr, kMaxKeyLength, "%08d", item_id);  int ret = _snprintf(key_cstr, kMaxKeyLength, "%08d", item_id);  if (ret == kMaxKeyLength || ret < 0) {   printf("warning ");   key_cstr[kMaxKeyLength - 1] = 0;  }  datum.SerializeToString(&value);  string keystr(key_cstr);  // Put in db  if (db_backend == "leveldb") {  // leveldb   batch->Put(keystr, value);  }  else if (db_backend == "lmdb") {  // lmdb   mdb_data.mv_size = value.size();   mdb_data.mv_data = reinterpret_cast<void*>(&value[0]);   mdb_key.mv_size = keystr.size();   mdb_key.mv_data = reinterpret_cast<void*>(&keystr[0]);   CHECK_EQ(mdb_put(mdb_txn, mdb_dbi, &mdb_key, &mdb_data, 0), MDB_SUCCESS)    << "mdb_put failed";  }  else {   LOG(FATAL) << "Unknown db backend " << db_backend;  }  if (++count % 1000 == 0) {   // Commit txn   if (db_backend == "leveldb") {  // leveldb    db->Write(leveldb::WriteOptions(), batch);    delete batch;    batch = new leveldb::WriteBatch();   }   else if (db_backend == "lmdb") {  // lmdb    CHECK_EQ(mdb_txn_commit(mdb_txn), MDB_SUCCESS)     << "mdb_txn_commit failed";    CHECK_EQ(mdb_txn_begin(mdb_env, NULL, 0, &mdb_txn), MDB_SUCCESS)     << "mdb_txn_begin failed";   }   else {    LOG(FATAL) << "Unknown db backend " << db_backend;   }  } } // write the last batch if (count % 1000 != 0) {  if (db_backend == "leveldb") {  // leveldb   
db->Write(leveldb::WriteOptions(), batch);   delete batch;   delete db;  }  else if (db_backend == "lmdb") {  // lmdb   CHECK_EQ(mdb_txn_commit(mdb_txn), MDB_SUCCESS) << "mdb_txn_commit failed";   mdb_close(mdb_env, mdb_dbi);   mdb_env_close(mdb_env);  }  else {   LOG(FATAL) << "Unknown db backend " << db_backend;  }  LOG(ERROR) << "Processed " << count << " files."; } delete[] pixels;}int main(int argc, char* argv[]){#ifndef GFLAGS_GFLAGS_H_ namespace gflags = google;#endif argc = 4;#ifdef _DEBUG argv[0] = "E:/GitCode/Caffe/lib/dbg/x86_vc12/tools_convert_mnist_data[dbg_x86_vc12].exe";#else argv[0] = "E:/GitCode/Caffe/lib/rel/x86_vc12/tools_convert_mnist_data[rel_x86_vc12].exe";#endif //argv[1] = "E:/GitCode/Caffe/src/caffe/caffe/data/mnist/t10k-images.idx3-ubyte"; //argv[2] = "E:/GitCode/Caffe/src/caffe/caffe/data/mnist/t10k-labels.idx1-ubyte"; //argv[3] = "E:\\GitCode\\Caffe\\src\\caffe\\caffe\\data\\mnist\\lmdb\\test"; argv[1] = "E:/GitCode/Caffe/src/caffe/caffe/data/mnist/train-images.idx3-ubyte"; argv[2] = "E:/GitCode/Caffe/src/caffe/caffe/data/mnist/train-labels.idx1-ubyte"; argv[3] = "E:\\GitCode\\Caffe\\src\\caffe\\caffe\\data\\mnist\\lmdb\\train"// 用來設定usage說明 gflags::SetUsageMessage("This script converts the MNIST dataset to\n"  "the lmdb/leveldb format used by Caffe to load data.\n"  "Usage:\n"  "    convert_mnist_data [FLAGS] input_image_file input_label_file "  "output_db_file\n"  "The MNIST dataset could be downloaded at\n"  "    http://yann.lecun.com/exdb/mnist/\n"  "You should gunzip them after downloading,"  "or directly use data/mnist/get_mnist.sh\n"); // 解析命令列引數 gflags::ParseCommandLineFlags(&argc, &argv, true); // 獲取標誌引數backend的值 const string& db_backend = FLAGS_backend; if (argc != 4) {  // 輸出usage說明  gflags::ShowUsageWithFlagsRestrict(argv[0],   "examples/mnist/convert_mnist_data"); } else {  // 設定日誌檔名中"檔名"欄位  // 每個程序中至少要執行一次InitGoogleLogging,否則不產生日誌檔案  google::InitGoogleLogging(argv[0]);  convert_dataset(argv[1], argv[2], argv[3], 
db_backend); } std::cout << "ok!" << std::endlreturn 0;}
GitHub:https://github.com/fengbingchun/Caffe_Test