wider face資料集轉化為VOC資料集(matlab實現)
阿新 • • 發佈:2019-01-25
head.xml
<annotation> <folder>widerface</folder> <filename>%06d.jpg</filename> <source> <database>My Database</database> <annotation>VOC2007</annotation> <image>flickr</image> <flickrid>NULL</flickrid> </source> <owner> <flickrid>NULL</flickrid> <name>facevise</name> </owner> <size> <width>%d</width> <height>%d</height> <depth>%d</depth> </size> <segmented>0</segmented>
object.xml
<object> <name>%s</name> <pose>Unspecified</pose> <truncated>0</truncated> <difficult>0</difficult> <bndbox> <xmin>%d</xmin> <ymin>%d</ymin> <xmax>%d</xmax> <ymax>%d</ymax> </bndbox> </object>
tail.xml
</annotation>
function WiderFace2VOC()
% WIDERFACE2VOC Convert the WIDER FACE train/val sets into a VOC-style layout.
%
% Input (relative to the current directory):
%   WIDER_train/images, WIDER_val/images       image folders, one per event
%   wider_face_split/wider_face_train, _val    MAT-files providing event_list,
%                                              file_list and face_bbx_list
%   head.xml, object.xml, tail.xml             fprintf templates for the
%                                              annotation header, one object
%                                              entry, and the closing tag
%
% Output (recreated from scratch on every run):
%   Annotations/%06d.xml      one annotation file per image
%   JPEGImages/%06d.jpg       copy of the image under its new numeric name
%   ImageSets/Main/train.txt     every training image
%   ImageSets/Main/trainval.txt  every training image + odd-numbered val images
%   ImageSets/Main/val.txt       odd-numbered validation images
%   ImageSets/Main/test.txt      even-numbered validation images
%
% WIDER FACE stores each face as <x_left y_top width height>; VOC stores the
% top-left and bottom-right corners, e.g. "000001.jpg car 44 28 132 121"
% (image name, object class, then the box corners).

clc;
fclose('all');
% Safety net: release every handle opened below even if the conversion
% aborts part-way through with an error.
closeOnExit = onCleanup(@() fclose('all'));

% Start from a clean output tree. The three-output form keeps rmdir/mkdir
% from complaining when a directory is missing or already exists.
[~, ~, ~] = rmdir('Annotations', 's');
[~, ~, ~] = rmdir('ImageSets', 's');
[~, ~, ~] = rmdir('JPEGImages', 's');
[~, ~, ~] = mkdir('Annotations');
[~, ~, ~] = mkdir('ImageSets/Main');
[~, ~, ~] = mkdir('JPEGImages');

% fileread returns a row char vector, which is the shape fprintf expects for
% a format string, and raises a clear error when a template is missing.
tmpl.head = fileread('head.xml');
tmpl.object = fileread('object.xml');
tmpl.tail = fileread('tail.xml');

trainID = openListFile('ImageSets/Main/train.txt');
trainvalID = openListFile('ImageSets/Main/trainval.txt');
valID = openListFile('ImageSets/Main/val.txt');
testID = openListFile('ImageSets/Main/test.txt');

% Training images: every one goes to train.txt and trainval.txt.
idx = 1;
idx = convertSplit('WIDER_train/images', 'wider_face_split/wider_face_train', ...
    idx, tmpl, [trainID, trainvalID], [trainID, trainvalID]);

% Validation images: numbering continues after the training set. Odd
% indices go to val.txt and trainval.txt, even indices to test.txt.
convertSplit('WIDER_val/images', 'wider_face_split/wider_face_val', ...
    idx, tmpl, [valID, trainvalID], testID);

fclose(trainID);
fclose(trainvalID);
fclose(valID);
fclose(testID);
fclose('all');

function fid = openListFile(path)
% Open PATH for writing and fail loudly instead of returning -1.
[fid, msg] = fopen(path, 'w');
if fid < 0
    error('WiderFace2VOC:openFailed', 'Cannot open %s for writing: %s', path, msg);
end

function idx = convertSplit(imageRoot, splitFile, idx, tmpl, oddListIDs, evenListIDs)
% Convert every image listed in SPLITFILE, numbering them from IDX upwards.
%   imageRoot    folder that contains one sub-folder per event
%   splitFile    MAT-file with event_list, file_list and face_bbx_list
%   idx          number given to the first image; the next free number is returned
%   tmpl         struct with fields head, object, tail (fprintf templates)
%   oddListIDs   row vector of file ids that receive odd image numbers
%   evenListIDs  row vector of file ids that receive even image numbers
data = load(splitFile);
for i = 1:numel(data.event_list)
    for j = 1:numel(data.file_list{i})
        imagename = fullfile(imageRoot, data.event_list{i}, ...
            strcat(data.file_list{i}{j}, '.jpg'));
        img = imread(imagename);
        % size(img, 3) is 1 for a grayscale (2-D) image, whereas indexing
        % the third element of size(img) would raise an error.
        height = size(img, 1);
        width = size(img, 2);
        depth = size(img, 3);

        xmlPath = sprintf('Annotations/%06d.xml', idx);
        [xmlID, msg] = fopen(xmlPath, 'w');
        if xmlID < 0
            error('WiderFace2VOC:openFailed', ...
                'Cannot open %s for writing: %s', xmlPath, msg);
        end
        fprintf(xmlID, tmpl.head, idx, width, height, depth);

        boxes = data.face_bbx_list{i}{j};
        for k = 1:size(boxes, 1)
            rc = boxes(k, :);
            % Skip degenerate boxes (non-positive width or height).
            if rc(3) <= 0 || rc(4) <= 0
                disp('debug1')
                continue;
            end
            % [x y w h] -> [xmin ymin xmax ymax]
            rc = round([rc(1), rc(2), rc(1)+rc(3)-1, rc(2)+rc(4)-1]);
            % Skip boxes whose bottom-right corner reaches the image bounds.
            if rc(3) >= width || rc(4) >= height
                disp('debug2')
                continue;
            end
            fprintf(xmlID, tmpl.object, 'face', rc(1), rc(2), rc(3), rc(4));
        end
        fprintf(xmlID, tmpl.tail);
        fclose(xmlID);

        if mod(idx, 2)
            listIDs = oddListIDs;
        else
            listIDs = evenListIDs;
        end
        for listID = listIDs
            fprintf(listID, '%06d\n', idx);
        end

        copyfile(imagename, sprintf('JPEGImages/%06d.jpg', idx));
        idx = idx + 1;
    end
    disp(i);   % progress: index of the event folder just finished
end
因為我用的mxnet中faster rcnn的需要,我過濾掉了w或h小於等於0的框(也就是會得出x2<x1或y2<y1的框),以及右下角座標超出影象範圍的框。
其中trainID為全部的訓練集,trainvalID為全部訓練集加一半驗證集,valID為一半驗證集,testID為另一半驗證集
如果要得到全部的驗證集在valID和trainvalID中,程式碼如下:
function WiderFace2VOC()
% WIDERFACE2VOC Convert the WIDER FACE train/val sets into a VOC-style layout.
%
% Input (relative to the current directory):
%   WIDER_train/images, WIDER_val/images       image folders, one per event
%   wider_face_split/wider_face_train, _val    MAT-files providing event_list,
%                                              file_list and face_bbx_list
%   head.xml, object.xml, tail.xml             fprintf templates for the
%                                              annotation header, one object
%                                              entry, and the closing tag
%
% Output (recreated from scratch on every run):
%   Annotations/%06d.xml      one annotation file per image
%   JPEGImages/%06d.jpg       copy of the image under its new numeric name
%   ImageSets/Main/train.txt     every training image
%   ImageSets/Main/trainval.txt  every training image + every validation image
%   ImageSets/Main/val.txt       every validation image
% No test.txt is produced by this variant.
%
% WIDER FACE stores each face as <x_left y_top width height>; VOC stores the
% top-left and bottom-right corners, e.g. "000001.jpg car 44 28 132 121"
% (image name, object class, then the box corners).

clc;
fclose('all');
% Safety net: release every handle opened below even if the conversion
% aborts part-way through with an error.
closeOnExit = onCleanup(@() fclose('all'));

% Start from a clean output tree. The three-output form keeps rmdir/mkdir
% from complaining when a directory is missing or already exists.
[~, ~, ~] = rmdir('Annotations', 's');
[~, ~, ~] = rmdir('ImageSets', 's');
[~, ~, ~] = rmdir('JPEGImages', 's');
[~, ~, ~] = mkdir('Annotations');
[~, ~, ~] = mkdir('ImageSets/Main');
[~, ~, ~] = mkdir('JPEGImages');

% fileread returns a row char vector, which is the shape fprintf expects for
% a format string, and raises a clear error when a template is missing.
tmpl.head = fileread('head.xml');
tmpl.object = fileread('object.xml');
tmpl.tail = fileread('tail.xml');

trainID = openListFile('ImageSets/Main/train.txt');
trainvalID = openListFile('ImageSets/Main/trainval.txt');
valID = openListFile('ImageSets/Main/val.txt');

% Training images: every one goes to train.txt and trainval.txt.
idx = 1;
idx = convertSplit('WIDER_train/images', 'wider_face_split/wider_face_train', ...
    idx, tmpl, [trainID, trainvalID]);

% Validation images: numbering continues after the training set; every one
% goes to val.txt and trainval.txt.
convertSplit('WIDER_val/images', 'wider_face_split/wider_face_val', ...
    idx, tmpl, [valID, trainvalID]);

fclose(trainID);
fclose(trainvalID);
fclose(valID);
fclose('all');

function fid = openListFile(path)
% Open PATH for writing and fail loudly instead of returning -1.
[fid, msg] = fopen(path, 'w');
if fid < 0
    error('WiderFace2VOC:openFailed', 'Cannot open %s for writing: %s', path, msg);
end

function idx = convertSplit(imageRoot, splitFile, idx, tmpl, listIDs)
% Convert every image listed in SPLITFILE, numbering them from IDX upwards.
%   imageRoot  folder that contains one sub-folder per event
%   splitFile  MAT-file with event_list, file_list and face_bbx_list
%   idx        number given to the first image; the next free number is returned
%   tmpl       struct with fields head, object, tail (fprintf templates)
%   listIDs    row vector of file ids that receive each image number
data = load(splitFile);
for i = 1:numel(data.event_list)
    for j = 1:numel(data.file_list{i})
        imagename = fullfile(imageRoot, data.event_list{i}, ...
            strcat(data.file_list{i}{j}, '.jpg'));
        img = imread(imagename);
        % size(img, 3) is 1 for a grayscale (2-D) image, whereas indexing
        % the third element of size(img) would raise an error.
        height = size(img, 1);
        width = size(img, 2);
        depth = size(img, 3);

        xmlPath = sprintf('Annotations/%06d.xml', idx);
        [xmlID, msg] = fopen(xmlPath, 'w');
        if xmlID < 0
            error('WiderFace2VOC:openFailed', ...
                'Cannot open %s for writing: %s', xmlPath, msg);
        end
        fprintf(xmlID, tmpl.head, idx, width, height, depth);

        boxes = data.face_bbx_list{i}{j};
        for k = 1:size(boxes, 1)
            rc = boxes(k, :);
            % Skip degenerate boxes (non-positive width or height).
            if rc(3) <= 0 || rc(4) <= 0
                disp('debug1')
                continue;
            end
            % [x y w h] -> [xmin ymin xmax ymax]
            rc = round([rc(1), rc(2), rc(1)+rc(3)-1, rc(2)+rc(4)-1]);
            % Skip boxes whose bottom-right corner reaches the image bounds.
            if rc(3) >= width || rc(4) >= height
                disp('debug2')
                continue;
            end
            fprintf(xmlID, tmpl.object, 'face', rc(1), rc(2), rc(3), rc(4));
        end
        fprintf(xmlID, tmpl.tail);
        fclose(xmlID);

        for listID = listIDs
            fprintf(listID, '%06d\n', idx);
        end

        copyfile(imagename, sprintf('JPEGImages/%06d.jpg', idx));
        idx = idx + 1;
    end
    disp(i);   % progress: index of the event folder just finished
end