DETRAC-Train-Images转换成VOC格式的数据集

网友投稿 715 2022-08-23

DETRAC-Train-Images转换成VOC格式的数据集

DETRAC-Train-Images转换成VOC格式的数据集

在目标检测的实践中,我们经常需要把一种格式的数据转换成另一种格式的数据。这里对 DETRAC-Train-Images 数据集进行了解析,需要从 DETRAC 数据集官网下载两部分数据:DETRAC-Train-Images(60 sequences)和 DETRAC-Train-Annotations-XML。

我的解析代码开源地址为:​​GitHub - w5688414/datasets-preprocessing-for-object-detection: this repository includes some python parser scripts for converting other public datasets into voc data format​​

下载完成后进行解压。我的环境是 Ubuntu 16.04、Python 3.5。

首先从其提供的xml中,提取每张图片的voc格式的xml,我的文件名为DETRAC_xmlParser.py,代码为:

"""Convert UA-DETRAC sequence annotations into per-frame Pascal VOC XML files.

Each DETRAC annotation file describes one whole video sequence.  This script
writes one VOC-style ``<annotation>`` file per ``<frame>`` and offers a small
OpenCV helper to eyeball the result.
"""
import xml.etree.ElementTree as ET
from xml.dom.minidom import Document
import os
import time

try:
    import cv2
except ImportError:
    # OpenCV is only needed by the visualization helpers below; the XML
    # conversion itself must keep working on machines without it.
    cv2 = None


def _append_text_element(doc, parent, tag, text):
    """Create ``<tag>text</tag>``, attach it to *parent* and return it."""
    element = doc.createElement(tag)
    element.appendChild(doc.createTextNode(str(text)))
    parent.appendChild(element)
    return element


def _build_frame_document(frame, image_width, image_height, image_depth):
    """Build the VOC DOM document for one DETRAC ``<frame>`` element.

    Returns:
        A ``(document, pic_id)`` tuple where ``pic_id`` is the frame number
        zero-padded to five digits (DETRAC images are named ``imgNNNNN.jpg``).
    """
    doc = Document()
    annotation = doc.createElement("annotation")
    doc.appendChild(annotation)

    pic_id = frame.attrib["num"].zfill(5)
    _append_text_element(doc, annotation, "folder", "VOC2007")
    _append_text_element(doc, annotation, "filename", "img" + pic_id + ".jpg")

    # Child order (depth, width, height) is kept as in the original script so
    # that the generated files stay byte-compatible.
    size = doc.createElement("size")
    _append_text_element(doc, size, "depth", image_depth)
    _append_text_element(doc, size, "width", image_width)
    _append_text_element(doc, size, "height", image_height)
    annotation.appendChild(size)

    # Element.getchildren() was removed in Python 3.9; look the node up by tag.
    target_list = frame.find("target_list")
    targets = [] if target_list is None else target_list.findall("target")

    for target in targets:
        box = target.find("box")
        if box is None:
            # Nothing to annotate without a bounding box.
            continue

        # DETRAC stores float left/top/width/height; VOC wants integer corners.
        xmin_value = int(float(box.attrib["left"]))
        ymin_value = int(float(box.attrib["top"]))
        box_width_value = int(float(box.attrib["width"]))
        box_height_value = int(float(box.attrib["height"]))
        # Clamp the far corner so the box never leaves the image.
        xmax_value = min(xmin_value + box_width_value, image_width)
        ymax_value = min(ymin_value + box_height_value, image_height)

        voc_object = doc.createElement("object")
        _append_text_element(doc, voc_object, "name", "car")
        # DETRAC has no equivalent of the next three fields; fixed placeholders.
        _append_text_element(doc, voc_object, "pose", "Left")
        _append_text_element(doc, voc_object, "truncated", "0")
        _append_text_element(doc, voc_object, "difficult", "0")

        bndbox = doc.createElement("bndbox")
        _append_text_element(doc, bndbox, "xmin", xmin_value)
        _append_text_element(doc, bndbox, "ymin", ymin_value)
        _append_text_element(doc, bndbox, "xmax", xmax_value)
        _append_text_element(doc, bndbox, "ymax", ymax_value)
        voc_object.appendChild(bndbox)
        annotation.appendChild(voc_object)

    return doc, pic_id


def ConvertVOCXml(file_path="", file_name="", image_width=960,
                  image_height=540, image_depth=3):
    """Split one DETRAC sequence XML into per-frame VOC XML files.

    Args:
        file_path: Existing directory that receives the generated files.
        file_name: Path of the DETRAC sequence annotation XML to read.
        image_width: Image width written to ``<size>`` and used to clamp xmax.
        image_height: Image height written to ``<size>`` and used to clamp ymax.
        image_depth: Channel count written to ``<size>``.

    Returns:
        The number of frames (i.e. VOC XML files) written.  Files are named
        ``<sequence name>__img<NNNNN>.xml``.
    """
    root = ET.parse(file_name).getroot()
    num = 0  # number of frames converted
    for frame in root:
        if frame.tag != "frame":
            continue
        doc, pic_id = _build_frame_document(
            frame, image_width, image_height, image_depth)
        output_file_name = root.attrib["name"] + "__img" + pic_id + ".xml"
        file_path_out = os.path.join(file_path, output_file_name)
        # toprettyxml() without an encoding emits no encoding declaration,
        # so the file must be UTF-8 (the XML default).
        with open(file_path_out, "w", encoding="utf-8") as f:
            f.write(doc.toprettyxml(indent=" " * 4))
        num += 1
    return num


def bboxes_draw_on_img(img, bbox, color=(255, 0, 0), thickness=2):
    """Draw one bounding box on *img* in place.

    Args:
        img: Image array as returned by ``cv2.imread``.
        bbox: Mapping with ``xmin``/``ymin``/``xmax``/``ymax`` values
            (numbers or numeric strings).
        color: Rectangle colour passed to ``cv2.rectangle``.
        thickness: Line thickness in pixels.

    Raises:
        ImportError: If OpenCV is not installed.
    """
    if cv2 is None:
        raise ImportError("OpenCV (cv2) is required to draw bounding boxes")
    print(bbox)
    p1 = (int(float(bbox["xmin"])), int(float(bbox["ymin"])))
    p2 = (int(float(bbox["xmax"])), int(float(bbox["ymax"])))
    cv2.rectangle(img, p1, p2, color, thickness)


def visualization_image(image_name, xml_file_name):
    """Show *image_name* with the boxes from a VOC XML file drawn on it.

    Prints the folder, filename and size fields of the annotation, then opens
    an OpenCV window and blocks until a key is pressed.

    Raises:
        ImportError: If OpenCV is not installed.
        FileNotFoundError: If the image cannot be read.
    """
    if cv2 is None:
        raise ImportError("OpenCV (cv2) is required for visualization")

    root = ET.parse(xml_file_name).getroot()
    object_lists = []
    for child in root:
        if child.tag in ("folder", "filename"):
            print(child.tag, child.text)
        elif child.tag == "size":
            for size_child in child:
                if size_child.tag in ("width", "height", "depth"):
                    print(size_child.tag, size_child.text)
        elif child.tag == "object":
            single_object = {}
            for object_child in child:
                if object_child.tag == "name":
                    single_object["name"] = object_child.text
                elif object_child.tag == "bndbox":
                    for bndbox_child in object_child:
                        if bndbox_child.tag in ("xmin", "ymin", "xmax", "ymax"):
                            single_object[bndbox_child.tag] = bndbox_child.text
            if single_object:
                object_lists.append(single_object)

    img = cv2.imread(image_name)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError("cannot read image: " + image_name)
    for object_coordinate in object_lists:
        bboxes_draw_on_img(img, object_coordinate)
    cv2.imshow("capture", img)
    cv2.waitKey(0)
    cv2.destroyAllWindows()


def main():
    """Convert every DETRAC sequence XML and show one sample result."""
    basePath = "DETRAC-Train-Annotations-XML"
    saveBasePath = "xml_test"
    # Sorted for a reproducible log; anything that is not an XML file is skipped.
    totalxml = sorted(
        name for name in os.listdir(basePath) if name.lower().endswith(".xml"))
    total_num = 0
    print("正在转换")
    os.makedirs(saveBasePath, exist_ok=True)

    start = time.time()
    # Per-sequence frame counts, kept for troubleshooting.
    with open("xml_statistical.txt", "w") as log:
        for xml in totalxml:
            file_name = os.path.join(basePath, xml)
            print(file_name)
            num = ConvertVOCXml(file_path=saveBasePath, file_name=file_name)
            print(num)
            total_num = total_num + num
            log.write(file_name + " " + str(num) + "\n")
        end = time.time()
        seconds = end - start
        print("Time taken : {0} seconds".format(seconds))
        print(total_num)
        log.write(str(total_num) + "\n")

    visualization_image(
        "Insight-MVT_Annotation_Train/MVI_40212/img00396.jpg",
        "xml_test/MVI_40212__img00396.xml")


if __name__ == "__main__":
    main()

然后根据生成的vocxml,迁移相应的图片到目标目录中,我的文件名为voc_data_migrate.py,我的代码为:

"""Copy the source image of every generated VOC XML file into one flat folder.

XML files are named ``<sequence>__img<NNNNN>.xml``; the matching image lives at
``<pictureBasePath>/<sequence>/img<NNNNN>.jpg`` and is copied to
``<saveBasePath>/<sequence>__img<NNNNN>.jpg``.
"""
import os
import random
import shutil

# Directory holding the generated VOC XML annotations
XmlPath = r'xml_test'
# Root directory of the original images (one sub-folder per sequence)
pictureBasePath = r"Insight-MVT_Annotation_Train"
# Destination directory for the copied images
saveBasePath = r"picture_test"

# Only files following the "<sequence>__img<NNNNN>.xml" naming scheme can be
# mapped back to an image; anything else in the folder is ignored.
total_xml = [
    entry for entry in os.listdir(XmlPath)
    if entry.lower().endswith(".xml") and "__" in entry
]
num = len(total_xml)

os.makedirs(saveBasePath, exist_ok=True)

for xml in total_xml:
    stem = os.path.splitext(xml)[0]
    # Split on the first "__" only: sequence folder, then image base name.
    folder, image_stem = stem.split("__", 1)
    filePath = os.path.join(pictureBasePath, folder, image_stem + ".jpg")
    newfile_path = os.path.join(saveBasePath, stem + ".jpg")
    print(newfile_path)
    shutil.copyfile(filePath, newfile_path)

print("xml file total number", num)

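到这里,你想要的xml和相应的图片就都生成完了。接下来把它们放到VOC相应的目录里面:关于VOC格式的目录结构,可以参考VOC2007数据集,即xml文件放在VOC2007/Annotations下,图片放在VOC2007/JPEGImages下,数据集划分文件放在VOC2007/ImageSets/Main下。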

然后利用下面的代码,代码文件名为ImageSets_Convert.py, 产生trainval.txt,test.txt,train.txt,val.txt文件,这样就可以像VOC2007那样使用这个数据集了。

"""Randomly split the VOC annotations into trainval/test/train/val lists.

Writes ``trainval.txt``, ``test.txt``, ``train.txt`` and ``val.txt`` (one file
stem per line) into ``VOC2007/ImageSets/Main`` below ``saveBasePath``.
"""
import os
import random
import time

xmlfilepath = r'./VOC2007/Annotations'
saveBasePath = r"./"

# Fraction of all samples that goes into trainval (the rest becomes test)
trainval_percent = 0.8
# Fraction of trainval that goes into train (the rest becomes val)
train_percent = 0.85

total_xml = os.listdir(xmlfilepath)
num = len(total_xml)
tv = int(num * trainval_percent)
tr = int(tv * train_percent)

# Sample from sequences (random.sample does not accept sets on newer Pythons),
# then build sets so the per-file membership tests below are O(1).
trainval = random.sample(range(num), tv)
train = random.sample(trainval, tr)
trainval_set = set(trainval)
train_set = set(train)

print("train and val size", tv)
print("train size", tr)

main_dir = os.path.join(saveBasePath, 'VOC2007/ImageSets/Main')
os.makedirs(main_dir, exist_ok=True)

# Start time
start = time.time()

with open(os.path.join(main_dir, 'trainval.txt'), 'w') as ftrainval, \
        open(os.path.join(main_dir, 'test.txt'), 'w') as ftest, \
        open(os.path.join(main_dir, 'train.txt'), 'w') as ftrain, \
        open(os.path.join(main_dir, 'val.txt'), 'w') as fval:
    for i, xml_name in enumerate(total_xml):
        name = os.path.splitext(xml_name)[0] + '\n'
        if i in trainval_set:
            ftrainval.write(name)
            if i in train_set:
                ftrain.write(name)
            else:
                fval.write(name)
        else:
            ftest.write(name)

# End time
end = time.time()
seconds = end - start
print("Time taken : {0} seconds".format(seconds))

版权声明:本文内容由网络用户投稿,版权归原作者所有,本站不拥有其著作权,亦不承担相应法律责任。如果您发现本站中有涉嫌抄袭或描述失实的内容,请联系我们jiasou666@gmail.com 处理,核实后本网站将在24小时内删除侵权内容。

上一篇:[leetcode] 144. Binary Tree Preorder Traversal
下一篇:细数10个隐藏在Python中的彩蛋(python彩蛋是什么意思)
相关文章

 发表评论

暂时没有评论,来抢沙发吧~