Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
119 changes: 65 additions & 54 deletions download.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,18 @@
import pandas as pd
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from tqdm import tqdm
import os

try:
from urllib.request import urlretrieve # Python 3
except ImportError:
from urllib import urlretrieve # Python 2
# Load the class catalogue once and pull out the two columns the script uses:
# the label identifiers and their display names.
classes = pd.read_csv("./classes.csv")
labelnames, classnames = (
    classes[column].tolist() for column in ("LabelName", "DisplayName")
)

# Accumulators filled in by the later stages of the script.
__imageids = []  # image IDs whose label appears in ``labelnames``
__imageids_and_bbox = {}  # image ID -> [XMin, YMin, XMax, YMax]
imageids = []
Expand All @@ -24,74 +27,82 @@
image_id = 0

# Collect the ID of every image that carries one of the wanted labels.
#
# ``DataFrame.append`` never modified the frame in place (it returned a new
# frame, and was removed in pandas 2.0), so chaining ``.append(...)`` calls and
# discarding the result meant only the first CSV was ever scanned.  Each
# annotation file is now read and scanned in turn, which also avoids holding
# all of them in memory at once.
label_csv_paths = [
    "./open_images_data/oidv6-train-annotations-human-imagelabels.csv",
    "./open_images_data/test-annotations-human-imagelabels-boxable.csv",
    "./open_images_data/test-annotations-machine-imagelabels.csv",
    "./open_images_data/train-annotations-human-imagelabels-boxable.csv",
    "./open_images_data/train-annotations-machine-imagelabels.csv",
    "./open_images_data/validation-annotations-human-imagelabels-boxable.csv",
    "./open_images_data/validation-annotations-machine-imagelabels.csv",
]

# Set membership is O(1); the original searched the ``labelnames`` list for
# every annotation row.
wanted_labelnames = set(labelnames)

# NOTE(review): rows are matched on LabelName alone; if these files carry a
# confidence column marking negative labels, those rows are matched too --
# confirm whether that is intended.
for label_csv_path in label_csv_paths:
    # Only the two columns used below are loaded.
    imageid_and_labelname = pd.read_csv(
        label_csv_path, usecols=["ImageID", "LabelName"])
    tqdm_iter = tqdm(imageid_and_labelname["ImageID"])
    for imageid, labelname in zip(tqdm_iter,
                                  imageid_and_labelname["LabelName"]):
        if labelname in wanted_labelnames:
            tqdm_iter.set_description(f"{imageid}-{labelname}")
            __imageids.append(imageid)
    # Release the frame before the next (potentially large) file is read.
    del imageid_and_labelname


# Record one bounding box per selected image in ``__imageids_and_bbox``.
#
# The original chained ``DataFrame.append`` calls and discarded the returned
# frames, so only the first bbox file was read.  Every file is now processed
# in turn.
bbox_csv_paths = [
    "./open_images_data/oidv6-train-annotations-bbox.csv",
    "./open_images_data/test-annotations-bbox.csv",
    "./open_images_data/validation-annotations-bbox.csv",
]
bbox_columns = ["ImageID", "XMin", "YMin", "XMax", "YMax"]

# Built once: testing ``in __imageids`` against the list for every bbox row
# was quadratic.
wanted_imageids = set(__imageids)

# NOTE(review): as in the original, a box is stored for any row whose ImageID
# was selected, regardless of which class the box belongs to, and a later row
# for the same image overwrites the earlier one -- confirm this is intended.
for bbox_csv_path in bbox_csv_paths:
    xmin_ymin_xmax_ymax = pd.read_csv(bbox_csv_path, usecols=bbox_columns)
    # Filter in pandas first so the Python loop only visits matching rows,
    # instead of calling ``.iloc`` on every row of the file.
    xmin_ymin_xmax_ymax = xmin_ymin_xmax_ymax[
        xmin_ymin_xmax_ymax["ImageID"].isin(wanted_imageids)]
    matching_rows = xmin_ymin_xmax_ymax[bbox_columns].itertuples(index=False)
    for info in tqdm(matching_rows, total=len(xmin_ymin_xmax_ymax)):
        __imageids_and_bbox[info.ImageID] = [
            info.XMin,
            info.YMin,
            info.XMax,
            info.YMax,
        ]
    # Release the frame before the next file is read.
    del xmin_ymin_xmax_ymax

# Download every selected image that has a bounding box and record its
# metadata in the parallel result lists.
#
# Fixes relative to the original:
#   * the ``DataFrame.append`` results were discarded, so only the first URL
#     file was used; every file is now processed in turn;
#   * an image with a label but no bounding box raised ``KeyError`` *after*
#     it had been downloaded; such images are now skipped before downloading;
#   * a single failed download aborted the run before ``data.csv`` was
#     written; failures are now reported and skipped;
#   * the output directory is created up front, so a missing ``./data`` is
#     not mistaken for a download failure.
image_csv_paths = [
    "./open_images_data/oidv6-train-images-with-labels-with-rotation.csv",
    "./open_images_data/test-images-with-rotation.csv",
    "./open_images_data/train-images-boxable-with-rotation.csv",
    "./open_images_data/validation-images-with-rotation.csv",
]
url_columns = ["ImageID", "Subset", "OriginalURL", "OriginalLandingURL"]

os.makedirs("./data", exist_ok=True)

# Built once: ``in __imageids`` on the list was a linear scan per row.
wanted_imageids = set(__imageids)
# Reading several files can surface the same ImageID more than once; remember
# what has been fetched so no image is downloaded or recorded twice.
fetched_imageids = set()

for image_csv_path in image_csv_paths:
    urls = pd.read_csv(image_csv_path, usecols=url_columns)
    # Filter in pandas first so the loop only visits selected images.
    urls = urls[urls["ImageID"].isin(wanted_imageids)]
    for url in tqdm(urls.to_dict("records")):
        current_id = url["ImageID"]
        if current_id in fetched_imageids:
            continue
        bbox = __imageids_and_bbox.get(current_id)
        if bbox is None:
            # Labelled image without any bounding-box annotation.
            continue
        file_name = f"./data/{image_id}.png"
        try:
            urlretrieve(url["OriginalURL"], file_name)
        except OSError as error:
            # URLError / HTTPError / ContentTooShortError are all OSError
            # subclasses.  ``image_id`` is not advanced, so the next
            # successful download reuses (and overwrites) this file name.
            tqdm.write(f"Skipping {current_id}: {error}")
            continue
        fetched_imageids.add(current_id)
        xmin, ymin, xmax, ymax = bbox
        file_names.append(file_name)
        type_of_data.append(url["Subset"])
        imageurls.append(url["OriginalURL"])
        imageurls_original.append(url["OriginalLandingURL"])
        imageids.append(current_id)
        xmins.append(xmin)
        ymins.append(ymin)
        xmaxs.append(xmax)
        ymaxs.append(ymax)
        image_id += 1

# Assemble the per-image result lists into one table and write it to
# ./data.csv with pandas' default ``to_csv`` settings.
column_names = (
    "ImageIds",
    "XMin",
    "YMin",
    "XMax",
    "YMax",
    "File Name",
    "ImageUrls",
    "Og_ImageUrls",
    "Type of Data",
)
column_values = (
    imageids,
    xmins,
    ymins,
    xmaxs,
    ymaxs,
    file_names,
    imageurls,
    imageurls_original,
    type_of_data,
)
# Pairing names with lists in order keeps the same column order as before.
data = pd.DataFrame(dict(zip(column_names, column_values)))
data.to_csv("./data.csv")