-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathconstruct_dataset.py
More file actions
75 lines (54 loc) · 3.41 KB
/
construct_dataset.py
File metadata and controls
75 lines (54 loc) · 3.41 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
import os
import pandas as pd
import numpy as np
from utils.synthetic_generator import generator_dhtlp
from PIL import Image
import json
def create_set(imageDir, outDir, n=1):
    """Build a synthetic-streak dataset from the annotated ``.fit`` images.

    Every ``.fit`` file found (recursively) under ``imageDir`` is passed ``n``
    times to ``generator_dhtlp``, which loads the image and adds one synthetic
    streak. Each resulting crop is saved as a PNG in ``<outDir>/images`` and
    described by a record with two line segments: the original streak, read
    from ``labels.csv``, and the synthetic one returned by the generator.
    Records are written to ``<outDir>/train.json`` or ``<outDir>/valid.json``
    depending on whether the source path contains ``train_split`` or
    ``val_split``.

    Args:
        imageDir: Folder holding ``labels.csv`` (no header row; columns are
            type, x, y, filename, width, height) and the source images.
        outDir: Folder receiving ``images/``, ``train.json`` and
            ``valid.json``. Created if missing.
        n: Number of synthetic variants generated per source image.

    Raises:
        ValueError: If an image path contains neither ``train_split`` nor
            ``val_split``, or if ``labels.csv`` does not hold exactly two
            rows (the two end points of the original streak) for an image.
            Both checks run before anything is generated or written for
            that image.
        FileExistsError: If the output file name is still taken after the
            collision suffix has been applied.
    """
    images = [
        os.path.join(root, name)
        for root, _, files in os.walk(imageDir)
        for name in files
        if name.endswith('.fit')
    ]

    images_out = os.path.join(outDir, 'images')
    os.makedirs(images_out, exist_ok=True)

    df = pd.read_csv(os.path.join(imageDir, 'labels.csv'), header=None, sep=',')
    df.columns = ['type', 'x', 'y', 'filename', 'width', 'height']

    train = []
    test = []
    for counter, image in enumerate(images):
        # The split is taken from the directory name, so every variant of a
        # source image lands in the same set. Decide it before doing any work
        # so a misplaced image never leaves an orphan PNG behind.
        if 'train_split' in image:
            records = train
        elif 'val_split' in image:
            records = test
        else:
            raise ValueError('Image not in train nor val split')

        # The annotation is the same for all n variants: look it up once.
        stem = os.path.splitext(os.path.basename(image))[0]
        annot_name = stem + '.png'
        endpoints = df.loc[df.filename == annot_name, ['x', 'y']].values
        if len(endpoints) != 2:
            raise ValueError(
                "Expected exactly 2 annotation rows for '{}' in labels.csv, "
                "found {}".format(annot_name, len(endpoints))
            )
        p1, p2 = endpoints
        original_line = [int(p1[0]), int(p1[1]), int(p2[0]), int(p2[1])]

        for i in range(n):  # n different synthetic streaks on the same image
            crop, _, coord2 = generator_dhtlp(image, n_streaks=1)
            q1, q2 = coord2[0]  # end points of the added synthetic streak

            img_name = '{}_{}.png'.format(stem, i)
            if os.path.exists(os.path.join(images_out, img_name)):
                # Same base name already written (e.g. from another folder):
                # disambiguate with the index of the source image.
                img_name = '{}_{}_{}.png'.format(stem, i, counter)
                if os.path.exists(os.path.join(images_out, img_name)):
                    # Explicit raise instead of assert: asserts are stripped
                    # under -O, which would silently overwrite the file.
                    raise FileExistsError(os.path.join(images_out, img_name))

            # NOTE(review): the *255 scaling presumes crop values in [0, 1];
            # confirm against generator_dhtlp.
            Image.fromarray(np.uint8(crop * 255)).save(
                os.path.join(images_out, img_name)
            )

            records.append({
                'filename': img_name,
                'lines': [
                    list(original_line),
                    [q1[0], q1[1], q2[0], q2[1]],
                ],
                'height': crop.shape[0],
                'width': crop.shape[1],
            })

    with open(os.path.join(outDir, "train.json"), "w") as outfile:
        json.dump(train, outfile)
    with open(os.path.join(outDir, "valid.json"), "w") as outfile:
        json.dump(test, outfile)
# Input folder: must hold the annotation file labels.csv together with the
# 'train_split' and 'val_split' sub-folders that contain the source images.
imageDir = 'img/split'
# Output folder that receives the generated dataset.
outDir = "data/raw_dataset/"
create_set(imageDir=imageDir, outDir=outDir, n=1)