Update code.txt
Browse files
code.txt
CHANGED
@@ -1,56 +1,59 @@
|
|
1 |
import os
|
|
|
2 |
import shutil
|
3 |
-
from concurrent.futures import ThreadPoolExecutor
|
4 |
-
|
5 |
-
#
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
os.
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
#
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
35 |
with ThreadPoolExecutor(max_workers=num_threads) as executor:
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
41 |
|
42 |
if __name__ == "__main__":
|
43 |
-
|
44 |
-
image_folder = "/path/to/your/image_folder"
|
45 |
-
destination_folder = "/path/to/your/destination_folder"
|
46 |
-
|
47 |
-
# Your labels dictionary (image_name: label)
|
48 |
-
labels = {
|
49 |
-
"image1.jpg": "good",
|
50 |
-
"image2.jpg": "bad",
|
51 |
-
"image3.jpg": "good",
|
52 |
-
# Add the rest of your image labels here (1M entries)
|
53 |
-
}
|
54 |
-
|
55 |
-
# Organize images using 100 threads
|
56 |
-
organize_images(image_folder, labels, destination_folder, num_threads=100)
|
|
|
1 |
import os
|
2 |
+
import random
|
3 |
import shutil
|
4 |
+
from concurrent.futures import ThreadPoolExecutor
|
5 |
+
|
6 |
+
# Locations of the dataset root and of its train / validation splits
dataset_folder = 'path/to/dataset'
train_folder = os.path.join(dataset_folder, 'train')
val_folder = os.path.join(dataset_folder, 'validation')

# Ensure the validation root is present (no error if it already exists)
os.makedirs(val_folder, exist_ok=True)

# Names of the entries directly under the train folder that are directories;
# each one is handled as a separate label
label_folders = list(
    filter(
        lambda entry: os.path.isdir(os.path.join(train_folder, entry)),
        os.listdir(train_folder),
    )
)
|
16 |
+
|
17 |
+
# Function to move images from a specific label folder
|
18 |
+
def process_label_folder(label_folder, num_threads, val_fraction=0.2):
    """Move a random share of one label's training files into the validation split.

    Files are taken from ``train_folder/label_folder`` and moved to
    ``val_folder/label_folder`` (created if missing) using a thread pool.

    Args:
        label_folder: Name of the label sub-directory inside ``train_folder``.
        num_threads: Number of worker threads used for the moves (>= 1).
        val_fraction: Share of the files to move, between 0 and 1.
            Defaults to 0.2 (20%).

    Returns:
        The number of files that were actually moved.

    Raises:
        ValueError: If ``num_threads`` is smaller than 1 or ``val_fraction``
            lies outside ``[0, 1]``.
    """
    if num_threads < 1:
        raise ValueError("num_threads must be at least 1")
    if not 0 <= val_fraction <= 1:
        raise ValueError("val_fraction must be between 0 and 1")

    train_label_folder = os.path.join(train_folder, label_folder)
    val_label_folder = os.path.join(val_folder, label_folder)

    # Create corresponding validation label folder
    os.makedirs(val_label_folder, exist_ok=True)

    # Only regular files are candidates: sub-directories must not be counted
    # or moved as if they were images. Sorting makes the selection
    # reproducible when the caller seeds the random module.
    all_images = sorted(
        name
        for name in os.listdir(train_label_folder)
        if os.path.isfile(os.path.join(train_label_folder, name))
    )

    # Randomly select the requested share of the files for validation
    val_size = int(len(all_images) * val_fraction)
    val_images = random.sample(all_images, val_size)

    # Function to move a single image
    def move_image(image):
        src = os.path.join(train_label_folder, image)
        dest = os.path.join(val_label_folder, image)
        shutil.move(src, dest)

    failures = []
    if val_images:
        # Use ThreadPoolExecutor to move images in parallel
        with ThreadPoolExecutor(max_workers=num_threads) as executor:
            futures = {
                executor.submit(move_image, image): image
                for image in val_images
            }
            # Inspect every future so that a failed move is reported instead
            # of being silently dropped.
            for future, image in futures.items():
                error = future.exception()
                if error is not None:
                    failures.append((image, error))

    moved = val_size - len(failures)
    print(f"Moved {moved} images from {label_folder} to validation folder.")
    for image, error in failures:
        print(f"Failed to move {image} from {label_folder}: {error}")

    return moved
|
46 |
+
|
47 |
+
# Helper that keeps prompting until the user supplies a usable thread count
def _read_thread_count():
    """Prompt on stdin until the user enters a whole number >= 1 and return it."""
    while True:
        raw = input("Enter the number of threads to use: ")
        try:
            num_threads = int(raw)
        except ValueError:
            print(f"'{raw}' is not a whole number, please try again.")
            continue
        # A thread pool cannot be created with zero or negative workers.
        if num_threads < 1:
            print("The number of threads must be at least 1.")
            continue
        return num_threads


# Main function to get user input for number of threads and process folders
def main():
    """Ask for a thread count, then process every entry of ``label_folders``.

    Each label folder is handed to ``process_label_folder`` together with the
    validated thread count; a final confirmation line is printed once all
    label folders have been processed.
    """
    num_threads = _read_thread_count()

    # Process each label folder using the input number of threads
    for label_folder in label_folders:
        process_label_folder(label_folder, num_threads)

    print("Validation dataset created.")


if __name__ == "__main__":
    main()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|