Nekshay committed on
Commit
16c3ce8
·
verified ·
1 Parent(s): fced307

Update code.txt

Browse files
Files changed (1) hide show
  1. code.txt +54 -51
code.txt CHANGED
@@ -1,56 +1,59 @@
1
  import os
 
2
  import shutil
3
- from concurrent.futures import ThreadPoolExecutor, as_completed
4
-
5
def copy_image(file_info):
    """Copy one image into its destination folder.

    Args:
        file_info: A ``(src, dst_folder)`` pair; the file keeps its base name
            inside ``dst_folder``.

    Returns:
        True when the copy succeeded, False when the file system rejected it
        (the error is printed so a bulk run keeps going).
    """
    src, dst_folder = file_info
    dst = os.path.join(dst_folder, os.path.basename(src))
    try:
        shutil.copy2(src, dst)  # copy2 also preserves file metadata
    except OSError as e:
        # Only file-system failures are expected here; anything else is a
        # programming error and should surface instead of being hidden.
        print(f"Error copying {src}: {e}")
        return False
    return True
13
-
14
def organize_images(image_folder, labels, destination_folder, num_threads=100):
    """Copy labelled images into 'good' and 'bad' subfolders in parallel.

    Args:
        image_folder: Directory that contains the source images.
        labels: Mapping of image file name to label. The label "good" selects
            the 'good' subfolder; every other label selects 'bad'.
        destination_folder: Directory under which 'good' and 'bad' are created.
        num_threads: Size of the thread pool used for copying.
    """
    targets = {
        name: os.path.join(destination_folder, name) for name in ('good', 'bad')
    }
    for path in targets.values():
        os.makedirs(path, exist_ok=True)

    # One (source path, destination folder) job per labelled image.
    jobs = [
        (
            os.path.join(image_folder, image_name),
            targets['good'] if label == "good" else targets['bad'],
        )
        for image_name, label in labels.items()
    ]

    with ThreadPoolExecutor(max_workers=num_threads) as pool:
        pending = [pool.submit(copy_image, job) for job in jobs]
        # Drain every future so the call returns only once all copies ran.
        for finished in as_completed(pending):
            finished.result()
 
 
 
 
 
 
 
 
 
41
 
42
if __name__ == "__main__":
    # Image file name -> label. Extend this with the full label set
    # (about 1M entries).
    labels = {
        "image1.jpg": "good",
        "image2.jpg": "bad",
        "image3.jpg": "good",
    }

    # Where the images live and where the sorted copies should go.
    image_folder = "/path/to/your/image_folder"
    destination_folder = "/path/to/your/destination_folder"

    # Sort the images with a pool of 100 worker threads.
    organize_images(image_folder, labels, destination_folder, num_threads=100)
 
1
  import os
2
+ import random
3
  import shutil
4
+ from concurrent.futures import ThreadPoolExecutor
5
+
6
# Dataset layout: <dataset>/train/<label>/... is split into
# <dataset>/validation/<label>/...
dataset_folder = 'path/to/dataset'
train_folder = os.path.join(dataset_folder, 'train')
val_folder = os.path.join(dataset_folder, 'validation')

# The validation root must exist before any label folder is created in it.
os.makedirs(val_folder, exist_ok=True)

# Every sub-directory of the train folder is treated as one label.
label_folders = list(
    filter(
        lambda name: os.path.isdir(os.path.join(train_folder, name)),
        os.listdir(train_folder),
    )
)
16
+
17
def process_label_folder(label_folder, num_threads, train_root=None,
                         val_root=None, val_fraction=0.2):
    """Move a random share of one label's images from train to validation.

    Args:
        label_folder: Name of the label sub-directory to split.
        num_threads: Size of the thread pool used for moving files.
        train_root: Directory containing the label folders. Defaults to the
            module-level ``train_folder``.
        val_root: Directory receiving the validation label folders. Defaults
            to the module-level ``val_folder``.
        val_fraction: Share of the entries to move, between 0 and 1.

    Returns:
        The number of entries that were actually moved.

    Raises:
        ValueError: If ``val_fraction`` is outside ``[0, 1]``.
    """
    # Fall back to the module-level folders so existing two-argument calls
    # behave exactly as before.
    if train_root is None:
        train_root = train_folder
    if val_root is None:
        val_root = val_folder
    if not 0 <= val_fraction <= 1:
        raise ValueError(f"val_fraction must be within [0, 1], got {val_fraction}")

    train_label_folder = os.path.join(train_root, label_folder)
    val_label_folder = os.path.join(val_root, label_folder)
    os.makedirs(val_label_folder, exist_ok=True)

    all_images = os.listdir(train_label_folder)
    val_size = int(len(all_images) * val_fraction)
    val_images = random.sample(all_images, val_size)

    def move_image(image):
        """Move a single entry into the validation label folder."""
        shutil.move(
            os.path.join(train_label_folder, image),
            os.path.join(val_label_folder, image),
        )

    failed = 0
    with ThreadPoolExecutor(max_workers=num_threads) as executor:
        futures = {executor.submit(move_image, image): image for image in val_images}
        # Collect every result: an exception raised in a worker only
        # surfaces when its future is read, otherwise it is silently lost.
        for future, image in futures.items():
            try:
                future.result()
            except OSError as exc:
                failed += 1
                print(f"Error moving {image}: {exc}")

    moved = val_size - failed
    print(f"Moved {moved} images from {label_folder} to validation folder.")
    return moved
46
+
47
def main():
    """Ask for a worker-thread count, then split every label folder.

    The prompt repeats until a positive whole number is entered, because
    ``int()`` rejects non-numeric text and ``ThreadPoolExecutor`` rejects a
    worker count below 1.
    """
    while True:
        raw = input("Enter the number of threads to use: ")
        try:
            num_threads = int(raw)
        except ValueError:
            print("Please enter a whole number.")
            continue
        if num_threads > 0:
            break
        print("The number of threads must be at least 1.")

    # Process each label folder using the chosen number of threads.
    for label_folder in label_folders:
        process_label_folder(label_folder, num_threads)

    print("Validation dataset created.")


if __name__ == "__main__":
    main()