# File size: 2,147 Bytes
# bdafe83
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
import os
import random
import shutil

# Directory that holds the original JSON data. This is a path relative to the
# current working directory, so run the script from the matching location.
base_dir = 'real_review/original_data/'

# Destination for the sampled files. It lives inside base_dir, so it must be
# excluded from the scan below; otherwise a second run would treat previously
# copied samples as source data and nest them under selected_files/selected_files.
selected_base_dir = os.path.join(base_dir, 'selected_files')


def find_json_files(search_dir, exclude_dir=None):
    """Return the paths of all '.json' files below *search_dir*.

    Args:
        search_dir: Directory that is walked recursively.
        exclude_dir: Optional directory whose entire subtree is skipped.

    Returns:
        A sorted list of paths, each one starting with *search_dir*. The list
        is empty when *search_dir* does not exist or holds no JSON files.
    """
    excluded = None if exclude_dir is None else os.path.abspath(exclude_dir)
    found = []
    for root, dirs, files in os.walk(search_dir):
        if excluded is not None:
            # Prune in place so os.walk never descends into the excluded tree.
            dirs[:] = [
                d for d in dirs
                if os.path.abspath(os.path.join(root, d)) != excluded
            ]
        found.extend(
            os.path.join(root, name) for name in files if name.endswith('.json')
        )
    # os.walk order depends on the file system; sorting keeps the candidate
    # list stable between runs.
    return sorted(found)


def sample_size(total, fraction=0.01):
    """Return how many files to sample out of *total*.

    Args:
        total: Number of candidate files.
        fraction: Share of the candidates to select (default 1%).

    Returns:
        ``int(total * fraction)``, raised to 1 when there is at least one
        candidate, capped at *total*, and 0 when there are no candidates.
    """
    if total <= 0:
        return 0
    return min(total, max(1, int(total * fraction)))


def copy_preserving_structure(paths, source_root, dest_root):
    """Copy *paths* into *dest_root*, keeping their layout relative to *source_root*.

    Args:
        paths: Files to copy.
        source_root: Directory the relative layout is computed against.
        dest_root: Directory that receives the copies.

    Returns:
        The list of destination paths, in the same order as *paths*.
    """
    copied = []
    for src in paths:
        relative_path = os.path.relpath(src, source_root)
        dest_file_path = os.path.join(dest_root, relative_path)
        # Create any missing intermediate directories for this file.
        os.makedirs(os.path.dirname(dest_file_path), exist_ok=True)
        shutil.copy(src, dest_file_path)
        copied.append(dest_file_path)
    return copied


# Every JSON file under base_dir, excluding the output of earlier runs.
json_files = find_json_files(base_dir, exclude_dir=selected_base_dir)

# 1% of the candidates, at least one file when any exist, zero otherwise.
num_files_to_select = sample_size(len(json_files))

# random.sample raises ValueError when asked for more items than exist, which
# sample_size rules out (it returns 0 for an empty candidate list).
selected_files = random.sample(json_files, num_files_to_select)

if not json_files:
    print(f"No JSON files found under {base_dir!r}; nothing to copy.")
else:
    print(f"Selected {num_files_to_select} file(s):")
    copy_preserving_structure(selected_files, base_dir, selected_base_dir)
    print("File copying completed.")