# Source: Hugging Face Space "TencentARC/DI-PCG" (Running on Zero). File size: 19,296 bytes.

import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import numpy as np
import cv2
import gin
import bpy
import gc
import logging
import time
import argparse
from pathlib import Path
import importlib
import json
import copy
import imgaug
import imgaug.augmenters as iaa
from core.utils.io import read_list_from_txt
from multiprocessing import Pool
import torch
from core.utils.dinov2 import Dinov2Model

# Configure root logging: INFO level, records formatted as
# "[HH:MM:SS.mmm] [module] [LEVEL] | message".
logging.basicConfig(
    format="[%(asctime)s.%(msecs)03d] [%(module)s] [%(levelname)s] | %(message)s",
    datefmt="%H:%M:%S",
    level=logging.INFO,
)
# Module-level logger used by the rest of this script.
logger = logging.getLogger(__name__)


import infinigen
from infinigen.core import init, surface
from infinigen.assets.utils.decorate import read_co
from infinigen.assets.utils.misc import assign_material
from infinigen.core.util import blender as butil
from infinigen.assets.lighting import (
    hdri_lighting,
    holdout_lighting,
    sky_lighting,
    three_point_lighting,
)
from core.utils.camera import convert_sphere_to_xyz, setup_camera
from core.utils.vis_utils import colorObj, setMat_plastic

# Augmentation pipelines (imgaug) used by `aug` on the rendered images.

# Colour jitter; `aug` applies it to the RGB channels only (alpha is excluded).
color_aug = iaa.Sequential([
    # brightness offset, gamma contrast, hue/saturation shift, partial grayscale
    iaa.WithBrightnessChannels(iaa.Add((-50, 50))),
    iaa.GammaContrast((0.7, 1.5), per_channel=True),
    iaa.AddToHueAndSaturation((-60, 60)),
    iaa.Grayscale((0.0, 0.8)),
])
# Horizontal flip with probability 0.5.
flip_aug = iaa.Sequential([iaa.Fliplr(0.5)])
# Crop or pad each side by up to 10% (padding with constant 0), then crop and
# pad so that the result is exactly 256x256.
crop_aug = iaa.Sequential([
    iaa.CropAndPad(percent=(-0.1, 0.1), pad_mode='constant', pad_cval=(0, 0), keep_size=False),
    iaa.CropToFixedSize(height=256, width=256),
    iaa.PadToFixedSize(height=256, width=256)
])
# Crop by 0-10% (one value shared by all sides) and resize back to the input size.
crop_resize_aug = iaa.KeepSizeByResize(iaa.Crop(percent=(0, 0.1), sample_independently=False, keep_size=False))

def aug(name, save_root, num_aug, flip=True, crop=True):
    """Write ``num_aug`` randomly augmented copies of one rendered RGBA image.

    Args:
        name: Image entry of the form ``"<id>/<file>.png"``, relative to
            ``save_root``.
        save_root: Root directory of the dataset.
        num_aug: Number of augmented images to write.
        flip: Whether to run the random horizontal-flip augmenter.
        crop: Whether to run one of the two random crop augmenters.

    Each result is saved next to the source image as
    ``<file>_aug_<j>.png`` for ``j`` in ``range(num_aug)``.

    Raises:
        FileNotFoundError: If the source image cannot be read.
    """
    sample_id, file_name = name.split("/")
    src_path = os.path.join(save_root, sample_id, file_name)
    img_bgra = cv2.imread(src_path, -1)
    if img_bgra is None:
        # cv2.imread reports failure by returning None instead of raising
        raise FileNotFoundError("Cannot read image: {}".format(src_path))
    # the augmenters work on RGB, OpenCV loads BGR
    rgb = cv2.cvtColor(img_bgra[:, :, :3], cv2.COLOR_BGR2RGB)
    alpha = img_bgra[:, :, 3:]

    save_dir = os.path.join(save_root, sample_id)
    os.makedirs(save_dir, exist_ok=True)
    for j in range(num_aug):
        # start every augmentation from a fresh copy of the source image
        image, mask = rgb.copy(), alpha.copy()
        # colour augmentation on the RGB channels only
        if np.random.rand() < 0.8:
            image = color_aug(image=image)
        # geometric augmentations act on colour and alpha together
        image = np.concatenate([image, mask], axis=-1)
        if flip:
            image = flip_aug(image=image)
        if crop:
            if np.random.rand() < 0.5:
                # crop & pad
                image = crop_aug(image=image)
            else:
                # crop & resize
                image = crop_resize_aug(image=image)
        if np.random.rand() < 0.1:
            # silhouette: black where alpha > 0, white elsewhere
            image, mask = image[:, :, :3], image[:, :, 3:]
            image = np.tile(255 * (1.0 - (mask > 0)), (1, 1, 3)).astype(np.uint8)
            image = np.concatenate([image, mask], axis=-1)
        if np.random.rand() < 0.2:
            # edge map: black Canny edges of the alpha mask on white; the new
            # alpha channel is 1 on edge pixels and 0 elsewhere
            image, mask = image[:, :, :3], image[:, :, 3:]
            edge = np.expand_dims(cv2.Canny(mask, 100, 200), -1)
            mask = (edge > 0).astype(np.uint8)
            edge = 255 * (1.0 - mask)
            image = np.tile(edge, (1, 1, 3)).astype(np.uint8)
            image = np.concatenate([image, mask], axis=-1)
        # cv2.imwrite expects BGR(A): swap the colour channels back so the
        # saved file is not written with red and blue exchanged
        image = np.concatenate([image[:, :, 2::-1], image[:, :, 3:]], axis=-1)
        save_name = os.path.join(save_dir, "{}_aug_{}.png".format(file_name[:-4], j))
        cv2.imwrite(save_name, image)
    print(file_name)


def randomize_params(params_dict):
    """Sample one random value for every entry of ``params_dict``.

    Each value is indexable: element 0 is the kind and element 1 its spec.
    ``'continuous'`` draws uniformly between ``spec[0]`` and ``spec[1]``;
    ``'discrete'`` picks one element of the spec via ``np.random.choice``.

    Returns:
        A dict with the same keys holding the sampled values.

    Raises:
        NotImplementedError: For any other kind.
    """
    sampled = {}
    for param_name, spec in params_dict.items():
        kind = spec[0]
        if kind == 'continuous':
            low, high = spec[1][0], spec[1][1]
            sampled[param_name] = np.random.uniform(low, high)
            continue
        if kind == 'discrete':
            sampled[param_name] = np.random.choice(spec[1])
            continue
        raise NotImplementedError
    return sampled

def generate(generator, params, seed, save_dir=None, save_name=None,
             save_blend=False, save_img=False, save_untexture_img=False, save_gif=False, save_mesh=False,
             cam_dists=(), cam_elevations=(), cam_azimuths=(), zoff=0,
             resolution='256x256', sample=100, no_mod=False, no_ground=True,
             window=None, screen=None):
    """Build one procedural asset in a fresh Blender scene and optionally export it.

    Args:
        generator: Object providing ``update_params``, ``spawn_asset`` and
            ``finalize_assets``.
        params: Parameters handed to ``generator.update_params``.
        seed: Seed handed to ``generator.spawn_asset``.
        save_dir: Output directory for every saved file.
        save_name: Base file name (without extension) of the outputs.
        save_blend: Save the scene as ``<save_name>.blend``.
        save_img: Render one image per camera view.
        save_untexture_img: Replace the asset materials by a plain grey
            material before the ``*_geometry.png`` render.
        save_gif: Render a 60-frame rotation and assemble it into a GIF.
        save_mesh: Export the scene as ``<save_name>.glb``.
        cam_dists: Camera distances; every combination of distance, elevation
            and azimuth is rendered.
        cam_elevations: Camera elevations.
        cam_azimuths: Camera azimuths.
        zoff: Offset added to the camera z coordinate.
        resolution: Render resolution as ``"<width>x<height>"``.
        sample: Number of Cycles samples.
        no_mod: If True, skip recentering the asset (and the ground plane).
        no_ground: If True, do not add the shadow-catcher ground plane.
        window: Unused in this function.
        screen: Unused in this function.

    Returns:
        ``(asset, image_path)``. ``asset`` is the spawned object, or its mesh
        child with the largest summed bounding-box extent when the spawned
        object is an EMPTY. ``image_path`` is the path of the last rendered
        still image, or ``None`` if no still image was rendered.
    """
    print("Generating")
    # reset to default
    bpy.ops.wm.read_homefile(app_template="")
    butil.clear_scene()
    # purge orphaned data blocks left over from the previous scene
    bpy.ops.outliner.orphans_purge()
    gc.collect()
    # configure infinigen
    gin.add_config_file_search_path("configs/infinigen")
    gin.parse_config_files_and_bindings(
        ["configs/infinigen/base.gin"],
        bindings=[],
        skip_unknown=True,
        finalize_config=False,
    )
    surface.registry.initialize_from_gin()

    # setup the scene
    scene = bpy.context.scene
    scene.render.engine = "CYCLES"
    scene.cycles.device = "GPU"
    scene.render.film_transparent = True
    bpy.context.preferences.system.scrollback = 0
    bpy.context.preferences.edit.undo_steps = 0
    prefs = bpy.context.preferences.addons["cycles"].preferences
    for dt in prefs.get_device_types(bpy.context):
        prefs.get_devices_for_type(dt[0])
    bpy.context.preferences.addons["cycles"].preferences.compute_device_type = "CUDA"

    # enable the CUDA devices only
    use_devices = [d for d in prefs.devices if d.type == "CUDA"]
    for d in prefs.devices:
        d.use = False
    for d in use_devices:
        d.use = True

    scene.render.resolution_x, scene.render.resolution_y = map(
        int, resolution.split("x")
    )
    scene.cycles.samples = sample
    bpy.context.scene.render.use_persistent_data = True
    bpy.context.scene.world.node_tree.nodes["Background"].inputs[0].default_value[0:3] = (0.0, 0.0, 0.0)

    # update the parameters
    generator.update_params(params)
    # generate the object
    asset = generator.spawn_asset(seed)
    generator.finalize_assets(asset)

    parent = asset
    if asset.type == "EMPTY":
        # use the mesh child with the largest summed bounding-box extent
        meshes = [o for o in asset.children_recursive if o.type == "MESH"]
        sizes = []
        for m in meshes:
            co = read_co(m)
            sizes.append((np.amax(co, 0) - np.amin(co, 0)).sum())
        i = np.argmax(np.array(sizes))
        asset = meshes[i]
    if not no_mod:
        if parent.animation_data is not None:
            drivers = parent.animation_data.drivers.values()
            for d in drivers:
                parent.driver_remove(d.data_path)
        # centre the asset in x/y using its bounding box
        co = read_co(asset)
        x_min, x_max = np.amin(co, 0), np.amax(co, 0)
        parent.location = -(x_min[0] + x_max[0]) / 2, -(x_min[1] + x_max[1]) / 2, 0
        butil.apply_transform(parent, loc=True)
        if not no_ground:
            # shadow-catcher plane at the lowest point of the asset
            bpy.ops.mesh.primitive_grid_add(
                size=5, x_subdivisions=400, y_subdivisions=400
            )
            plane = bpy.context.active_object
            plane.location[-1] = x_min[-1]
            plane.is_shadow_catcher = True
            material = bpy.data.materials.new("plane")
            material.use_nodes = True
            material.node_tree.nodes["Principled BSDF"].inputs[0].default_value = (
                0.015,
                0.009,
                0.003,
                1,
            )
            assign_material(plane, material)

    if save_blend:
        # save the generated scene as a .blend file
        butil.save_blend(f"{save_dir}/{save_name}.blend", autopack=True)

    # stays None when no still image is rendered, so the return below is
    # always well-defined
    image_path = None

    # render image
    if save_img:
        sky_lighting.add_lighting()
        scene.render.image_settings.file_format = "PNG"
        scene.render.image_settings.color_mode = "RGBA"
        nodes = bpy.data.worlds["World"].node_tree.nodes
        sky_texture = [n for n in nodes if n.name.startswith("Sky Texture")][-1]
        sky_texture.sun_elevation = np.deg2rad(60)
        sky_texture.sun_rotation = np.pi * 0.75

        for cd in cam_dists:
            for ce in cam_elevations:
                for ca in cam_azimuths:
                    save_name_full = f"{save_name}_{cd}_{ce}_{ca}"
                    cam_data = convert_sphere_to_xyz(cd, ce, ca)
                    cam_location, cam_rot = cam_data[:3], cam_data[3:]
                    cam_location[-1] += zoff # TODO: fix the table case
                    camera = setup_camera(cam_location=cam_location, cam_rot=cam_rot)
                    cam_info_ng = bpy.data.node_groups.get("nodegroup_active_cam_info")
                    if cam_info_ng is not None:
                        cam_info_ng.nodes["Object Info"].inputs["Object"].default_value = camera

                    image_path = str(f"{save_dir}/{save_name_full}_texture.png")
                    scene.render.filepath = image_path
                    bpy.ops.render.render(write_still=True)
                    # render untextured object
                    if save_untexture_img:
                        # NOTE(review): the materials are cleared here and not
                        # restored, so "_texture" renders of later views
                        # presumably show the grey material as well - confirm.
                        bpy.ops.object.shade_smooth()
                        asset.data.materials.clear()
                        # untextured model
                        RGBA = (192.0/255, 192.0/255, 192.0/255, 1)
                        meshColor = colorObj(RGBA, 0.5, 1.0, 1.0, 0.0, 2.0)
                        setMat_plastic(asset, meshColor)
                    # NOTE(review): this second render is not guarded by
                    # `save_untexture_img`, so a "_geometry" image is always
                    # written (a repeat of the textured render when the flag
                    # is off). Kept as-is because callers may rely on the file
                    # and on the returned path - confirm whether it belongs
                    # under the `if` above.
                    image_path = str(f"{save_dir}/{save_name_full}_geometry.png")
                    scene.render.filepath = image_path
                    bpy.ops.render.render(write_still=True)

    # render gif of object rotating
    if save_gif:
        save_gif_dir = os.path.join(save_dir, "gif")
        os.makedirs(save_gif_dir, exist_ok=True)
        bpy.context.scene.frame_end = 60
        asset_parent = asset if asset.parent is None else asset.parent
        # drive the last rotation_euler component so that 60 frames make one full turn
        asset_parent.driver_add("rotation_euler")[-1].driver.expression = f"frame/{60 / (2 * np.pi * 1)}"
        imgpath = str(f"{save_gif_dir}/{save_name}_###.png")
        scene.render.filepath = str(imgpath)
        bpy.ops.render.render(animation=True)
        from core.utils.io import make_gif
        all_imgpaths = [str(os.path.join(save_gif_dir, p)) for p in sorted(os.listdir(save_gif_dir)) if p.endswith('.png')]
        make_gif(f"{save_gif_dir}/{save_name}.gif", all_imgpaths)

    # dump mesh model
    if save_mesh:
        save_mesh_filepath = os.path.join(save_dir, save_name+".glb")
        bpy.ops.export_scene.gltf(filepath=save_mesh_filepath)
        print("Mesh saved in {}".format(save_mesh_filepath))

    return asset, image_path

# Rendering settings for the training images of each supported generator.
# Different generators use different camera views to improve efficiency.
#   cam_dists / elevations / azimuths: camera pose values; `generate` renders
#       every combination of them
#   zoff: offset added to the camera z coordinate
#   flip: whether the horizontal-flip augmentation is enabled
_RENDER_SETTINGS = {
    "ChairFactory": {
        "cam_dists": [1.8, 2.0],
        "elevations": [50, 60, 80],
        "azimuths": [0, 30, 60, 80],
        "zoff": 0.0,
        "flip": True,
    },
    "TableDiningFactory": {
        "cam_dists": [5.0, 6.0],
        "elevations": [60, 70],
        "azimuths": [0, 30, 60, 90],
        "zoff": 0.1,
        "flip": True,
    },
    "VaseFactory": {
        "cam_dists": [1.2, 1.6, 2.0],
        "elevations": [60, 80, 90],
        "azimuths": [0],
        "zoff": 0.3,
        "flip": True,
    },
    "BasketBaseFactory": {
        "cam_dists": [1.2, 1.6],
        "elevations": [50, 60, 70],
        "azimuths": [30, 60],
        "zoff": 0.0,
        "flip": True,
    },
    "FlowerFactory": {
        "cam_dists": [2.0, 3.0, 4.0],
        "elevations": [30, 50, 60, 80],
        "azimuths": [0],
        "zoff": 0,
        "flip": True,
    },
    "DandelionFactory": {
        "cam_dists": [3.0],
        "elevations": [90],
        "azimuths": [0],
        "zoff": 0.5,
        "flip": False,
    },
}


def _load_generator(generator_name, seed):
    """Import the modules under ./core/assets and instantiate ``generator_name``."""
    objects_path = Path("./core/assets/")
    if not objects_path.exists():
        raise FileNotFoundError(objects_path)
    for subdir in sorted(objects_path.iterdir()):
        clsname = subdir.name.split(".")[0].strip()
        with gin.unlock_config():
            module = importlib.import_module(f"core.assets.{clsname}")
        if hasattr(module, generator_name):
            logger.info(f"Found {generator_name} in {subdir}")
            return getattr(module, generator_name)(seed)
        logger.debug(f"{generator_name} not found in {subdir}")
    raise ModuleNotFoundError(f"{generator_name} not Found.")


def _view_stems(indices, cam_dists, elevations, azimuths):
    """Return ``"<id>/<id>_<dist>_<elev>_<azim>_texture"`` for every sample and view.

    The stem matches the ``*_texture.png`` files written by ``generate``.
    """
    return [
        "{:05d}/{:05d}_{}_{}_{}_texture".format(i, i, cam_dist, elevation, azimuth)
        for i in indices
        for cam_dist in cam_dists
        for elevation in elevations
        for azimuth in azimuths
    ]


def _write_list(path, names):
    """Write ``names`` to ``path``, one entry per line."""
    with open(path, "w") as f:
        f.writelines(name + "\n" for name in names)


def _log_aug_error(exc):
    """Report an exception raised by an augmentation worker."""
    logger.error("Augmentation failed: %r", exc)


def _load_rgb_on_white(path):
    """Load an RGBA image as 256x256 RGB with transparent pixels turned white."""
    image = cv2.imread(path, -1)
    if image is None:
        # cv2.imread reports failure by returning None instead of raising
        raise FileNotFoundError("Cannot read image: {}".format(path))
    mask = (image[..., -1:] > 0)
    image_rgb = cv2.cvtColor(image[..., :3], cv2.COLOR_BGR2RGB)
    # resize if not 256
    if image.shape[0] != 256 or image.shape[1] != 256:
        image_rgb = cv2.resize(image_rgb, (256, 256), interpolation=cv2.INTER_NEAREST)
        mask = cv2.resize((255 * mask[:, :, 0]).astype(np.uint8), (256, 256), interpolation=cv2.INTER_NEAREST)
        mask = (mask > 128)[:, :, None]
    # convert the transparent pixels to white background
    image_whiteback = image_rgb * mask + 255 * (1 - mask)
    return np.array(image_whiteback).astype(np.uint8)


def main():
    """Render the dataset, write the file lists, augment and extract DINOv2 features."""
    argparser = argparse.ArgumentParser()
    argparser.add_argument('--generator', type=str, default='ChairFactory',
                           help='Supported generator: [ChairFactory, VaseFactory, TableDiningFactory, BasketBaseFactory, FlowerFactory, DandelionFactory]')
    argparser.add_argument('--save_root', type=str, required=True)
    argparser.add_argument('--total_num', type=int, default=20000)
    argparser.add_argument('--aug_num', type=int, default=5)
    argparser.add_argument('--batch_size', type=int, default=1000)
    argparser.add_argument('--seed', type=int, default=0)
    args = argparser.parse_args()
    if args.generator not in _RENDER_SETTINGS:
        # without rendering settings the script cannot run; fail before rendering
        argparser.error("Unsupported generator: {}".format(args.generator))

    # setup
    np.random.seed(args.seed)
    settings = _RENDER_SETTINGS[args.generator]
    cam_dists = settings["cam_dists"]
    elevations = settings["elevations"]
    azimuths = settings["azimuths"]
    zoff = settings["zoff"]
    flip, crop = settings["flip"], True

    os.makedirs(args.save_root, exist_ok=True)
    train_ratio = 0.9
    sample = 100
    resolution = '256x256'
    total_num = args.total_num
    # single definition of the train/test boundary, shared by rendering and file lists
    num_train = int(total_num * train_ratio)

    # load the Blender procedural generator
    gen = _load_generator(args.generator, args.seed)

    # save params dict file
    with open(f"{args.save_root}/params_dict.txt", "w") as f:
        json.dump(gen.params_dict, f)

    # generate data main loop
    for i in range(total_num):
        # sample parameters
        params = randomize_params(gen.params_dict)
        # fix dependent parameters
        params_fix_unused = gen.fix_unused_params(params)
        save_name = f"{i:05d}"
        save_dir = f"{args.save_root}/{save_name}"
        os.makedirs(save_dir, exist_ok=True)
        # for training data, skip the blend file to save storage
        save_blend = i >= num_train
        generate(gen, params_fix_unused, args.seed, save_dir=save_dir, save_name=save_name,
                 save_blend=save_blend, save_img=True, cam_dists=cam_dists,
                 cam_elevations=elevations, cam_azimuths=azimuths, zoff=zoff, sample=sample, resolution=resolution)
        # save the parameters
        with open(f"{save_dir}/params.txt", "w") as f:
            json.dump(params_fix_unused, f, default=str)

        if i % 100 == 0:
            logger.info(f"{i} / {total_num} finished")

    # write filelist
    train_stems = _view_stems(range(num_train), cam_dists, elevations, azimuths)
    test_stems = _view_stems(range(num_train, total_num), cam_dists, elevations, azimuths)
    test_names = [stem + ".png" for stem in test_stems]
    _write_list(os.path.join(args.save_root, "train_list_mv.txt"), [stem + ".png" for stem in train_stems])
    _write_list(os.path.join(args.save_root, "test_list_mv.txt"), test_names)

    # do the augmentation
    image_list = read_list_from_txt(os.path.join(args.save_root, "train_list_mv.txt"))
    print("Augmenting...Total data: {}".format(len(image_list)))
    p = Pool(16)
    for name in image_list:
        # apply_async drops worker exceptions unless a callback reports them
        p.apply_async(aug, args=(name, args.save_root, args.aug_num, flip, crop),
                      error_callback=_log_aug_error)
    p.close()
    p.join()

    # write the new list: every training image followed by its augmented copies
    withaug_names = []
    for stem in train_stems:
        withaug_names.append(stem + ".png")
        withaug_names.extend("{}_aug_{}.png".format(stem, j) for j in range(args.aug_num))
    _write_list(os.path.join(args.save_root, "train_list_mv_withaug.txt"), withaug_names)

    # extract features
    # Setup PyTorch:
    torch.manual_seed(0)
    torch.set_grad_enabled(False)
    dinov2_model = Dinov2Model()

    image_paths = [os.path.join(args.save_root, name) for name in withaug_names + test_names]
    print(f"Number of images: {len(image_paths)}")
    for i in range(0, len(image_paths), args.batch_size):
        batch_paths = image_paths[i:i + args.batch_size]
        # pre-process the image - RGBA to RGB with white background
        batch_images = [_load_rgb_on_white(path) for path in batch_paths]

        batch_features = dinov2_model.encode_batch_imgs(batch_images, global_feat=False).detach().cpu().numpy()
        save_paths = [path.replace(".png", "_dino_token.npz") for path in batch_paths]
        # save the features
        for save_path, feature in zip(save_paths, batch_features):
            np.savez_compressed(save_path, feature)
        print(f"Extracted features for {i + len(batch_paths)} images.")


if __name__ == "__main__":
    main()