Liangyu committed on
Commit
c7f0cc1
•
1 Parent(s): 1e03b30

add functions

Browse files
This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50) hide show
  1. OpenPSG/checkpoints/epoch_60.pth +3 -0
  2. OpenPSG/configs/_base_/custom_runtime.py +17 -0
  3. OpenPSG/configs/_base_/datasets/psg.py +93 -0
  4. OpenPSG/configs/_base_/datasets/psg_panoptic.py +72 -0
  5. OpenPSG/configs/_base_/datasets/vg_detection.py +56 -0
  6. OpenPSG/configs/_base_/datasets/vg_sg.py +57 -0
  7. OpenPSG/configs/_base_/models/detr4seg_r101.py +64 -0
  8. OpenPSG/configs/_base_/models/detr4seg_r101_psg.py +137 -0
  9. OpenPSG/configs/_base_/models/detr4seg_r50.py +65 -0
  10. OpenPSG/configs/_base_/models/detr4seg_r50_psg.py +152 -0
  11. OpenPSG/configs/_base_/models/detr_r50.py +64 -0
  12. OpenPSG/configs/_base_/models/mask_rcnn_r50_fpn.py +107 -0
  13. OpenPSG/configs/_base_/models/panoptic_fpn_r101_fpn_psg.py +8 -0
  14. OpenPSG/configs/_base_/models/panoptic_fpn_r50_fpn_psg.py +74 -0
  15. OpenPSG/configs/_base_/models/psgtr_r101.py +5 -0
  16. OpenPSG/configs/_base_/models/psgtr_r50.py +82 -0
  17. OpenPSG/configs/_base_/schedules/schedule_1x.py +10 -0
  18. OpenPSG/configs/_base_/schedules/schedule_3x.py +10 -0
  19. OpenPSG/configs/gpsnet/panoptic_fpn_r101_fpn_1x_predcls_psg.py +26 -0
  20. OpenPSG/configs/gpsnet/panoptic_fpn_r101_fpn_1x_sgdet_psg.py +26 -0
  21. OpenPSG/configs/gpsnet/panoptic_fpn_r50_fpn_1x_predcls_psg.py +41 -0
  22. OpenPSG/configs/gpsnet/panoptic_fpn_r50_fpn_1x_sgdet_psg.py +45 -0
  23. OpenPSG/configs/imp/panoptic_fpn_r101_fpn_1x_predcls_psg.py +28 -0
  24. OpenPSG/configs/imp/panoptic_fpn_r101_fpn_1x_sgdet_psg.py +26 -0
  25. OpenPSG/configs/imp/panoptic_fpn_r50_fpn_1x_predcls_psg.py +44 -0
  26. OpenPSG/configs/imp/panoptic_fpn_r50_fpn_1x_sgdet_psg.py +48 -0
  27. OpenPSG/configs/motifs/panoptic_fpn_r101_fpn_1x_predcls_psg.py +28 -0
  28. OpenPSG/configs/motifs/panoptic_fpn_r101_fpn_1x_sgdet_psg.py +28 -0
  29. OpenPSG/configs/motifs/panoptic_fpn_r50_fpn_1x_predcls_psg.py +241 -0
  30. OpenPSG/configs/motifs/panoptic_fpn_r50_fpn_1x_sgdet_psg.py +44 -0
  31. OpenPSG/configs/psgformer/psgformer_r101_psg.py +16 -0
  32. OpenPSG/configs/psgformer/psgformer_r50.py +96 -0
  33. OpenPSG/configs/psgformer/psgformer_r50_psg.py +244 -0
  34. OpenPSG/configs/psgformer/psgformer_r50_psg_inference.py +31 -0
  35. OpenPSG/configs/psgtr/psgtr_r101_psg.py +231 -0
  36. OpenPSG/configs/psgtr/psgtr_r50.py +82 -0
  37. OpenPSG/configs/psgtr/psgtr_r50_psg.py +233 -0
  38. OpenPSG/configs/psgtr/psgtr_r50_psg_inference.py +31 -0
  39. OpenPSG/configs/vctree/panoptic_fpn_r101_fpn_1x_predcls_psg.py +28 -0
  40. OpenPSG/configs/vctree/panoptic_fpn_r101_fpn_1x_sgdet_psg.py +28 -0
  41. OpenPSG/configs/vctree/panoptic_fpn_r50_fpn_1x_predcls_psg.py +43 -0
  42. OpenPSG/configs/vctree/panoptic_fpn_r50_fpn_1x_sgdet_psg.py +49 -0
  43. README.md +3 -3
  44. app.py +133 -13
  45. fake_gan.py +56 -0
  46. images/cooking.jpg +0 -0
  47. images/forrest-gump.jpg +0 -0
  48. images/friends.jpg +0 -0
  49. images/mbappe.jpg +0 -0
  50. images/messi.jpg +0 -0
OpenPSG/checkpoints/epoch_60.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1c4ddcbda74686568b7e6b8145f7f33030407e27e390c37c23206f95c51829ed
3
+ size 531751994
OpenPSG/configs/_base_/custom_runtime.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ checkpoint_config = dict(interval=1, max_keep_ckpts=1)
2
+ # yapf:disable
3
+ log_config = dict(
4
+ interval=50,
5
+ hooks=[
6
+ dict(type='TextLoggerHook'),
7
+ # dict(type='TensorboardLoggerHook')
8
+ ])
9
+ # yapf:enable
10
+ custom_hooks = [dict(type='NumClassCheckHook')]
11
+
12
+ dist_params = dict(backend='nccl')
13
+ log_level = 'INFO'
14
+ load_from = None
15
+ resume_from = None
16
+
17
+ workflow = [('train', 1), ('val', 1)]
OpenPSG/configs/_base_/datasets/psg.py ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # dataset settings
2
+ dataset_type = 'PanopticSceneGraphDataset'
3
+ ann_file = './data/psg/psg.json'
4
+ coco_root = 'data/coco'
5
+
6
+ img_norm_cfg = dict(mean=[123.675, 116.28, 103.53],
7
+ std=[58.395, 57.12, 57.375],
8
+ to_rgb=True)
9
+ train_pipeline = [
10
+ dict(type='LoadImageFromFile'),
11
+ dict(
12
+ type='LoadPanopticSceneGraphAnnotations',
13
+ with_bbox=True,
14
+ with_rel=True,
15
+ with_mask=True,
16
+ with_seg=True,
17
+ ),
18
+ dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
19
+ dict(type='RandomFlip', flip_ratio=0.5),
20
+ dict(type='Normalize', **img_norm_cfg),
21
+ dict(type='Pad', size_divisor=32),
22
+ dict(type='SegRescale', scale_factor=1 / 4),
23
+ dict(type='SceneGraphFormatBundle'),
24
+ dict(
25
+ type='Collect',
26
+ keys=[
27
+ 'img',
28
+ 'gt_bboxes',
29
+ 'gt_labels',
30
+ 'gt_rels',
31
+ 'gt_relmaps',
32
+ 'gt_masks',
33
+ 'gt_semantic_seg',
34
+ ],
35
+ ),
36
+ ]
37
+ test_pipeline = [
38
+ dict(type='LoadImageFromFile'),
39
+ # Since the forward process may need gt info, annos must be loaded.
40
+ dict(type='LoadPanopticSceneGraphAnnotations',
41
+ with_bbox=True,
42
+ with_rel=True),
43
+ dict(
44
+ type='MultiScaleFlipAug',
45
+ img_scale=(1333, 800),
46
+ flip=False,
47
+ transforms=[
48
+ dict(type='Resize', keep_ratio=True),
49
+ dict(type='RandomFlip'),
50
+ dict(type='Normalize', **img_norm_cfg),
51
+ dict(type='Pad', size_divisor=32),
52
+ # NOTE: Do not change the img to DC.
53
+ dict(type='ImageToTensor', keys=['img']),
54
+ dict(type='ToTensor', keys=['gt_bboxes', 'gt_labels']),
55
+ dict(
56
+ type='ToDataContainer',
57
+ fields=(dict(key='gt_bboxes'), dict(key='gt_labels')),
58
+ ),
59
+ dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
60
+ ],
61
+ ),
62
+ ]
63
+ data = dict(
64
+ samples_per_gpu=2,
65
+ workers_per_gpu=2,
66
+ train=dict(
67
+ type=dataset_type,
68
+ ann_file=ann_file,
69
+ img_prefix=coco_root,
70
+ seg_prefix=coco_root,
71
+ pipeline=train_pipeline,
72
+ split='train',
73
+ all_bboxes=True,
74
+ ),
75
+ val=dict(
76
+ type=dataset_type,
77
+ ann_file=ann_file,
78
+ img_prefix=coco_root,
79
+ seg_prefix=coco_root,
80
+ pipeline=test_pipeline,
81
+ split='test',
82
+ all_bboxes=True,
83
+ ),
84
+ test=dict(
85
+ type=dataset_type,
86
+ ann_file=ann_file,
87
+ img_prefix=coco_root,
88
+ seg_prefix=coco_root,
89
+ pipeline=test_pipeline,
90
+ split='test',
91
+ all_bboxes=True,
92
+ ),
93
+ )
OpenPSG/configs/_base_/datasets/psg_panoptic.py ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # dataset settings
2
+ dataset_type = 'PanopticSceneGraphDataset'
3
+ ann_file = './data/psg/psg.json'
4
+ coco_root = './data/coco'
5
+
6
+ img_norm_cfg = dict(mean=[123.675, 116.28, 103.53],
7
+ std=[58.395, 57.12, 57.375],
8
+ to_rgb=True)
9
+ train_pipeline = [
10
+ dict(type='LoadImageFromFile'),
11
+ dict(
12
+ type='LoadPanopticSceneGraphAnnotations',
13
+ with_bbox=True,
14
+ with_mask=True,
15
+ with_seg=True,
16
+ ),
17
+ dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
18
+ dict(type='RandomFlip', flip_ratio=0.5),
19
+ dict(type='Normalize', **img_norm_cfg),
20
+ dict(type='Pad', size_divisor=32),
21
+ dict(type='SegRescale', scale_factor=1 / 4),
22
+ dict(type='DefaultFormatBundle'),
23
+ dict(
24
+ type='Collect',
25
+ keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks', 'gt_semantic_seg'],
26
+ ),
27
+ ]
28
+ test_pipeline = [
29
+ dict(type='LoadImageFromFile'),
30
+ dict(
31
+ type='MultiScaleFlipAug',
32
+ img_scale=(1333, 800),
33
+ flip=False,
34
+ transforms=[
35
+ dict(type='Resize', keep_ratio=True),
36
+ dict(type='RandomFlip'),
37
+ dict(type='Normalize', **img_norm_cfg),
38
+ dict(type='Pad', size_divisor=32),
39
+ dict(type='ImageToTensor', keys=['img']),
40
+ dict(type='Collect', keys=['img']),
41
+ ],
42
+ ),
43
+ ]
44
+ data = dict(
45
+ samples_per_gpu=2,
46
+ workers_per_gpu=2,
47
+ train=dict(
48
+ type=dataset_type,
49
+ ann_file=ann_file,
50
+ img_prefix=coco_root,
51
+ seg_prefix=coco_root,
52
+ pipeline=train_pipeline,
53
+ split='train',
54
+ ),
55
+ val=dict(
56
+ type=dataset_type,
57
+ ann_file=ann_file,
58
+ img_prefix=coco_root,
59
+ seg_prefix=coco_root,
60
+ pipeline=test_pipeline,
61
+ split='test',
62
+ ),
63
+ test=dict(
64
+ type=dataset_type,
65
+ ann_file=ann_file,
66
+ img_prefix=coco_root,
67
+ seg_prefix=coco_root,
68
+ pipeline=test_pipeline,
69
+ split='test',
70
+ ),
71
+ )
72
+ evaluation = dict(interval=1, metric='PQ')
OpenPSG/configs/_base_/datasets/vg_detection.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # dataset settings
2
+ custom_imports = dict(imports=[
3
+ 'openpsg.datasets',
4
+ 'openpsg.datasets.pipelines',
5
+ ],
6
+ allow_failed_imports=False)
7
+
8
+ dataset_type = 'SceneGraphDataset'
9
+ ann_file = 'data/vg/data_openpsg.json'
10
+ img_dir = 'data/vg/VG_100K'
11
+
12
+ img_norm_cfg = dict(mean=[123.675, 116.28, 103.53],
13
+ std=[58.395, 57.12, 57.375],
14
+ to_rgb=True)
15
+ train_pipeline = [
16
+ dict(type='LoadImageFromFile'),
17
+ dict(type='LoadSceneGraphAnnotations', with_bbox=True),
18
+ dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
19
+ dict(type='RandomFlip', flip_ratio=0.5),
20
+ dict(type='Normalize', **img_norm_cfg),
21
+ dict(type='Pad', size_divisor=32),
22
+ dict(type='DefaultFormatBundle'),
23
+ dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
24
+ ]
25
+ test_pipeline = [
26
+ dict(type='LoadImageFromFile'),
27
+ dict(type='MultiScaleFlipAug',
28
+ img_scale=(1333, 800),
29
+ flip=False,
30
+ transforms=[
31
+ dict(type='Resize', keep_ratio=True),
32
+ dict(type='RandomFlip'),
33
+ dict(type='Normalize', **img_norm_cfg),
34
+ dict(type='Pad', size_divisor=32),
35
+ dict(type='ImageToTensor', keys=['img']),
36
+ dict(type='Collect', keys=['img']),
37
+ ])
38
+ ]
39
+ data = dict(samples_per_gpu=2,
40
+ workers_per_gpu=2,
41
+ train=dict(type=dataset_type,
42
+ ann_file=ann_file,
43
+ img_prefix=img_dir,
44
+ pipeline=train_pipeline,
45
+ split='train'),
46
+ val=dict(type=dataset_type,
47
+ ann_file=ann_file,
48
+ img_prefix=img_dir,
49
+ pipeline=test_pipeline,
50
+ split='test'),
51
+ test=dict(type=dataset_type,
52
+ ann_file=ann_file,
53
+ img_prefix=img_dir,
54
+ pipeline=test_pipeline,
55
+ split='test'))
56
+ evaluation = dict(interval=1, metric='bbox')
OpenPSG/configs/_base_/datasets/vg_sg.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # dataset settings
2
+ dataset_type = 'SceneGraphDataset'
3
+ ann_file = '/mnt/ssd/gzj/data/VisualGenome/data_openpsg.json'
4
+ img_dir = '/mnt/ssd/gzj/data/VisualGenome/VG_100K'
5
+
6
+ img_norm_cfg = dict(mean=[123.675, 116.28, 103.53],
7
+ std=[58.395, 57.12, 57.375],
8
+ to_rgb=True)
9
+ train_pipeline = [
10
+ dict(type='LoadImageFromFile'),
11
+ dict(type='LoadSceneGraphAnnotations', with_bbox=True, with_rel=True),
12
+ dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
13
+ dict(type='RandomFlip', flip_ratio=0.5),
14
+ dict(type='Normalize', **img_norm_cfg),
15
+ dict(type='Pad', size_divisor=32),
16
+ dict(type='SceneGraphFormatBundle'),
17
+ dict(type='Collect',
18
+ keys=['img', 'gt_bboxes', 'gt_labels', 'gt_rels', 'gt_relmaps']),
19
+ ]
20
+ test_pipeline = [
21
+ dict(type='LoadImageFromFile'),
22
+ # Since the forward process may need gt info, annos must be loaded.
23
+ dict(type='LoadSceneGraphAnnotations', with_bbox=True, with_rel=True),
24
+ dict(
25
+ type='MultiScaleFlipAug',
26
+ img_scale=(1333, 800),
27
+ flip=False,
28
+ transforms=[
29
+ dict(type='Resize', keep_ratio=True),
30
+ dict(type='RandomFlip'),
31
+ dict(type='Normalize', **img_norm_cfg),
32
+ dict(type='Pad', size_divisor=32),
33
+ # NOTE: Do not change the img to DC.
34
+ dict(type='ImageToTensor', keys=['img']),
35
+ dict(type='ToTensor', keys=['gt_bboxes', 'gt_labels']),
36
+ dict(type='ToDataContainer',
37
+ fields=(dict(key='gt_bboxes'), dict(key='gt_labels'))),
38
+ dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
39
+ ])
40
+ ]
41
+ data = dict(samples_per_gpu=2,
42
+ workers_per_gpu=2,
43
+ train=dict(type=dataset_type,
44
+ ann_file=ann_file,
45
+ img_prefix=img_dir,
46
+ pipeline=train_pipeline,
47
+ split='train'),
48
+ val=dict(type=dataset_type,
49
+ ann_file=ann_file,
50
+ img_prefix=img_dir,
51
+ pipeline=test_pipeline,
52
+ split='test'),
53
+ test=dict(type=dataset_type,
54
+ ann_file=ann_file,
55
+ img_prefix=img_dir,
56
+ pipeline=test_pipeline,
57
+ split='test'))
OpenPSG/configs/_base_/models/detr4seg_r101.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model = dict(
2
+ type='DETR4seg',
3
+ backbone=dict(type='ResNet',
4
+ depth=101,
5
+ num_stages=4,
6
+ out_indices=(0, 1, 2, 3),
7
+ frozen_stages=1,
8
+ norm_cfg=dict(type='BN', requires_grad=False),
9
+ norm_eval=True,
10
+ style='pytorch',
11
+ init_cfg=dict(type='Pretrained',
12
+ checkpoint='torchvision://resnet101')),
13
+ bbox_head=dict(type='detr4segHead',
14
+ num_classes=80,
15
+ in_channels=2048,
16
+ transformer=dict(
17
+ type='Transformer',
18
+ encoder=dict(type='DetrTransformerEncoder',
19
+ num_layers=6,
20
+ transformerlayers=dict(
21
+ type='BaseTransformerLayer',
22
+ attn_cfgs=[
23
+ dict(type='MultiheadAttention',
24
+ embed_dims=256,
25
+ num_heads=8,
26
+ dropout=0.1)
27
+ ],
28
+ feedforward_channels=2048,
29
+ ffn_dropout=0.1,
30
+ operation_order=('self_attn', 'norm',
31
+ 'ffn', 'norm'))),
32
+ decoder=dict(
33
+ type='DetrTransformerDecoder',
34
+ return_intermediate=True,
35
+ num_layers=6,
36
+ transformerlayers=dict(
37
+ type='DetrTransformerDecoderLayer',
38
+ attn_cfgs=dict(type='MultiheadAttention',
39
+ embed_dims=256,
40
+ num_heads=8,
41
+ dropout=0.1),
42
+ feedforward_channels=2048,
43
+ ffn_dropout=0.1,
44
+ operation_order=('self_attn', 'norm',
45
+ 'cross_attn', 'norm', 'ffn',
46
+ 'norm')),
47
+ )),
48
+ positional_encoding=dict(type='SinePositionalEncoding',
49
+ num_feats=128,
50
+ normalize=True),
51
+ loss_cls=dict(type='CrossEntropyLoss',
52
+ use_sigmoid=False,
53
+ loss_weight=1.0,
54
+ class_weight=1.0),
55
+ loss_bbox=dict(type='L1Loss', loss_weight=5.0),
56
+ loss_iou=dict(type='GIoULoss', loss_weight=2.0),
57
+ dice_loss=dict(type='DiceLoss', loss_weight=1.0)),
58
+ # training and testing settings
59
+ train_cfg=dict(assigner=dict(
60
+ type='HungarianAssigner',
61
+ cls_cost=dict(type='ClassificationCost', weight=1.),
62
+ reg_cost=dict(type='BBoxL1Cost', weight=5.0, box_format='xywh'),
63
+ iou_cost=dict(type='IoUCost', iou_mode='giou', weight=2.0))),
64
+ test_cfg=dict(max_per_img=100))
OpenPSG/configs/_base_/models/detr4seg_r101_psg.py ADDED
@@ -0,0 +1,137 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _base_ = [
2
+ '../_base_/models/detr4seg_r101.py', '../_base_/datasets/psg.py',
3
+ '../_base_/custom_runtime.py'
4
+ ]
5
+
6
+ custom_imports = dict(imports=[
7
+ 'openpsg.models.frameworks.detr4seg',
8
+ 'openpsg.models.relation_heads.detr4seg_head', 'openpsg.datasets',
9
+ 'openpsg.datasets.pipelines.loading',
10
+ 'openpsg.datasets.pipelines.rel_randomcrop',
11
+ 'openpsg.models.relation_heads.approaches.matcher',
12
+ 'openpsg.models.losses.seg_losses'
13
+ ],
14
+ allow_failed_imports=False)
15
+
16
+ object_classes = [
17
+ 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train',
18
+ 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign',
19
+ 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
20
+ 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag',
21
+ 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite',
22
+ 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
23
+ 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon',
24
+ 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot',
25
+ 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant',
26
+ 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote',
27
+ 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink',
28
+ 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
29
+ 'hair drier', 'toothbrush', 'banner', 'blanket', 'bridge', 'cardboard',
30
+ 'counter', 'curtain', 'door-stuff', 'floor-wood', 'flower', 'fruit',
31
+ 'gravel', 'house', 'light', 'mirror-stuff', 'net', 'pillow', 'platform',
32
+ 'playingfield', 'railroad', 'river', 'road', 'roof', 'sand', 'sea',
33
+ 'shelf', 'snow', 'stairs', 'tent', 'towel', 'wall-brick', 'wall-stone',
34
+ 'wall-tile', 'wall-wood', 'water-other', 'window-blind', 'window-other',
35
+ 'tree-merged', 'fence-merged', 'ceiling-merged', 'sky-other-merged',
36
+ 'cabinet-merged', 'table-merged', 'floor-other-merged', 'pavement-merged',
37
+ 'mountain-merged', 'grass-merged', 'dirt-merged', 'paper-merged',
38
+ 'food-other-merged', 'building-other-merged', 'rock-merged',
39
+ 'wall-other-merged', 'rug-merged'
40
+ ]
41
+
42
+ model = dict(bbox_head=dict(
43
+ num_classes=len(object_classes),
44
+ object_classes=object_classes,
45
+ ))
46
+
47
+ img_norm_cfg = dict(mean=[123.675, 116.28, 103.53],
48
+ std=[58.395, 57.12, 57.375],
49
+ to_rgb=True)
50
+ # train_pipeline, NOTE the img_scale and the Pad's size_divisor is different
51
+ # from the default setting in mmdet.
52
+ train_pipeline = [
53
+ dict(type='LoadImageFromFile'),
54
+ dict(type='LoadSceneGraphAnnotations', with_bbox=True, with_rel=True),
55
+ dict(type='RandomFlip', flip_ratio=0.5),
56
+ dict(
57
+ type='AutoAugment',
58
+ policies=[
59
+ [
60
+ dict(type='Resize',
61
+ img_scale=[(480, 1333), (512, 1333), (544, 1333),
62
+ (576, 1333), (608, 1333), (640, 1333),
63
+ (672, 1333), (704, 1333), (736, 1333),
64
+ (768, 1333), (800, 1333)],
65
+ multiscale_mode='value',
66
+ keep_ratio=True)
67
+ ],
68
+ [
69
+ dict(type='Resize',
70
+ img_scale=[(400, 1333), (500, 1333), (600, 1333)],
71
+ multiscale_mode='value',
72
+ keep_ratio=True),
73
+ dict(type='RandomCrop',
74
+ crop_type='absolute_range',
75
+ crop_size=(384, 600),
76
+ allow_negative_crop=False), # no empty relations
77
+ dict(type='Resize',
78
+ img_scale=[(480, 1333), (512, 1333), (544, 1333),
79
+ (576, 1333), (608, 1333), (640, 1333),
80
+ (672, 1333), (704, 1333), (736, 1333),
81
+ (768, 1333), (800, 1333)],
82
+ multiscale_mode='value',
83
+ override=True,
84
+ keep_ratio=True)
85
+ ]
86
+ ]),
87
+ dict(type='Normalize', **img_norm_cfg),
88
+ dict(type='Pad', size_divisor=1),
89
+ dict(type='RelsFormatBundle'),
90
+ dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
91
+ ]
92
+ # test_pipeline, NOTE the Pad's size_divisor is different from the default
93
+ # setting (size_divisor=32). While there is little effect on the performance
94
+ # whether we use the default setting or use size_divisor=1.
95
+ test_pipeline = [
96
+ dict(type='LoadImageFromFile'),
97
+ dict(type='MultiScaleFlipAug',
98
+ img_scale=(1333, 800),
99
+ flip=False,
100
+ transforms=[
101
+ dict(type='Resize', keep_ratio=True),
102
+ dict(type='RandomFlip'),
103
+ dict(type='Normalize', **img_norm_cfg),
104
+ dict(type='Pad', size_divisor=1),
105
+ dict(type='ImageToTensor', keys=['img']),
106
+ dict(type='Collect', keys=['img'])
107
+ ])
108
+ ]
109
+ data = dict(samples_per_gpu=2,
110
+ workers_per_gpu=2,
111
+ train=dict(pipeline=train_pipeline),
112
+ val=dict(pipeline=test_pipeline),
113
+ test=dict(pipeline=test_pipeline))
114
+ # optimizer
115
+ optimizer = dict(
116
+ type='AdamW',
117
+ lr=0.0001,
118
+ weight_decay=0.0001,
119
+ paramwise_cfg=dict(
120
+ custom_keys={'backbone': dict(lr_mult=0.1, decay_mult=1.0)}))
121
+ optimizer_config = dict(grad_clip=dict(max_norm=0.1, norm_type=2))
122
+
123
+ # learning policy
124
+ lr_config = dict(policy='step', step=110)
125
+ runner = dict(type='EpochBasedRunner', max_epochs=150)
126
+
127
+ project_name = 'detr4seg'
128
+ expt_name = 'detr4seg_r101_coco'
129
+ work_dir = f'./work_dirs/{expt_name}'
130
+
131
+ log_config = dict(
132
+ interval=50,
133
+ hooks=[dict(type='TextLoggerHook'),
134
+ dict(type='TensorboardLoggerHook')],
135
+ )
136
+
137
+ load_from = '/mnt/ssd/gzj/test/OpenPSG/detr_r50_fb_origin.pth'
OpenPSG/configs/_base_/models/detr4seg_r50.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model = dict(
2
+ type='DETR4seg',
3
+ backbone=dict(type='ResNet',
4
+ depth=50,
5
+ num_stages=4,
6
+ out_indices=(0, 1, 2, 3),
7
+ frozen_stages=1,
8
+ norm_cfg=dict(type='BN', requires_grad=False),
9
+ norm_eval=True,
10
+ style='pytorch',
11
+ init_cfg=dict(type='Pretrained',
12
+ checkpoint='torchvision://resnet50')),
13
+ bbox_head=dict(type='detr4segHead',
14
+ num_classes=80,
15
+ in_channels=2048,
16
+ transformer=dict(
17
+ type='Transformer',
18
+ encoder=dict(type='DetrTransformerEncoder',
19
+ num_layers=6,
20
+ transformerlayers=dict(
21
+ type='BaseTransformerLayer',
22
+ attn_cfgs=[
23
+ dict(type='MultiheadAttention',
24
+ embed_dims=256,
25
+ num_heads=8,
26
+ dropout=0.1)
27
+ ],
28
+ feedforward_channels=2048,
29
+ ffn_dropout=0.1,
30
+ operation_order=('self_attn', 'norm',
31
+ 'ffn', 'norm'))),
32
+ decoder=dict(
33
+ type='DetrTransformerDecoder',
34
+ return_intermediate=True,
35
+ num_layers=6,
36
+ transformerlayers=dict(
37
+ type='DetrTransformerDecoderLayer',
38
+ attn_cfgs=dict(type='MultiheadAttention',
39
+ embed_dims=256,
40
+ num_heads=8,
41
+ dropout=0.1),
42
+ feedforward_channels=2048,
43
+ ffn_dropout=0.1,
44
+ operation_order=('self_attn', 'norm',
45
+ 'cross_attn', 'norm', 'ffn',
46
+ 'norm')),
47
+ )),
48
+ positional_encoding=dict(type='SinePositionalEncoding',
49
+ num_feats=128,
50
+ normalize=True),
51
+ loss_cls=dict(type='CrossEntropyLoss',
52
+ use_sigmoid=False,
53
+ loss_weight=1.0,
54
+ class_weight=1.0),
55
+ loss_bbox=dict(type='L1Loss', loss_weight=5.0),
56
+ loss_iou=dict(type='GIoULoss', loss_weight=2.0),
57
+ focal_loss=dict(type='BCEFocalLoss', loss_weight=1.0),
58
+ dice_loss=dict(type='psgtrDiceLoss', loss_weight=1.0)),
59
+ # training and testing settings
60
+ train_cfg=dict(assigner=dict(
61
+ type='HungarianAssigner',
62
+ cls_cost=dict(type='ClassificationCost', weight=1.),
63
+ reg_cost=dict(type='BBoxL1Cost', weight=5.0, box_format='xywh'),
64
+ iou_cost=dict(type='IoUCost', iou_mode='giou', weight=2.0))),
65
+ test_cfg=dict(max_per_img=100))
OpenPSG/configs/_base_/models/detr4seg_r50_psg.py ADDED
@@ -0,0 +1,152 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _base_ = ['./detr4seg_r50.py', '../datasets/psg.py', '../custom_runtime.py']
2
+
3
+ custom_imports = dict(imports=[
4
+ 'openpsg.models.frameworks.detr4seg',
5
+ 'openpsg.models.relation_heads.detr4seg_head', 'openpsg.datasets',
6
+ 'openpsg.datasets.pipelines.loading',
7
+ 'openpsg.datasets.pipelines.rel_randomcrop',
8
+ 'openpsg.models.relation_heads.approaches.matcher',
9
+ 'openpsg.models.losses.seg_losses'
10
+ ],
11
+ allow_failed_imports=False)
12
+
13
+ object_classes = [
14
+ 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train',
15
+ 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign',
16
+ 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
17
+ 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag',
18
+ 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite',
19
+ 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
20
+ 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon',
21
+ 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot',
22
+ 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant',
23
+ 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote',
24
+ 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink',
25
+ 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
26
+ 'hair drier', 'toothbrush', 'banner', 'blanket', 'bridge', 'cardboard',
27
+ 'counter', 'curtain', 'door-stuff', 'floor-wood', 'flower', 'fruit',
28
+ 'gravel', 'house', 'light', 'mirror-stuff', 'net', 'pillow', 'platform',
29
+ 'playingfield', 'railroad', 'river', 'road', 'roof', 'sand', 'sea',
30
+ 'shelf', 'snow', 'stairs', 'tent', 'towel', 'wall-brick', 'wall-stone',
31
+ 'wall-tile', 'wall-wood', 'water-other', 'window-blind', 'window-other',
32
+ 'tree-merged', 'fence-merged', 'ceiling-merged', 'sky-other-merged',
33
+ 'cabinet-merged', 'table-merged', 'floor-other-merged', 'pavement-merged',
34
+ 'mountain-merged', 'grass-merged', 'dirt-merged', 'paper-merged',
35
+ 'food-other-merged', 'building-other-merged', 'rock-merged',
36
+ 'wall-other-merged', 'rug-merged'
37
+ ]
38
+
39
+ model = dict(bbox_head=dict(
40
+ num_classes=len(object_classes),
41
+ object_classes=object_classes,
42
+ ))
43
+
44
+ img_norm_cfg = dict(mean=[123.675, 116.28, 103.53],
45
+ std=[58.395, 57.12, 57.375],
46
+ to_rgb=True)
47
+ # train_pipeline, NOTE the img_scale and the Pad's size_divisor is different
48
+ # from the default setting in mmdet.
49
+ train_pipeline = [
50
+ dict(type='LoadImageFromFile'),
51
+ dict(type='LoadPanopticSceneGraphAnnotations',
52
+ with_bbox=True,
53
+ with_mask=True,
54
+ with_seg=True),
55
+ dict(type='RandomFlip', flip_ratio=0.5),
56
+ dict(
57
+ type='AutoAugment',
58
+ policies=[
59
+ [
60
+ dict(type='Resize',
61
+ img_scale=[(480, 1333), (512, 1333), (544, 1333),
62
+ (576, 1333), (608, 1333), (640, 1333),
63
+ (672, 1333), (704, 1333), (736, 1333),
64
+ (768, 1333), (800, 1333)],
65
+ multiscale_mode='value',
66
+ keep_ratio=True)
67
+ ],
68
+ [
69
+ dict(type='Resize',
70
+ img_scale=[(400, 1333), (500, 1333), (600, 1333)],
71
+ multiscale_mode='value',
72
+ keep_ratio=True),
73
+ dict(type='RandomCrop',
74
+ crop_type='absolute_range',
75
+ crop_size=(384, 600),
76
+ allow_negative_crop=False), # no empty relations
77
+ dict(type='Resize',
78
+ img_scale=[(480, 1333), (512, 1333), (544, 1333),
79
+ (576, 1333), (608, 1333), (640, 1333),
80
+ (672, 1333), (704, 1333), (736, 1333),
81
+ (768, 1333), (800, 1333)],
82
+ multiscale_mode='value',
83
+ override=True,
84
+ keep_ratio=True)
85
+ ]
86
+ ]),
87
+ dict(type='Normalize', **img_norm_cfg),
88
+ dict(type='Pad', size_divisor=1),
89
+ dict(type='RelsFormatBundle'),
90
+ dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks'])
91
+ ]
92
+ # test_pipeline, NOTE the Pad's size_divisor is different from the default
93
+ # setting (size_divisor=32). While there is little effect on the performance
94
+ # whether we use the default setting or use size_divisor=1.
95
+ test_pipeline = [
96
+ dict(type='LoadImageFromFile'),
97
+ dict(type='MultiScaleFlipAug',
98
+ img_scale=(1333, 800),
99
+ flip=False,
100
+ transforms=[
101
+ dict(type='Resize', keep_ratio=True),
102
+ dict(type='RandomFlip'),
103
+ dict(type='Normalize', **img_norm_cfg),
104
+ dict(type='Pad', size_divisor=1),
105
+ dict(type='ImageToTensor', keys=['img']),
106
+ dict(type='Collect', keys=['img'])
107
+ ])
108
+ ]
109
+ data = dict(samples_per_gpu=1,
110
+ workers_per_gpu=1,
111
+ train=dict(pipeline=train_pipeline),
112
+ val=dict(pipeline=test_pipeline),
113
+ test=dict(pipeline=test_pipeline))
114
+ # optimizer
115
+ optimizer = dict(type='AdamW',
116
+ lr=0.00001,
117
+ weight_decay=0.0001,
118
+ paramwise_cfg=dict(
119
+ custom_keys={
120
+ 'backbone': dict(lr_mult=0.1, decay_mult=1.0),
121
+ 'bbox_attention': dict(lr_mult=10.0, decay_mult=1.0),
122
+ 'mask_head': dict(lr_mult=10.0, decay_mult=1.0)
123
+ }))
124
+ optimizer_config = dict(grad_clip=dict(max_norm=0.1, norm_type=2))
125
+
126
+ # learning policy
127
+ lr_config = dict(policy='step', step=8)
128
+ runner = dict(type='EpochBasedRunner', max_epochs=10)
129
+
130
+ evaluation = dict(interval=1, metric='PQ')
131
+ checkpoint_config = dict(interval=1, max_keep_ckpts=10)
132
+
133
+ project_name = 'detr4seg'
134
+ expt_name = 'test_detr4seg_r50_psg'
135
+ work_dir = f'./work_dirs/{expt_name}'
136
+
137
+ log_config = dict(
138
+ interval=50,
139
+ hooks=[
140
+ dict(type='TextLoggerHook'),
141
+ dict(type='TensorboardLoggerHook'),
142
+ dict(
143
+ type='WandbLoggerHook',
144
+ init_kwargs=dict(
145
+ project=project_name,
146
+ name=expt_name,
147
+ # config=work_dir + "/cfg.yaml"
148
+ ))
149
+ ],
150
+ )
151
+
152
+ load_from = 'detr_pan_r50.pth'
OpenPSG/configs/_base_/models/detr_r50.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model = dict(
2
+ type='DETR',
3
+ backbone=dict(type='ResNet',
4
+ depth=50,
5
+ num_stages=4,
6
+ out_indices=(3, ),
7
+ frozen_stages=1,
8
+ norm_cfg=dict(type='BN', requires_grad=False),
9
+ norm_eval=True,
10
+ style='pytorch',
11
+ init_cfg=dict(type='Pretrained',
12
+ checkpoint='torchvision://resnet50')),
13
+ bbox_head=dict(type='DETRHead',
14
+ num_classes=80,
15
+ in_channels=2048,
16
+ transformer=dict(
17
+ type='Transformer',
18
+ encoder=dict(type='DetrTransformerEncoder',
19
+ num_layers=6,
20
+ transformerlayers=dict(
21
+ type='BaseTransformerLayer',
22
+ attn_cfgs=[
23
+ dict(type='MultiheadAttention',
24
+ embed_dims=256,
25
+ num_heads=8,
26
+ dropout=0.1)
27
+ ],
28
+ feedforward_channels=2048,
29
+ ffn_dropout=0.1,
30
+ operation_order=('self_attn', 'norm',
31
+ 'ffn', 'norm'))),
32
+ decoder=dict(
33
+ type='DetrTransformerDecoder',
34
+ return_intermediate=True,
35
+ num_layers=6,
36
+ transformerlayers=dict(
37
+ type='DetrTransformerDecoderLayer',
38
+ attn_cfgs=dict(type='MultiheadAttention',
39
+ embed_dims=256,
40
+ num_heads=8,
41
+ dropout=0.1),
42
+ feedforward_channels=2048,
43
+ ffn_dropout=0.1,
44
+ operation_order=('self_attn', 'norm',
45
+ 'cross_attn', 'norm', 'ffn',
46
+ 'norm')),
47
+ )),
48
+ positional_encoding=dict(type='SinePositionalEncoding',
49
+ num_feats=128,
50
+ normalize=True),
51
+ loss_cls=dict(type='CrossEntropyLoss',
52
+ bg_cls_weight=0.1,
53
+ use_sigmoid=False,
54
+ loss_weight=1.0,
55
+ class_weight=1.0),
56
+ loss_bbox=dict(type='L1Loss', loss_weight=5.0),
57
+ loss_iou=dict(type='GIoULoss', loss_weight=2.0)),
58
+ # training and testing settings
59
+ train_cfg=dict(assigner=dict(
60
+ type='HungarianAssigner',
61
+ cls_cost=dict(type='ClassificationCost', weight=1.),
62
+ reg_cost=dict(type='BBoxL1Cost', weight=5.0, box_format='xywh'),
63
+ iou_cost=dict(type='IoUCost', iou_mode='giou', weight=2.0))),
64
+ test_cfg=dict(max_per_img=100))
OpenPSG/configs/_base_/models/mask_rcnn_r50_fpn.py ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # model settings
2
+ model = dict(
3
+ type='MaskRCNN',
4
+ backbone=dict(type='ResNet',
5
+ depth=50,
6
+ num_stages=4,
7
+ out_indices=(0, 1, 2, 3),
8
+ frozen_stages=1,
9
+ norm_cfg=dict(type='BN', requires_grad=True),
10
+ norm_eval=True,
11
+ style='pytorch',
12
+ init_cfg=dict(type='Pretrained',
13
+ checkpoint='torchvision://resnet50')),
14
+ neck=dict(type='FPN',
15
+ in_channels=[256, 512, 1024, 2048],
16
+ out_channels=256,
17
+ num_outs=5),
18
+ rpn_head=dict(type='RPNHead',
19
+ in_channels=256,
20
+ feat_channels=256,
21
+ anchor_generator=dict(type='AnchorGenerator',
22
+ scales=[8],
23
+ ratios=[0.5, 1.0, 2.0],
24
+ strides=[4, 8, 16, 32, 64]),
25
+ bbox_coder=dict(type='DeltaXYWHBBoxCoder',
26
+ target_means=[.0, .0, .0, .0],
27
+ target_stds=[1.0, 1.0, 1.0, 1.0]),
28
+ loss_cls=dict(type='CrossEntropyLoss',
29
+ use_sigmoid=True,
30
+ loss_weight=1.0),
31
+ loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
32
+ roi_head=dict(type='StandardRoIHead',
33
+ bbox_roi_extractor=dict(type='SingleRoIExtractor',
34
+ roi_layer=dict(type='RoIAlign',
35
+ output_size=7,
36
+ sampling_ratio=0),
37
+ out_channels=256,
38
+ featmap_strides=[4, 8, 16, 32]),
39
+ bbox_head=dict(
40
+ type='Shared2FCBBoxHead',
41
+ in_channels=256,
42
+ fc_out_channels=1024,
43
+ roi_feat_size=7,
44
+ num_classes=80,
45
+ bbox_coder=dict(type='DeltaXYWHBBoxCoder',
46
+ target_means=[0., 0., 0., 0.],
47
+ target_stds=[0.1, 0.1, 0.2, 0.2]),
48
+ reg_class_agnostic=False,
49
+ loss_cls=dict(type='CrossEntropyLoss',
50
+ use_sigmoid=False,
51
+ loss_weight=1.0),
52
+ loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
53
+ mask_roi_extractor=dict(type='SingleRoIExtractor',
54
+ roi_layer=dict(type='RoIAlign',
55
+ output_size=14,
56
+ sampling_ratio=0),
57
+ out_channels=256,
58
+ featmap_strides=[4, 8, 16, 32]),
59
+ mask_head=dict(type='FCNMaskHead',
60
+ num_convs=4,
61
+ in_channels=256,
62
+ conv_out_channels=256,
63
+ num_classes=80,
64
+ loss_mask=dict(type='CrossEntropyLoss',
65
+ use_mask=True,
66
+ loss_weight=1.0))),
67
+ # model training and testing settings
68
+ train_cfg=dict(rpn=dict(assigner=dict(type='MaxIoUAssigner',
69
+ pos_iou_thr=0.7,
70
+ neg_iou_thr=0.3,
71
+ min_pos_iou=0.3,
72
+ match_low_quality=True,
73
+ ignore_iof_thr=-1),
74
+ sampler=dict(type='RandomSampler',
75
+ num=256,
76
+ pos_fraction=0.5,
77
+ neg_pos_ub=-1,
78
+ add_gt_as_proposals=False),
79
+ allowed_border=-1,
80
+ pos_weight=-1,
81
+ debug=False),
82
+ rpn_proposal=dict(nms_pre=2000,
83
+ max_per_img=1000,
84
+ nms=dict(type='nms', iou_threshold=0.7),
85
+ min_bbox_size=0),
86
+ rcnn=dict(assigner=dict(type='MaxIoUAssigner',
87
+ pos_iou_thr=0.5,
88
+ neg_iou_thr=0.5,
89
+ min_pos_iou=0.5,
90
+ match_low_quality=True,
91
+ ignore_iof_thr=-1),
92
+ sampler=dict(type='RandomSampler',
93
+ num=512,
94
+ pos_fraction=0.25,
95
+ neg_pos_ub=-1,
96
+ add_gt_as_proposals=True),
97
+ mask_size=28,
98
+ pos_weight=-1,
99
+ debug=False)),
100
+ test_cfg=dict(rpn=dict(nms_pre=1000,
101
+ max_per_img=1000,
102
+ nms=dict(type='nms', iou_threshold=0.7),
103
+ min_bbox_size=0),
104
+ rcnn=dict(score_thr=0.05,
105
+ nms=dict(type='nms', iou_threshold=0.5),
106
+ max_per_img=100,
107
+ mask_thr_binary=0.5)))
OpenPSG/configs/_base_/models/panoptic_fpn_r101_fpn_psg.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ _base_ = './panoptic_fpn_r50_fpn_psg.py'
2
+
3
+ model = dict(backbone=dict(
4
+ depth=101,
5
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet101')))
6
+
7
+ expt_name = 'panoptic_fpn_r101_fpn_psg'
8
+ load_from = 'work_dirs/checkpoints/panoptic_fpn_r101_fpn_1x_coco_20210820_193950-ab9157a2.pth'
OpenPSG/configs/_base_/models/panoptic_fpn_r50_fpn_psg.py ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _base_ = [
2
+ '../models/mask_rcnn_r50_fpn.py',
3
+ '../datasets/psg_panoptic.py',
4
+ '../schedules/schedule_1x.py',
5
+ '../custom_runtime.py',
6
+ ]
7
+
8
+ model = dict(
9
+ type='PanopticFPN',
10
+ semantic_head=dict(
11
+ type='PanopticFPNHead',
12
+ num_things_classes=80,
13
+ num_stuff_classes=53,
14
+ in_channels=256,
15
+ inner_channels=128,
16
+ start_level=0,
17
+ end_level=4,
18
+ norm_cfg=dict(type='GN', num_groups=32, requires_grad=True),
19
+ conv_cfg=None,
20
+ loss_seg=dict(type='CrossEntropyLoss',
21
+ ignore_index=255,
22
+ loss_weight=0.5),
23
+ ),
24
+ panoptic_fusion_head=dict(type='HeuristicFusionHead',
25
+ num_things_classes=80,
26
+ num_stuff_classes=53),
27
+ test_cfg=dict(panoptic=dict(
28
+ score_thr=0.6,
29
+ max_per_img=100,
30
+ mask_thr_binary=0.5,
31
+ mask_overlap=0.5,
32
+ nms=dict(type='nms', iou_threshold=0.5, class_agnostic=True),
33
+ stuff_area_limit=4096,
34
+ )),
35
+ )
36
+
37
+ custom_hooks = []
38
+
39
+ # Change batch size and learning rate
40
+ data = dict(samples_per_gpu=8,
41
+ # workers_per_gpu=2
42
+ )
43
+ # optimizer = dict(lr=0.02)
44
+ optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
45
+ optimizer_config = dict(_delete_=True,
46
+ grad_clip=dict(max_norm=35, norm_type=2))
47
+
48
+ lr_config = dict(policy='step',
49
+ warmup='linear',
50
+ warmup_iters=500,
51
+ warmup_ratio=1.0 / 3,
52
+ step=[8, 11])
53
+
54
+ project_name = 'openpsg'
55
+ expt_name = 'panoptic_fpn_r50_fpn_psg'
56
+ work_dir = f'./work_dirs/{expt_name}'
57
+
58
+ log_config = dict(
59
+ interval=50,
60
+ hooks=[
61
+ dict(type='TextLoggerHook'),
62
+ # dict(type='TensorboardLoggerHook')
63
+ dict(
64
+ type='WandbLoggerHook',
65
+ init_kwargs=dict(
66
+ project=project_name,
67
+ name=expt_name,
68
+ # config=work_dir + "/cfg.yaml"
69
+ ),
70
+ ),
71
+ ],
72
+ )
73
+
74
+ load_from = 'work_dirs/checkpoints/panoptic_fpn_r50_fpn_1x_coco_20210821_101153-9668fd13.pth'
OpenPSG/configs/_base_/models/psgtr_r101.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ _base_ = './psgtr_r50.py'
2
+
3
+ model = dict(backbone=dict(
4
+ depth=101,
5
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet101')))
OpenPSG/configs/_base_/models/psgtr_r50.py ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model = dict(
2
+ type='PSGTr',
3
+ backbone=dict(type='ResNet',
4
+ depth=50,
5
+ num_stages=4,
6
+ out_indices=(0, 1, 2, 3),
7
+ frozen_stages=1,
8
+ norm_cfg=dict(type='BN', requires_grad=False),
9
+ norm_eval=True,
10
+ style='pytorch',
11
+ init_cfg=dict(type='Pretrained',
12
+ checkpoint='torchvision://resnet50')),
13
+ bbox_head=dict(type='PSGTrHead',
14
+ num_classes=80,
15
+ num_relations=117,
16
+ in_channels=2048,
17
+ transformer=dict(
18
+ type='Transformer',
19
+ encoder=dict(type='DetrTransformerEncoder',
20
+ num_layers=6,
21
+ transformerlayers=dict(
22
+ type='BaseTransformerLayer',
23
+ attn_cfgs=[
24
+ dict(type='MultiheadAttention',
25
+ embed_dims=256,
26
+ num_heads=8,
27
+ dropout=0.1)
28
+ ],
29
+ feedforward_channels=2048,
30
+ ffn_dropout=0.1,
31
+ operation_order=('self_attn', 'norm',
32
+ 'ffn', 'norm'))),
33
+ decoder=dict(
34
+ type='DetrTransformerDecoder',
35
+ return_intermediate=True,
36
+ num_layers=6,
37
+ transformerlayers=dict(
38
+ type='DetrTransformerDecoderLayer',
39
+ attn_cfgs=dict(type='MultiheadAttention',
40
+ embed_dims=256,
41
+ num_heads=8,
42
+ dropout=0.1),
43
+ feedforward_channels=2048,
44
+ ffn_dropout=0.1,
45
+ operation_order=('self_attn', 'norm',
46
+ 'cross_attn', 'norm', 'ffn',
47
+ 'norm')),
48
+ )),
49
+ positional_encoding=dict(type='SinePositionalEncoding',
50
+ num_feats=128,
51
+ normalize=True),
52
+ sub_loss_cls=dict(type='CrossEntropyLoss',
53
+ use_sigmoid=False,
54
+ loss_weight=1.0,
55
+ class_weight=1.0),
56
+ sub_loss_bbox=dict(type='L1Loss', loss_weight=5.0),
57
+ sub_loss_iou=dict(type='GIoULoss', loss_weight=2.0),
58
+ sub_focal_loss=dict(type='BCEFocalLoss', loss_weight=1.0),
59
+ sub_dice_loss=dict(type='psgtrDiceLoss', loss_weight=1.0),
60
+ obj_loss_cls=dict(type='CrossEntropyLoss',
61
+ use_sigmoid=False,
62
+ loss_weight=1.0,
63
+ class_weight=1.0),
64
+ obj_loss_bbox=dict(type='L1Loss', loss_weight=5.0),
65
+ obj_loss_iou=dict(type='GIoULoss', loss_weight=2.0),
66
+ obj_focal_loss=dict(type='BCEFocalLoss', loss_weight=1.0),
67
+ obj_dice_loss=dict(type='psgtrDiceLoss', loss_weight=1.0),
68
+ rel_loss_cls=dict(type='CrossEntropyLoss',
69
+ use_sigmoid=False,
70
+ loss_weight=2.0,
71
+ class_weight=1.0)),
72
+ # training and testing settings
73
+ train_cfg=dict(assigner=dict(
74
+ type='HTriMatcher',
75
+ s_cls_cost=dict(type='ClassificationCost', weight=1.),
76
+ s_reg_cost=dict(type='BBoxL1Cost', weight=5.0),
77
+ s_iou_cost=dict(type='IoUCost', iou_mode='giou', weight=2.0),
78
+ o_cls_cost=dict(type='ClassificationCost', weight=1.),
79
+ o_reg_cost=dict(type='BBoxL1Cost', weight=5.0),
80
+ o_iou_cost=dict(type='IoUCost', iou_mode='giou', weight=2.0),
81
+ r_cls_cost=dict(type='ClassificationCost', weight=2.))),
82
+ test_cfg=dict(max_per_img=100))
OpenPSG/configs/_base_/schedules/schedule_1x.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ # optimizer
2
+ optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
3
+ optimizer_config = dict(grad_clip=None)
4
+ # learning policy
5
+ lr_config = dict(policy='step',
6
+ warmup='linear',
7
+ warmup_iters=500,
8
+ warmup_ratio=0.001,
9
+ step=[8, 11])
10
+ runner = dict(type='EpochBasedRunner', max_epochs=12)
OpenPSG/configs/_base_/schedules/schedule_3x.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ # optimizer
2
+ optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
3
+ optimizer_config = dict(grad_clip=None)
4
+ # learning policy
5
+ lr_config = dict(policy='step',
6
+ warmup='linear',
7
+ warmup_iters=1000,
8
+ warmup_ratio=0.001,
9
+ step=[27, 33])
10
+ runner = dict(type='EpochBasedRunner', max_epochs=36)
OpenPSG/configs/gpsnet/panoptic_fpn_r101_fpn_1x_predcls_psg.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _base_ = './panoptic_fpn_r50_fpn_1x_predcls_psg.py'
2
+
3
+ model = dict(backbone=dict(
4
+ depth=101,
5
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet101')))
6
+
7
+ # Log config
8
+ project_name = 'openpsg'
9
+ expt_name = 'gpsnet_panoptic_fpn_r101_fpn_1x_predcls_psg'
10
+ work_dir = f'./work_dirs/{expt_name}'
11
+
12
+ log_config = dict(
13
+ interval=50,
14
+ hooks=[
15
+ dict(type='TextLoggerHook'),
16
+ dict(
17
+ type='WandbLoggerHook',
18
+ init_kwargs=dict(
19
+ project=project_name,
20
+ name=expt_name,
21
+ ),
22
+ ),
23
+ ],
24
+ )
25
+
26
+ load_from = 'work_dirs/checkpoints/panoptic_fpn_r101_fpn_1x_coco_20210820_193950-ab9157a2.pth'
OpenPSG/configs/gpsnet/panoptic_fpn_r101_fpn_1x_sgdet_psg.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _base_ = './panoptic_fpn_r50_fpn_1x_sgdet_psg.py'
2
+
3
+ model = dict(backbone=dict(
4
+ depth=101,
5
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet101')))
6
+
7
+ # Log config
8
+ project_name = 'openpsg'
9
+ expt_name = 'gpsnet_panoptic_fpn_r101_fpn_1x_sgdet_psg'
10
+ work_dir = f'./work_dirs/{expt_name}'
11
+
12
+ log_config = dict(
13
+ interval=50,
14
+ hooks=[
15
+ dict(type='TextLoggerHook'),
16
+ dict(
17
+ type='WandbLoggerHook',
18
+ init_kwargs=dict(
19
+ project=project_name,
20
+ name=expt_name,
21
+ ),
22
+ ),
23
+ ],
24
+ )
25
+
26
+ load_from = 'work_dirs/checkpoints/panoptic_fpn_r101_fpn_1x_coco_20210820_193950-ab9157a2.pth'
OpenPSG/configs/gpsnet/panoptic_fpn_r50_fpn_1x_predcls_psg.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _base_ = [
2
+ '../motifs/panoptic_fpn_r50_fpn_1x_predcls_psg.py',
3
+ ]
4
+
5
+ model = dict(relation_head=dict(
6
+ type='GPSHead',
7
+ head_config=dict(
8
+ # NOTE: Evaluation type
9
+ use_gt_box=True,
10
+ use_gt_label=True,
11
+ ),
12
+ ))
13
+
14
+ evaluation = dict(interval=1,
15
+ metric='predcls',
16
+ relation_mode=True,
17
+ classwise=True,
18
+ detection_method='pan_seg')
19
+
20
+ # Change batch size and learning rate
21
+ data = dict(samples_per_gpu=16, workers_per_gpu=0)
22
+ optimizer = dict(type='SGD', lr=0.03, momentum=0.9, weight_decay=0.0001)
23
+
24
+ # Log config
25
+ project_name = 'openpsg'
26
+ expt_name = 'gpsnet_panoptic_fpn_r50_fpn_1x_predcls_psg'
27
+ work_dir = f'./work_dirs/{expt_name}'
28
+
29
+ log_config = dict(
30
+ interval=50,
31
+ hooks=[
32
+ dict(type='TextLoggerHook'),
33
+ dict(
34
+ type='WandbLoggerHook',
35
+ init_kwargs=dict(
36
+ project=project_name,
37
+ name=expt_name,
38
+ ),
39
+ ),
40
+ ],
41
+ )
OpenPSG/configs/gpsnet/panoptic_fpn_r50_fpn_1x_sgdet_psg.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _base_ = [
2
+ '../motifs/panoptic_fpn_r50_fpn_1x_predcls_psg.py',
3
+ ]
4
+
5
+ model = dict(
6
+ relation_head=dict(
7
+ type='GPSHead',
8
+ head_config=dict(
9
+ # NOTE: Evaluation type
10
+ use_gt_box=False,
11
+ use_gt_label=False,
12
+ ),
13
+ ),
14
+ roi_head=dict(bbox_head=dict(type='SceneGraphBBoxHead'), ),
15
+ )
16
+
17
+ evaluation = dict(
18
+ interval=1,
19
+ metric='sgdet',
20
+ relation_mode=True,
21
+ classwise=True,
22
+ iou_thrs=0.5,
23
+ detection_method='pan_seg',
24
+ )
25
+
26
+ data = dict(samples_per_gpu=16)
27
+
28
+ # Log config
29
+ project_name = 'openpsg'
30
+ expt_name = 'gpsnet_panoptic_fpn_r50_fpn_1x_sgdet_psg'
31
+ work_dir = f'./work_dirs/{expt_name}'
32
+
33
+ log_config = dict(
34
+ interval=50,
35
+ hooks=[
36
+ dict(type='TextLoggerHook'),
37
+ dict(
38
+ type='WandbLoggerHook',
39
+ init_kwargs=dict(
40
+ project=project_name,
41
+ name=expt_name,
42
+ ),
43
+ ),
44
+ ],
45
+ )
OpenPSG/configs/imp/panoptic_fpn_r101_fpn_1x_predcls_psg.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _base_ = './panoptic_fpn_r50_fpn_1x_predcls_psg.py'
2
+
3
+ model = dict(backbone=dict(
4
+ depth=101,
5
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet101')))
6
+
7
+ # Log config
8
+ project_name = 'openpsg'
9
+ expt_name = 'imp_panoptic_fpn_r101_fpn_1x_predcls_psg'
10
+ work_dir = f'./work_dirs/{expt_name}'
11
+
12
+ log_config = dict(
13
+ interval=50,
14
+ hooks=[
15
+ dict(type='TextLoggerHook'),
16
+ # dict(type='TensorboardLoggerHook')
17
+ dict(
18
+ type='WandbLoggerHook',
19
+ init_kwargs=dict(
20
+ project=project_name,
21
+ name=expt_name,
22
+ # config=work_dir + "/cfg.yaml"
23
+ ),
24
+ ),
25
+ ],
26
+ )
27
+
28
+ load_from = 'work_dirs/checkpoints/panoptic_fpn_r101_fpn_1x_coco_20210820_193950-ab9157a2.pth'
OpenPSG/configs/imp/panoptic_fpn_r101_fpn_1x_sgdet_psg.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _base_ = './panoptic_fpn_r50_fpn_1x_sgdet_psg.py'
2
+
3
+ model = dict(backbone=dict(
4
+ depth=101,
5
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet101')))
6
+
7
+ # Log config
8
+ project_name = 'openpsg'
9
+ expt_name = 'imp_panoptic_fpn_r101_fpn_1x_sgdet_psg'
10
+ work_dir = f'./work_dirs/{expt_name}'
11
+
12
+ log_config = dict(
13
+ interval=50,
14
+ hooks=[
15
+ dict(type='TextLoggerHook'),
16
+ dict(
17
+ type='WandbLoggerHook',
18
+ init_kwargs=dict(
19
+ project=project_name,
20
+ name=expt_name,
21
+ ),
22
+ ),
23
+ ],
24
+ )
25
+
26
+ load_from = 'work_dirs/checkpoints/panoptic_fpn_r101_fpn_1x_coco_20210820_193950-ab9157a2.pth'
OpenPSG/configs/imp/panoptic_fpn_r50_fpn_1x_predcls_psg.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _base_ = [
2
+ '../motifs/panoptic_fpn_r50_fpn_1x_predcls_psg.py',
3
+ ]
4
+
5
+ model = dict(relation_head=dict(
6
+ type='IMPHead',
7
+ head_config=dict(
8
+ # NOTE: Evaluation type
9
+ use_gt_box=True,
10
+ use_gt_label=True,
11
+ num_iter=2,
12
+ ),
13
+ ))
14
+
15
+ evaluation = dict(interval=1,
16
+ metric='predcls',
17
+ relation_mode=True,
18
+ classwise=True)
19
+
20
+ # Change batch size and learning rate
21
+ data = dict(samples_per_gpu=16, )
22
+ # workers_per_gpu=0) # FIXME: Is this the problem?
23
+ optimizer = dict(type='SGD', lr=0.001, momentum=0.9)
24
+
25
+ # Log config
26
+ project_name = 'openpsg'
27
+ expt_name = 'imp_panoptic_fpn_r50_fpn_1x_predcls_psg'
28
+ work_dir = f'./work_dirs/{expt_name}'
29
+
30
+ log_config = dict(
31
+ interval=50,
32
+ hooks=[
33
+ dict(type='TextLoggerHook'),
34
+ # dict(type='TensorboardLoggerHook')
35
+ dict(
36
+ type='WandbLoggerHook',
37
+ init_kwargs=dict(
38
+ project=project_name,
39
+ name=expt_name,
40
+ # config=work_dir + "/cfg.yaml"
41
+ ),
42
+ ),
43
+ ],
44
+ )
OpenPSG/configs/imp/panoptic_fpn_r50_fpn_1x_sgdet_psg.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _base_ = [
2
+ '../motifs/panoptic_fpn_r50_fpn_1x_predcls_psg.py',
3
+ ]
4
+
5
+ model = dict(relation_head=dict(
6
+ type='IMPHead',
7
+ head_config=dict(
8
+ # NOTE: Evaluation type
9
+ use_gt_box=False,
10
+ use_gt_label=False,
11
+ num_iter=2,
12
+ ),
13
+ ))
14
+
15
+ evaluation = dict(
16
+ interval=1,
17
+ metric='sgdet',
18
+ relation_mode=True,
19
+ classwise=True,
20
+ iou_thrs=0.5,
21
+ detection_method='pan_seg',
22
+ )
23
+
24
+ # Change batch size and learning rate
25
+ data = dict(samples_per_gpu=16, )
26
+ # workers_per_gpu=0) # FIXME: Is this the problem?
27
+ optimizer = dict(type='SGD', lr=0.001, momentum=0.9)
28
+
29
+ # Log config
30
+ project_name = 'openpsg'
31
+ expt_name = 'imp_panoptic_fpn_r50_fpn_1x_sgdet_psg'
32
+ work_dir = f'./work_dirs/{expt_name}'
33
+
34
+ log_config = dict(
35
+ interval=50,
36
+ hooks=[
37
+ dict(type='TextLoggerHook'),
38
+ # dict(type='TensorboardLoggerHook')
39
+ dict(
40
+ type='WandbLoggerHook',
41
+ init_kwargs=dict(
42
+ project=project_name,
43
+ name=expt_name,
44
+ # config=work_dir + "/cfg.yaml"
45
+ ),
46
+ ),
47
+ ],
48
+ )
OpenPSG/configs/motifs/panoptic_fpn_r101_fpn_1x_predcls_psg.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _base_ = './panoptic_fpn_r50_fpn_1x_predcls_psg.py'
2
+
3
+ model = dict(backbone=dict(
4
+ depth=101,
5
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet101')))
6
+
7
+ # Log config
8
+ project_name = 'openpsg'
9
+ expt_name = 'motifs_panoptic_fpn_r101_fpn_1x_predcls_psg'
10
+ work_dir = f'./work_dirs/{expt_name}'
11
+
12
+ log_config = dict(
13
+ interval=50,
14
+ hooks=[
15
+ dict(type='TextLoggerHook'),
16
+ # dict(type='TensorboardLoggerHook')
17
+ dict(
18
+ type='WandbLoggerHook',
19
+ init_kwargs=dict(
20
+ project=project_name,
21
+ name=expt_name,
22
+ # config=work_dir + "/cfg.yaml"
23
+ ),
24
+ ),
25
+ ],
26
+ )
27
+
28
+ load_from = 'work_dirs/checkpoints/panoptic_fpn_r101_fpn_1x_coco_20210820_193950-ab9157a2.pth'
OpenPSG/configs/motifs/panoptic_fpn_r101_fpn_1x_sgdet_psg.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _base_ = './panoptic_fpn_r50_fpn_1x_sgdet_psg.py'
2
+
3
+ model = dict(backbone=dict(
4
+ depth=101,
5
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet101')))
6
+
7
+ # Log config
8
+ project_name = 'openpsg'
9
+ expt_name = 'motifs_panoptic_fpn_r101_fpn_1x_sgdet_psg'
10
+ work_dir = f'./work_dirs/{expt_name}'
11
+
12
+ log_config = dict(
13
+ interval=50,
14
+ hooks=[
15
+ dict(type='TextLoggerHook'),
16
+ # dict(type='TensorboardLoggerHook')
17
+ dict(
18
+ type='WandbLoggerHook',
19
+ init_kwargs=dict(
20
+ project=project_name,
21
+ name=expt_name,
22
+ # config=work_dir + "/cfg.yaml"
23
+ ),
24
+ ),
25
+ ],
26
+ )
27
+
28
+ load_from = 'work_dirs/checkpoints/panoptic_fpn_r101_fpn_1x_coco_20210820_193950-ab9157a2.pth'
OpenPSG/configs/motifs/panoptic_fpn_r50_fpn_1x_predcls_psg.py ADDED
@@ -0,0 +1,241 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _base_ = [
2
+ '../_base_/models/mask_rcnn_r50_fpn.py',
3
+ '../_base_/datasets/psg.py',
4
+ '../_base_/schedules/schedule_1x.py',
5
+ '../_base_/custom_runtime.py',
6
+ ]
7
+
8
+ find_unused_parameters = True
9
+ dataset_type = 'PanopticSceneGraphDataset'
10
+
11
+ # HACK:
12
+ object_classes = [
13
+ 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train',
14
+ 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign',
15
+ 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
16
+ 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag',
17
+ 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite',
18
+ 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
19
+ 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon',
20
+ 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot',
21
+ 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant',
22
+ 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote',
23
+ 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink',
24
+ 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
25
+ 'hair drier', 'toothbrush', 'banner', 'blanket', 'bridge', 'cardboard',
26
+ 'counter', 'curtain', 'door-stuff', 'floor-wood', 'flower', 'fruit',
27
+ 'gravel', 'house', 'light', 'mirror-stuff', 'net', 'pillow', 'platform',
28
+ 'playingfield', 'railroad', 'river', 'road', 'roof', 'sand', 'sea',
29
+ 'shelf', 'snow', 'stairs', 'tent', 'towel', 'wall-brick', 'wall-stone',
30
+ 'wall-tile', 'wall-wood', 'water-other', 'window-blind', 'window-other',
31
+ 'tree-merged', 'fence-merged', 'ceiling-merged', 'sky-other-merged',
32
+ 'cabinet-merged', 'table-merged', 'floor-other-merged', 'pavement-merged',
33
+ 'mountain-merged', 'grass-merged', 'dirt-merged', 'paper-merged',
34
+ 'food-other-merged', 'building-other-merged', 'rock-merged',
35
+ 'wall-other-merged', 'rug-merged'
36
+ ]
37
+
38
+ predicate_classes = [
39
+ 'over',
40
+ 'in front of',
41
+ 'beside',
42
+ 'on',
43
+ 'in',
44
+ 'attached to',
45
+ 'hanging from',
46
+ 'on back of',
47
+ 'falling off',
48
+ 'going down',
49
+ 'painted on',
50
+ 'walking on',
51
+ 'running on',
52
+ 'crossing',
53
+ 'standing on',
54
+ 'lying on',
55
+ 'sitting on',
56
+ 'flying over',
57
+ 'jumping over',
58
+ 'jumping from',
59
+ 'wearing',
60
+ 'holding',
61
+ 'carrying',
62
+ 'looking at',
63
+ 'guiding',
64
+ 'kissing',
65
+ 'eating',
66
+ 'drinking',
67
+ 'feeding',
68
+ 'biting',
69
+ 'catching',
70
+ 'picking',
71
+ 'playing with',
72
+ 'chasing',
73
+ 'climbing',
74
+ 'cleaning',
75
+ 'playing',
76
+ 'touching',
77
+ 'pushing',
78
+ 'pulling',
79
+ 'opening',
80
+ 'cooking',
81
+ 'talking to',
82
+ 'throwing',
83
+ 'slicing',
84
+ 'driving',
85
+ 'riding',
86
+ 'parked on',
87
+ 'driving on',
88
+ 'about to hit',
89
+ 'kicking',
90
+ 'swinging',
91
+ 'entering',
92
+ 'exiting',
93
+ 'enclosing',
94
+ 'leaning on',
95
+ ]
96
+
97
+ model = dict(
98
+ type='SceneGraphPanopticFPN',
99
+ semantic_head=dict(
100
+ type='PanopticFPNHead',
101
+ num_things_classes=80,
102
+ num_stuff_classes=53,
103
+ in_channels=256,
104
+ inner_channels=128,
105
+ start_level=0,
106
+ end_level=4,
107
+ norm_cfg=dict(type='GN', num_groups=32, requires_grad=True),
108
+ conv_cfg=None,
109
+ loss_seg=dict(type='CrossEntropyLoss',
110
+ ignore_index=255,
111
+ loss_weight=0.5),
112
+ ),
113
+ panoptic_fusion_head=dict(type='HeuristicFusionHead',
114
+ num_things_classes=80,
115
+ num_stuff_classes=53),
116
+ test_cfg=dict(panoptic=dict(
117
+ score_thr=0.6,
118
+ max_per_img=100,
119
+ mask_thr_binary=0.5,
120
+ mask_overlap=0.5,
121
+ nms=dict(type='nms', iou_threshold=0.5, class_agnostic=True),
122
+ stuff_area_limit=4096,
123
+ )),
124
+ relation_head=dict(
125
+ type='MotifHead',
126
+ object_classes=object_classes,
127
+ predicate_classes=predicate_classes,
128
+ num_classes=len(object_classes) + 1, # with background class
129
+ num_predicates=len(predicate_classes) + 1,
130
+ use_bias=False, # NOTE: whether to use frequency bias
131
+ head_config=dict(
132
+ # NOTE: Evaluation type
133
+ use_gt_box=True,
134
+ use_gt_label=True,
135
+ use_vision=True,
136
+ embed_dim=200,
137
+ hidden_dim=512,
138
+ roi_dim=1024,
139
+ context_pooling_dim=4096,
140
+ dropout_rate=0.2,
141
+ context_object_layer=1,
142
+ context_edge_layer=1,
143
+ glove_dir='data/glove/',
144
+ causal_effect_analysis=False,
145
+ ),
146
+ bbox_roi_extractor=dict(
147
+ type='VisualSpatialExtractor',
148
+ bbox_roi_layer=dict(type='RoIAlign',
149
+ output_size=7,
150
+ sampling_ratio=2),
151
+ with_visual_bbox=True,
152
+ with_visual_mask=False,
153
+ with_visual_point=False,
154
+ with_spatial=False,
155
+ in_channels=256,
156
+ fc_out_channels=1024,
157
+ featmap_strides=[4, 8, 16, 32],
158
+ ),
159
+ relation_roi_extractor=dict(
160
+ type='VisualSpatialExtractor',
161
+ bbox_roi_layer=dict(type='RoIAlign',
162
+ output_size=7,
163
+ sampling_ratio=2),
164
+ with_visual_bbox=True,
165
+ with_visual_mask=False,
166
+ with_visual_point=False,
167
+ with_spatial=True,
168
+ separate_spatial=False,
169
+ in_channels=256,
170
+ fc_out_channels=1024,
171
+ featmap_strides=[4, 8, 16, 32],
172
+ ),
173
+ relation_sampler=dict(
174
+ type='Motif',
175
+ pos_iou_thr=0.5,
176
+ require_overlap=False, # overlap is not required for sgdet training
177
+ num_sample_per_gt_rel=4,
178
+ num_rel_per_image=1024,
179
+ pos_fraction=0.25,
180
+ # NOTE: To only include overlapping bboxes?
181
+ test_overlap=False, # for testing
182
+ ),
183
+ loss_object=dict(type='CrossEntropyLoss',
184
+ use_sigmoid=False,
185
+ loss_weight=1.0),
186
+ loss_relation=dict(type='CrossEntropyLoss',
187
+ use_sigmoid=False,
188
+ loss_weight=1.0),
189
+ ),
190
+ )
191
+
192
+ custom_hooks = []
193
+
194
+ # To freeze modules
195
+ freeze_modules = [
196
+ 'backbone',
197
+ 'neck',
198
+ 'rpn_head',
199
+ 'roi_head',
200
+ 'semantic_head',
201
+ 'panoptic_fusion_head',
202
+ ]
203
+
204
+ evaluation = dict(interval=1,
205
+ metric='predcls',
206
+ relation_mode=True,
207
+ classwise=True)
208
+
209
+ # Change batch size and learning rate
210
+ data = dict(samples_per_gpu=16, )
211
+ # optimizer = dict(lr=0.003)
212
+ optimizer = dict(type='SGD', lr=0.03, momentum=0.9, weight_decay=0.0001)
213
+ optimizer_config = dict(_delete_=True,
214
+ grad_clip=dict(max_norm=35, norm_type=2))
215
+
216
+ lr_config = dict(policy='step',
217
+ warmup='linear',
218
+ warmup_iters=500,
219
+ warmup_ratio=1.0 / 3,
220
+ step=[7, 10])
221
+
222
+ # Log config
223
+ project_name = 'openpsg'
224
+ expt_name = 'motifs_panoptic_fpn_r50_fpn_1x_predcls_psg'
225
+ work_dir = f'./work_dirs/{expt_name}'
226
+
227
+ log_config = dict(
228
+ interval=50,
229
+ hooks=[
230
+ dict(type='TextLoggerHook'),
231
+ dict(
232
+ type='WandbLoggerHook',
233
+ init_kwargs=dict(
234
+ project=project_name,
235
+ name=expt_name,
236
+ ),
237
+ ),
238
+ ],
239
+ )
240
+
241
+ load_from = 'work_dirs/checkpoints/panoptic_fpn_r50_fpn_1x_coco_20210821_101153-9668fd13.pth'
OpenPSG/configs/motifs/panoptic_fpn_r50_fpn_1x_sgdet_psg.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _base_ = [
2
+ './panoptic_fpn_r50_fpn_1x_predcls_psg.py',
3
+ ]
4
+
5
+ model = dict(
6
+ relation_head=dict(
7
+ head_config=dict(
8
+ # NOTE: Evaluation type
9
+ use_gt_box=False,
10
+ use_gt_label=False,
11
+ ), ),
12
+ roi_head=dict(bbox_head=dict(type='SceneGraphBBoxHead'), ),
13
+ )
14
+
15
+ evaluation = dict(interval=1,
16
+ metric='sgdet',
17
+ relation_mode=True,
18
+ classwise=True,
19
+ iou_thrs=0.5,
20
+ detection_method='pan_seg')
21
+
22
+ # Change batch size and learning rate
23
+ data = dict(samples_per_gpu=8,
24
+ # workers_per_gpu=2
25
+ )
26
+
27
+ # Log config
28
+ project_name = 'openpsg'
29
+ expt_name = 'motifs_panoptic_fpn_r50_fpn_1x_sgdet_psg'
30
+ work_dir = f'./work_dirs/{expt_name}'
31
+
32
+ log_config = dict(
33
+ interval=50,
34
+ hooks=[
35
+ dict(type='TextLoggerHook'),
36
+ dict(
37
+ type='WandbLoggerHook',
38
+ init_kwargs=dict(
39
+ project=project_name,
40
+ name=expt_name,
41
+ ),
42
+ ),
43
+ ],
44
+ )
OpenPSG/configs/psgformer/psgformer_r101_psg.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _base_ = './psgformer_r50_psg.py'
2
+
3
+ model = dict(backbone=dict(
4
+ depth=101,
5
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet101')))
6
+
7
+ # learning policy
8
+ lr_config = dict(policy='step', step=48)
9
+ runner = dict(type='EpochBasedRunner', max_epochs=60)
10
+
11
+ project_name = 'psgformer'
12
+ expt_name = 'psgformer_r101_psg'
13
+ work_dir = f'./work_dirs/{expt_name}'
14
+ checkpoint_config = dict(interval=12, max_keep_ckpts=10)
15
+
16
+ load_from = './work_dirs/checkpoints/detr4psgformer_r101.pth'
OpenPSG/configs/psgformer/psgformer_r50.py ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model = dict(
2
+ type='PSGTr',
3
+ backbone=dict(type='ResNet',
4
+ depth=50,
5
+ num_stages=4,
6
+ out_indices=(0, 1, 2, 3),
7
+ frozen_stages=1,
8
+ norm_cfg=dict(type='BN', requires_grad=False),
9
+ norm_eval=True,
10
+ style='pytorch',
11
+ init_cfg=dict(type='Pretrained',
12
+ checkpoint='torchvision://resnet50')),
13
+ bbox_head=dict(
14
+ type='PSGFormerHead',
15
+ num_classes=80,
16
+ num_relations=117,
17
+ in_channels=2048,
18
+ transformer=dict(
19
+ type='DualTransformer',
20
+ encoder=dict(type='DetrTransformerEncoder',
21
+ num_layers=6,
22
+ transformerlayers=dict(
23
+ type='BaseTransformerLayer',
24
+ attn_cfgs=[
25
+ dict(type='MultiheadAttention',
26
+ embed_dims=256,
27
+ num_heads=8,
28
+ dropout=0.1)
29
+ ],
30
+ feedforward_channels=2048,
31
+ ffn_dropout=0.1,
32
+ operation_order=('self_attn', 'norm', 'ffn',
33
+ 'norm'))),
34
+ decoder1=dict(type='DetrTransformerDecoder',
35
+ return_intermediate=True,
36
+ num_layers=6,
37
+ transformerlayers=dict(
38
+ type='DetrTransformerDecoderLayer',
39
+ attn_cfgs=dict(type='MultiheadAttention',
40
+ embed_dims=256,
41
+ num_heads=8,
42
+ dropout=0.1),
43
+ feedforward_channels=2048,
44
+ ffn_dropout=0.1,
45
+ operation_order=('self_attn', 'norm',
46
+ 'cross_attn', 'norm', 'ffn',
47
+ 'norm'))),
48
+ decoder2=dict(type='DetrTransformerDecoder',
49
+ return_intermediate=True,
50
+ num_layers=6,
51
+ transformerlayers=dict(
52
+ type='DetrTransformerDecoderLayer',
53
+ attn_cfgs=dict(type='MultiheadAttention',
54
+ embed_dims=256,
55
+ num_heads=8,
56
+ dropout=0.1),
57
+ feedforward_channels=2048,
58
+ ffn_dropout=0.1,
59
+ operation_order=('self_attn', 'norm',
60
+ 'cross_attn', 'norm', 'ffn',
61
+ 'norm'))),
62
+ ),
63
+ positional_encoding=dict(type='SinePositionalEncoding',
64
+ num_feats=128,
65
+ normalize=True),
66
+ rel_loss_cls=dict(type='CrossEntropyLoss',
67
+ use_sigmoid=False,
68
+ loss_weight=2.0,
69
+ class_weight=1.0),
70
+ sub_id_loss=dict(type='MultilabelCrossEntropy', loss_weight=2.0),
71
+ obj_id_loss=dict(type='MultilabelCrossEntropy', loss_weight=2.0),
72
+ loss_cls=dict(type='CrossEntropyLoss',
73
+ use_sigmoid=False,
74
+ loss_weight=4.0,
75
+ class_weight=1.0),
76
+ loss_bbox=dict(type='L1Loss', loss_weight=3.0),
77
+ loss_iou=dict(type='GIoULoss', loss_weight=2.0),
78
+ focal_loss=dict(type='BCEFocalLoss', loss_weight=1.0),
79
+ dice_loss=dict(type='psgtrDiceLoss', loss_weight=1.0)),
80
+ # training and testing settings
81
+ train_cfg=dict(id_assigner=dict(type='IdMatcher',
82
+ sub_id_cost=dict(type='ClassificationCost',
83
+ weight=1.),
84
+ obj_id_cost=dict(type='ClassificationCost',
85
+ weight=1.),
86
+ r_cls_cost=dict(type='ClassificationCost',
87
+ weight=1.)),
88
+ bbox_assigner=dict(type='HungarianAssigner',
89
+ cls_cost=dict(type='ClassificationCost',
90
+ weight=4.0),
91
+ reg_cost=dict(type='BBoxL1Cost',
92
+ weight=3.0),
93
+ iou_cost=dict(type='IoUCost',
94
+ iou_mode='giou',
95
+ weight=2.0))),
96
+ test_cfg=dict(max_per_img=100))
OpenPSG/configs/psgformer/psgformer_r50_psg.py ADDED
@@ -0,0 +1,244 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _base_ = [
2
+ './psgformer_r50.py', '../_base_/datasets/psg.py',
3
+ '../_base_/custom_runtime.py'
4
+ ]
5
+
6
+ find_unused_parameters = True
7
+
8
+ custom_imports = dict(imports=[
9
+ 'openpsg.models.frameworks.psgtr', 'openpsg.models.losses.seg_losses',
10
+ 'openpsg.models.frameworks.dual_transformer',
11
+ 'openpsg.models.relation_heads.psgformer_head', 'openpsg.datasets',
12
+ 'openpsg.datasets.pipelines.loading',
13
+ 'openpsg.datasets.pipelines.rel_randomcrop',
14
+ 'openpsg.models.relation_heads.approaches.matcher', 'openpsg.utils'
15
+ ],
16
+ allow_failed_imports=False)
17
+
18
+ dataset_type = 'PanopticSceneGraphDataset'
19
+
20
+ # HACK:
21
+ object_classes = [
22
+ 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train',
23
+ 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign',
24
+ 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
25
+ 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag',
26
+ 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite',
27
+ 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
28
+ 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon',
29
+ 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot',
30
+ 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant',
31
+ 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote',
32
+ 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink',
33
+ 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
34
+ 'hair drier', 'toothbrush', 'banner', 'blanket', 'bridge', 'cardboard',
35
+ 'counter', 'curtain', 'door-stuff', 'floor-wood', 'flower', 'fruit',
36
+ 'gravel', 'house', 'light', 'mirror-stuff', 'net', 'pillow', 'platform',
37
+ 'playingfield', 'railroad', 'river', 'road', 'roof', 'sand', 'sea',
38
+ 'shelf', 'snow', 'stairs', 'tent', 'towel', 'wall-brick', 'wall-stone',
39
+ 'wall-tile', 'wall-wood', 'water-other', 'window-blind', 'window-other',
40
+ 'tree-merged', 'fence-merged', 'ceiling-merged', 'sky-other-merged',
41
+ 'cabinet-merged', 'table-merged', 'floor-other-merged', 'pavement-merged',
42
+ 'mountain-merged', 'grass-merged', 'dirt-merged', 'paper-merged',
43
+ 'food-other-merged', 'building-other-merged', 'rock-merged',
44
+ 'wall-other-merged', 'rug-merged'
45
+ ]
46
+
47
+ predicate_classes = [
48
+ 'over',
49
+ 'in front of',
50
+ 'beside',
51
+ 'on',
52
+ 'in',
53
+ 'attached to',
54
+ 'hanging from',
55
+ 'on back of',
56
+ 'falling off',
57
+ 'going down',
58
+ 'painted on',
59
+ 'walking on',
60
+ 'running on',
61
+ 'crossing',
62
+ 'standing on',
63
+ 'lying on',
64
+ 'sitting on',
65
+ 'flying over',
66
+ 'jumping over',
67
+ 'jumping from',
68
+ 'wearing',
69
+ 'holding',
70
+ 'carrying',
71
+ 'looking at',
72
+ 'guiding',
73
+ 'kissing',
74
+ 'eating',
75
+ 'drinking',
76
+ 'feeding',
77
+ 'biting',
78
+ 'catching',
79
+ 'picking',
80
+ 'playing with',
81
+ 'chasing',
82
+ 'climbing',
83
+ 'cleaning',
84
+ 'playing',
85
+ 'touching',
86
+ 'pushing',
87
+ 'pulling',
88
+ 'opening',
89
+ 'cooking',
90
+ 'talking to',
91
+ 'throwing',
92
+ 'slicing',
93
+ 'driving',
94
+ 'riding',
95
+ 'parked on',
96
+ 'driving on',
97
+ 'about to hit',
98
+ 'kicking',
99
+ 'swinging',
100
+ 'entering',
101
+ 'exiting',
102
+ 'enclosing',
103
+ 'leaning on',
104
+ ]
105
+
106
+ model = dict(bbox_head=dict(
107
+ num_classes=len(object_classes),
108
+ num_relations=len(predicate_classes),
109
+ object_classes=object_classes,
110
+ predicate_classes=predicate_classes,
111
+ num_obj_query=100,
112
+ num_rel_query=100,
113
+ ), )
114
+
115
+ img_norm_cfg = dict(mean=[123.675, 116.28, 103.53],
116
+ std=[58.395, 57.12, 57.375],
117
+ to_rgb=True)
118
+ # train_pipeline. NOTE: the img_scale and the Pad's size_divisor are different
119
+ # from the default settings in mmdet.
120
+ train_pipeline = [
121
+ dict(type='LoadImageFromFile'),
122
+ dict(type='LoadPanopticSceneGraphAnnotations',
123
+ with_bbox=True,
124
+ with_rel=True,
125
+ with_mask=True,
126
+ with_seg=True),
127
+ dict(type='RandomFlip', flip_ratio=0.5),
128
+ dict(
129
+ type='AutoAugment',
130
+ policies=[
131
+ [
132
+ dict(type='Resize',
133
+ img_scale=[(480, 1333), (512, 1333), (544, 1333),
134
+ (576, 1333), (608, 1333), (640, 1333),
135
+ (672, 1333), (704, 1333), (736, 1333),
136
+ (768, 1333), (800, 1333)],
137
+ multiscale_mode='value',
138
+ keep_ratio=True)
139
+ ],
140
+ [
141
+ dict(type='Resize',
142
+ img_scale=[(400, 1333), (500, 1333), (600, 1333)],
143
+ multiscale_mode='value',
144
+ keep_ratio=True),
145
+ dict(type='RelRandomCrop',
146
+ crop_type='absolute_range',
147
+ crop_size=(384, 600),
148
+ allow_negative_crop=False), # no empty relations
149
+ dict(type='Resize',
150
+ img_scale=[(480, 1333), (512, 1333), (544, 1333),
151
+ (576, 1333), (608, 1333), (640, 1333),
152
+ (672, 1333), (704, 1333), (736, 1333),
153
+ (768, 1333), (800, 1333)],
154
+ multiscale_mode='value',
155
+ override=True,
156
+ keep_ratio=True)
157
+ ]
158
+ ]),
159
+ dict(type='Normalize', **img_norm_cfg),
160
+ dict(type='Pad', size_divisor=1),
161
+ dict(type='RelsFormatBundle'),
162
+ dict(type='Collect',
163
+ keys=['img', 'gt_bboxes', 'gt_labels', 'gt_rels', 'gt_masks'])
164
+ ]
165
+ # test_pipeline. NOTE: the Pad's size_divisor is different from the default
166
+ # setting (size_divisor=32), although it makes little difference to performance
167
+ # whether we use the default setting or size_divisor=1.
168
+ test_pipeline = [
169
+ dict(type='LoadImageFromFile'),
170
+ dict(type='LoadSceneGraphAnnotations', with_bbox=True, with_rel=True),
171
+ dict(type='MultiScaleFlipAug',
172
+ img_scale=(1333, 800),
173
+ flip=False,
174
+ transforms=[
175
+ dict(type='Resize', keep_ratio=True),
176
+ dict(type='RandomFlip'),
177
+ dict(type='Normalize', **img_norm_cfg),
178
+ dict(type='Pad', size_divisor=1),
179
+ dict(type='ImageToTensor', keys=['img']),
180
+ dict(type='ToTensor', keys=['gt_bboxes', 'gt_labels']),
181
+ dict(type='ToDataContainer',
182
+ fields=(dict(key='gt_bboxes'), dict(key='gt_labels'))),
183
+ dict(type='Collect', keys=['img']),
184
+ ])
185
+ ]
186
+
187
+ evaluation = dict(
188
+ interval=1,
189
+ metric='sgdet',
190
+ relation_mode=True,
191
+ classwise=True,
192
+ iou_thrs=0.5,
193
+ detection_method='pan_seg',
194
+ )
195
+
196
+ data = dict(samples_per_gpu=1,
197
+ workers_per_gpu=2,
198
+ train=dict(pipeline=train_pipeline),
199
+ val=dict(pipeline=test_pipeline),
200
+ test=dict(pipeline=test_pipeline))
201
+ # optimizer
202
+ optimizer = dict(
203
+ type='AdamW',
204
+ lr=0.001,
205
+ weight_decay=0.0001,
206
+ paramwise_cfg=dict(
207
+ custom_keys={
208
+ 'backbone': dict(lr_mult=0.1, decay_mult=1.0),
209
+ 'transformer.encoder': dict(lr_mult=0.1, decay_mult=1.0),
210
+ 'transformer.decoder1': dict(lr_mult=0.1, decay_mult=1.0),
211
+ 'obj_query_embed': dict(lr_mult=0.1, decay_mult=1.0),
212
+ 'input_proj': dict(lr_mult=0.1, decay_mult=1.0),
213
+ 'class_embed': dict(lr_mult=0.1, decay_mult=1.0),
214
+ 'box_embed': dict(lr_mult=0.1, decay_mult=1.0),
215
+ 'bbox_attention': dict(lr_mult=0.1, decay_mult=1.0),
216
+ 'mask_head': dict(lr_mult=0.1, decay_mult=1.0),
217
+ }))
218
+
219
+ optimizer_config = dict(grad_clip=dict(max_norm=0.1, norm_type=2))
220
+
221
+ # learning policy
222
+ lr_config = dict(policy='step', step=40)
223
+ runner = dict(type='EpochBasedRunner', max_epochs=60)
224
+
225
+ project_name = 'psgformer'
226
+ expt_name = 'psgformer_r50_psg'
227
+ work_dir = f'./work_dirs/{expt_name}'
228
+ checkpoint_config = dict(interval=1, max_keep_ckpts=15)
229
+
230
+ log_config = dict(
231
+ interval=50,
232
+ hooks=[
233
+ dict(type='TextLoggerHook'),
234
+ dict(
235
+ type='WandbLoggerHook',
236
+ init_kwargs=dict(
237
+ project=project_name,
238
+ name=expt_name,
239
+ ),
240
+ )
241
+ ],
242
+ )
243
+
244
+ load_from = './work_dirs/checkpoints/detr4psgformer_r50.pth'
OpenPSG/configs/psgformer/psgformer_r50_psg_inference.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _base_ = [
2
+ './psgformer_r50_psg.py'
3
+ ]
4
+
5
+ img_norm_cfg = dict(mean=[123.675, 116.28, 103.53],
6
+ std=[58.395, 57.12, 57.375],
7
+ to_rgb=True)
8
+ pipeline = [
9
+ dict(type='LoadImageFromFile'),
10
+ dict(
11
+ type='MultiScaleFlipAug',
12
+ img_scale=(1333, 800),
13
+ flip=False,
14
+ transforms=[
15
+ dict(type='Resize', keep_ratio=True),
16
+ dict(type='RandomFlip'),
17
+ dict(type='Normalize', **img_norm_cfg),
18
+ dict(type='Pad', size_divisor=32),
19
+ # NOTE: Do not convert the img to a DataContainer (DC).
20
+ dict(type='ImageToTensor', keys=['img']),
21
+ dict(type='Collect', keys=['img']),
22
+
23
+ ],
24
+ ),
25
+ ]
26
+
27
+ data = dict(
28
+ test=dict(
29
+ pipeline=pipeline,
30
+ ),
31
+ )
OpenPSG/configs/psgtr/psgtr_r101_psg.py ADDED
@@ -0,0 +1,231 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _base_ = [
2
+ '../_base_/models/psgtr_r101.py', '../_base_/datasets/psg.py',
3
+ '../_base_/custom_runtime.py'
4
+ ]
5
+
6
+ custom_imports = dict(imports=[
7
+ 'openpsg.models.frameworks.psgtr', 'openpsg.models.losses.seg_losses',
8
+ 'openpsg.models.relation_heads.psgtr_head', 'openpsg.datasets',
9
+ 'openpsg.datasets.pipelines.loading',
10
+ 'openpsg.datasets.pipelines.rel_randomcrop',
11
+ 'openpsg.models.relation_heads.approaches.matcher', 'openpsg.utils'
12
+ ],
13
+ allow_failed_imports=False)
14
+
15
+ dataset_type = 'PanopticSceneGraphDataset'
16
+
17
+ # HACK:
18
+ object_classes = [
19
+ 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train',
20
+ 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign',
21
+ 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
22
+ 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag',
23
+ 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite',
24
+ 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
25
+ 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon',
26
+ 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot',
27
+ 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant',
28
+ 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote',
29
+ 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink',
30
+ 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
31
+ 'hair drier', 'toothbrush', 'banner', 'blanket', 'bridge', 'cardboard',
32
+ 'counter', 'curtain', 'door-stuff', 'floor-wood', 'flower', 'fruit',
33
+ 'gravel', 'house', 'light', 'mirror-stuff', 'net', 'pillow', 'platform',
34
+ 'playingfield', 'railroad', 'river', 'road', 'roof', 'sand', 'sea',
35
+ 'shelf', 'snow', 'stairs', 'tent', 'towel', 'wall-brick', 'wall-stone',
36
+ 'wall-tile', 'wall-wood', 'water-other', 'window-blind', 'window-other',
37
+ 'tree-merged', 'fence-merged', 'ceiling-merged', 'sky-other-merged',
38
+ 'cabinet-merged', 'table-merged', 'floor-other-merged', 'pavement-merged',
39
+ 'mountain-merged', 'grass-merged', 'dirt-merged', 'paper-merged',
40
+ 'food-other-merged', 'building-other-merged', 'rock-merged',
41
+ 'wall-other-merged', 'rug-merged'
42
+ ]
43
+
44
+ predicate_classes = [
45
+ 'over',
46
+ 'in front of',
47
+ 'beside',
48
+ 'on',
49
+ 'in',
50
+ 'attached to',
51
+ 'hanging from',
52
+ 'on back of',
53
+ 'falling off',
54
+ 'going down',
55
+ 'painted on',
56
+ 'walking on',
57
+ 'running on',
58
+ 'crossing',
59
+ 'standing on',
60
+ 'lying on',
61
+ 'sitting on',
62
+ 'flying over',
63
+ 'jumping over',
64
+ 'jumping from',
65
+ 'wearing',
66
+ 'holding',
67
+ 'carrying',
68
+ 'looking at',
69
+ 'guiding',
70
+ 'kissing',
71
+ 'eating',
72
+ 'drinking',
73
+ 'feeding',
74
+ 'biting',
75
+ 'catching',
76
+ 'picking',
77
+ 'playing with',
78
+ 'chasing',
79
+ 'climbing',
80
+ 'cleaning',
81
+ 'playing',
82
+ 'touching',
83
+ 'pushing',
84
+ 'pulling',
85
+ 'opening',
86
+ 'cooking',
87
+ 'talking to',
88
+ 'throwing',
89
+ 'slicing',
90
+ 'driving',
91
+ 'riding',
92
+ 'parked on',
93
+ 'driving on',
94
+ 'about to hit',
95
+ 'kicking',
96
+ 'swinging',
97
+ 'entering',
98
+ 'exiting',
99
+ 'enclosing',
100
+ 'leaning on',
101
+ ]
102
+
103
+ model = dict(bbox_head=dict(
104
+ num_classes=len(object_classes),
105
+ num_relations=len(predicate_classes),
106
+ object_classes=object_classes,
107
+ predicate_classes=predicate_classes,
108
+ use_mask=True,
109
+ num_query=100,
110
+ ), )
111
+
112
+ img_norm_cfg = dict(mean=[123.675, 116.28, 103.53],
113
+ std=[58.395, 57.12, 57.375],
114
+ to_rgb=True)
115
+ # train_pipeline. NOTE: the img_scale and the Pad's size_divisor are different
116
+ # from the default settings in mmdet.
117
+ train_pipeline = [
118
+ dict(type='LoadImageFromFile'),
119
+ dict(type='LoadPanopticSceneGraphAnnotations',
120
+ with_bbox=True,
121
+ with_rel=True,
122
+ with_mask=True,
123
+ with_seg=True),
124
+ dict(type='RandomFlip', flip_ratio=0.5),
125
+ dict(
126
+ type='AutoAugment',
127
+ policies=[
128
+ [
129
+ dict(type='Resize',
130
+ img_scale=[(480, 1333), (512, 1333), (544, 1333),
131
+ (576, 1333), (608, 1333), (640, 1333),
132
+ (672, 1333), (704, 1333), (736, 1333),
133
+ (768, 1333), (800, 1333)],
134
+ multiscale_mode='value',
135
+ keep_ratio=True)
136
+ ],
137
+ [
138
+ dict(type='Resize',
139
+ img_scale=[(400, 1333), (500, 1333), (600, 1333)],
140
+ multiscale_mode='value',
141
+ keep_ratio=True),
142
+ dict(type='RelRandomCrop',
143
+ crop_type='absolute_range',
144
+ crop_size=(384, 600),
145
+ allow_negative_crop=False), # no empty relations
146
+ dict(type='Resize',
147
+ img_scale=[(480, 1333), (512, 1333), (544, 1333),
148
+ (576, 1333), (608, 1333), (640, 1333),
149
+ (672, 1333), (704, 1333), (736, 1333),
150
+ (768, 1333), (800, 1333)],
151
+ multiscale_mode='value',
152
+ override=True,
153
+ keep_ratio=True)
154
+ ]
155
+ ]),
156
+ dict(type='Normalize', **img_norm_cfg),
157
+ dict(type='Pad', size_divisor=1),
158
+ dict(type='RelsFormatBundle'),
159
+ dict(type='Collect',
160
+ keys=['img', 'gt_bboxes', 'gt_labels', 'gt_rels', 'gt_masks'])
161
+ ]
162
+ # test_pipeline. NOTE: the Pad's size_divisor is different from the default
163
+ # setting (size_divisor=32), although it makes little difference to performance
164
+ # whether we use the default setting or size_divisor=1.
165
+ test_pipeline = [
166
+ dict(type='LoadImageFromFile'),
167
+ # dict(type='LoadSceneGraphAnnotations', with_bbox=True, with_rel=True),
168
+ dict(
169
+ type='MultiScaleFlipAug',
170
+ img_scale=(1333, 800),
171
+ flip=False,
172
+ transforms=[
173
+ dict(type='Resize', keep_ratio=True),
174
+ dict(type='RandomFlip'),
175
+ dict(type='Normalize', **img_norm_cfg),
176
+ dict(type='Pad', size_divisor=1),
177
+ dict(type='ImageToTensor', keys=['img']),
178
+ # dict(type='ToTensor', keys=['gt_bboxes', 'gt_labels']),
179
+ # dict(type='ToDataContainer', fields=(dict(key='gt_bboxes'), dict(key='gt_labels'))),
180
+ dict(type='Collect', keys=['img']),
181
+ ])
182
+ ]
183
+
184
+ evaluation = dict(
185
+ interval=1,
186
+ metric='sgdet',
187
+ relation_mode=True,
188
+ classwise=True,
189
+ iou_thrs=0.5,
190
+ detection_method='pan_seg',
191
+ )
192
+
193
+ data = dict(samples_per_gpu=1,
194
+ workers_per_gpu=2,
195
+ train=dict(pipeline=train_pipeline),
196
+ val=dict(pipeline=test_pipeline),
197
+ test=dict(pipeline=test_pipeline))
198
+ # optimizer
199
+ optimizer = dict(
200
+ type='AdamW',
201
+ lr=0.0001,
202
+ weight_decay=0.0001,
203
+ paramwise_cfg=dict(custom_keys={
204
+ 'backbone': dict(lr_mult=0.1, decay_mult=1.0),
205
+ }))
206
+ optimizer_config = dict(grad_clip=dict(max_norm=0.1, norm_type=2))
207
+
208
+ # learning policy
209
+ lr_config = dict(policy='step', step=40)
210
+ runner = dict(type='EpochBasedRunner', max_epochs=60)
211
+
212
+ project_name = 'psgtr'
213
+ expt_name = 'psgtr_r101_psg'
214
+ work_dir = f'./work_dirs/{expt_name}'
215
+ checkpoint_config = dict(interval=2, max_keep_ckpts=10)
216
+
217
+ log_config = dict(
218
+ interval=50,
219
+ hooks=[
220
+ dict(type='TextLoggerHook'),
221
+ dict(
222
+ type='WandbLoggerHook',
223
+ init_kwargs=dict(
224
+ project=project_name,
225
+ name=expt_name,
226
+ ),
227
+ )
228
+ ],
229
+ )
230
+
231
+ load_from = 'work_dirs/checkpoints/detr_pan_r101.pth'
OpenPSG/configs/psgtr/psgtr_r50.py ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model = dict(
2
+ type='PSGTr',
3
+ backbone=dict(type='ResNet',
4
+ depth=50,
5
+ num_stages=4,
6
+ out_indices=(0, 1, 2, 3),
7
+ frozen_stages=1,
8
+ norm_cfg=dict(type='BN', requires_grad=False),
9
+ norm_eval=True,
10
+ style='pytorch',
11
+ init_cfg=dict(type='Pretrained',
12
+ checkpoint='torchvision://resnet50')),
13
+ bbox_head=dict(type='PSGTrHead',
14
+ num_classes=80,
15
+ num_relations=117,
16
+ in_channels=2048,
17
+ transformer=dict(
18
+ type='Transformer',
19
+ encoder=dict(type='DetrTransformerEncoder',
20
+ num_layers=6,
21
+ transformerlayers=dict(
22
+ type='BaseTransformerLayer',
23
+ attn_cfgs=[
24
+ dict(type='MultiheadAttention',
25
+ embed_dims=256,
26
+ num_heads=8,
27
+ dropout=0.1)
28
+ ],
29
+ feedforward_channels=2048,
30
+ ffn_dropout=0.1,
31
+ operation_order=('self_attn', 'norm',
32
+ 'ffn', 'norm'))),
33
+ decoder=dict(
34
+ type='DetrTransformerDecoder',
35
+ return_intermediate=True,
36
+ num_layers=6,
37
+ transformerlayers=dict(
38
+ type='DetrTransformerDecoderLayer',
39
+ attn_cfgs=dict(type='MultiheadAttention',
40
+ embed_dims=256,
41
+ num_heads=8,
42
+ dropout=0.1),
43
+ feedforward_channels=2048,
44
+ ffn_dropout=0.1,
45
+ operation_order=('self_attn', 'norm',
46
+ 'cross_attn', 'norm', 'ffn',
47
+ 'norm')),
48
+ )),
49
+ positional_encoding=dict(type='SinePositionalEncoding',
50
+ num_feats=128,
51
+ normalize=True),
52
+ sub_loss_cls=dict(type='CrossEntropyLoss',
53
+ use_sigmoid=False,
54
+ loss_weight=1.0,
55
+ class_weight=1.0),
56
+ sub_loss_bbox=dict(type='L1Loss', loss_weight=5.0),
57
+ sub_loss_iou=dict(type='GIoULoss', loss_weight=2.0),
58
+ sub_focal_loss=dict(type='BCEFocalLoss', loss_weight=2.0),
59
+ sub_dice_loss=dict(type='psgtrDiceLoss', loss_weight=2.0),
60
+ obj_loss_cls=dict(type='CrossEntropyLoss',
61
+ use_sigmoid=False,
62
+ loss_weight=1.0,
63
+ class_weight=1.0),
64
+ obj_loss_bbox=dict(type='L1Loss', loss_weight=5.0),
65
+ obj_loss_iou=dict(type='GIoULoss', loss_weight=2.0),
66
+ obj_focal_loss=dict(type='BCEFocalLoss', loss_weight=2.0),
67
+ obj_dice_loss=dict(type='psgtrDiceLoss', loss_weight=2.0),
68
+ rel_loss_cls=dict(type='CrossEntropyLoss',
69
+ use_sigmoid=False,
70
+ loss_weight=2.0,
71
+ class_weight=1.0)),
72
+ # training and testing settings
73
+ train_cfg=dict(assigner=dict(
74
+ type='HTriMatcher',
75
+ s_cls_cost=dict(type='ClassificationCost', weight=1.),
76
+ s_reg_cost=dict(type='BBoxL1Cost', weight=5.0),
77
+ s_iou_cost=dict(type='IoUCost', iou_mode='giou', weight=2.0),
78
+ o_cls_cost=dict(type='ClassificationCost', weight=1.),
79
+ o_reg_cost=dict(type='BBoxL1Cost', weight=5.0),
80
+ o_iou_cost=dict(type='IoUCost', iou_mode='giou', weight=2.0),
81
+ r_cls_cost=dict(type='ClassificationCost', weight=2.))),
82
+ test_cfg=dict(max_per_img=100))
OpenPSG/configs/psgtr/psgtr_r50_psg.py ADDED
@@ -0,0 +1,233 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _base_ = [
2
+ '../_base_/models/psgtr_r50.py', '../_base_/datasets/psg.py',
3
+ '../_base_/custom_runtime.py'
4
+ ]
5
+
6
+ custom_imports = dict(imports=[
7
+ 'openpsg.models.frameworks.psgtr', 'openpsg.models.losses.seg_losses',
8
+ 'openpsg.models.relation_heads.psgtr_head', 'openpsg.datasets',
9
+ 'openpsg.datasets.pipelines.loading',
10
+ 'openpsg.datasets.pipelines.rel_randomcrop',
11
+ 'openpsg.models.relation_heads.approaches.matcher', 'openpsg.utils'
12
+ ],
13
+ allow_failed_imports=False)
14
+
15
+ dataset_type = 'PanopticSceneGraphDataset'
16
+
17
+ # HACK:
18
+ object_classes = [
19
+ 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train',
20
+ 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign',
21
+ 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
22
+ 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag',
23
+ 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite',
24
+ 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
25
+ 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon',
26
+ 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot',
27
+ 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant',
28
+ 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote',
29
+ 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink',
30
+ 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
31
+ 'hair drier', 'toothbrush', 'banner', 'blanket', 'bridge', 'cardboard',
32
+ 'counter', 'curtain', 'door-stuff', 'floor-wood', 'flower', 'fruit',
33
+ 'gravel', 'house', 'light', 'mirror-stuff', 'net', 'pillow', 'platform',
34
+ 'playingfield', 'railroad', 'river', 'road', 'roof', 'sand', 'sea',
35
+ 'shelf', 'snow', 'stairs', 'tent', 'towel', 'wall-brick', 'wall-stone',
36
+ 'wall-tile', 'wall-wood', 'water-other', 'window-blind', 'window-other',
37
+ 'tree-merged', 'fence-merged', 'ceiling-merged', 'sky-other-merged',
38
+ 'cabinet-merged', 'table-merged', 'floor-other-merged', 'pavement-merged',
39
+ 'mountain-merged', 'grass-merged', 'dirt-merged', 'paper-merged',
40
+ 'food-other-merged', 'building-other-merged', 'rock-merged',
41
+ 'wall-other-merged', 'rug-merged'
42
+ ]
43
+
44
+ predicate_classes = [
45
+ 'over',
46
+ 'in front of',
47
+ 'beside',
48
+ 'on',
49
+ 'in',
50
+ 'attached to',
51
+ 'hanging from',
52
+ 'on back of',
53
+ 'falling off',
54
+ 'going down',
55
+ 'painted on',
56
+ 'walking on',
57
+ 'running on',
58
+ 'crossing',
59
+ 'standing on',
60
+ 'lying on',
61
+ 'sitting on',
62
+ 'flying over',
63
+ 'jumping over',
64
+ 'jumping from',
65
+ 'wearing',
66
+ 'holding',
67
+ 'carrying',
68
+ 'looking at',
69
+ 'guiding',
70
+ 'kissing',
71
+ 'eating',
72
+ 'drinking',
73
+ 'feeding',
74
+ 'biting',
75
+ 'catching',
76
+ 'picking',
77
+ 'playing with',
78
+ 'chasing',
79
+ 'climbing',
80
+ 'cleaning',
81
+ 'playing',
82
+ 'touching',
83
+ 'pushing',
84
+ 'pulling',
85
+ 'opening',
86
+ 'cooking',
87
+ 'talking to',
88
+ 'throwing',
89
+ 'slicing',
90
+ 'driving',
91
+ 'riding',
92
+ 'parked on',
93
+ 'driving on',
94
+ 'about to hit',
95
+ 'kicking',
96
+ 'swinging',
97
+ 'entering',
98
+ 'exiting',
99
+ 'enclosing',
100
+ 'leaning on',
101
+ ]
102
+
103
+ model = dict(bbox_head=dict(
104
+ num_classes=len(object_classes),
105
+ num_relations=len(predicate_classes),
106
+ object_classes=object_classes,
107
+ predicate_classes=predicate_classes,
108
+ use_mask=True,
109
+ num_query=100,
110
+ ), )
111
+
112
+ img_norm_cfg = dict(mean=[123.675, 116.28, 103.53],
113
+ std=[58.395, 57.12, 57.375],
114
+ to_rgb=True)
115
+ # train_pipeline. NOTE: the img_scale and the Pad's size_divisor are different
116
+ # from the default settings in mmdet.
117
+ train_pipeline = [
118
+ dict(type='LoadImageFromFile'),
119
+ dict(type='LoadPanopticSceneGraphAnnotations',
120
+ with_bbox=True,
121
+ with_rel=True,
122
+ with_mask=True,
123
+ with_seg=True),
124
+ dict(type='RandomFlip', flip_ratio=0.5),
125
+ dict(
126
+ type='AutoAugment',
127
+ policies=[
128
+ [
129
+ dict(type='Resize',
130
+ img_scale=[(480, 1333), (512, 1333), (544, 1333),
131
+ (576, 1333), (608, 1333), (640, 1333),
132
+ (672, 1333), (704, 1333), (736, 1333),
133
+ (768, 1333), (800, 1333)],
134
+ multiscale_mode='value',
135
+ keep_ratio=True)
136
+ ],
137
+ [
138
+ dict(type='Resize',
139
+ img_scale=[(400, 1333), (500, 1333), (600, 1333)],
140
+ multiscale_mode='value',
141
+ keep_ratio=True),
142
+ dict(type='RelRandomCrop',
143
+ crop_type='absolute_range',
144
+ crop_size=(384, 600),
145
+ allow_negative_crop=False), # no empty relations
146
+ dict(type='Resize',
147
+ img_scale=[(480, 1333), (512, 1333), (544, 1333),
148
+ (576, 1333), (608, 1333), (640, 1333),
149
+ (672, 1333), (704, 1333), (736, 1333),
150
+ (768, 1333), (800, 1333)],
151
+ multiscale_mode='value',
152
+ override=True,
153
+ keep_ratio=True)
154
+ ]
155
+ ]),
156
+ dict(type='Normalize', **img_norm_cfg),
157
+ dict(type='Pad', size_divisor=1),
158
+ dict(type='RelsFormatBundle'),
159
+ dict(type='Collect',
160
+ keys=['img', 'gt_bboxes', 'gt_labels', 'gt_rels', 'gt_masks'])
161
+ ]
162
+ # test_pipeline. NOTE: the Pad's size_divisor is different from the default
163
+ # setting (size_divisor=32), although it makes little difference to performance
164
+ # whether we use the default setting or size_divisor=1.
165
+ test_pipeline = [
166
+ dict(type='LoadImageFromFile'),
167
+ # dict(type='LoadSceneGraphAnnotations', with_bbox=True, with_rel=True),
168
+ dict(
169
+ type='MultiScaleFlipAug',
170
+ img_scale=(1333, 800),
171
+ flip=False,
172
+ transforms=[
173
+ dict(type='Resize', keep_ratio=True),
174
+ dict(type='RandomFlip'),
175
+ dict(type='Normalize', **img_norm_cfg),
176
+ dict(type='Pad', size_divisor=1),
177
+ dict(type='ImageToTensor', keys=['img']),
178
+ # dict(type='ToTensor', keys=['gt_bboxes', 'gt_labels']),
179
+ # dict(type='ToDataContainer', fields=(dict(key='gt_bboxes'), dict(key='gt_labels'))),
180
+ dict(type='Collect', keys=['img']),
181
+ ])
182
+ ]
183
+
184
+ evaluation = dict(
185
+ interval=1,
186
+ metric='sgdet',
187
+ relation_mode=True,
188
+ classwise=True,
189
+ iou_thrs=0.5,
190
+ detection_method='pan_seg',
191
+ )
192
+
193
+ data = dict(samples_per_gpu=1,
194
+ workers_per_gpu=2,
195
+ train=dict(pipeline=train_pipeline),
196
+ val=dict(pipeline=test_pipeline),
197
+ test=dict(pipeline=test_pipeline))
198
+ # optimizer
199
+ optimizer = dict(
200
+ type='AdamW',
201
+ lr=0.0001,
202
+ weight_decay=0.0001,
203
+ paramwise_cfg=dict(custom_keys={
204
+ 'backbone': dict(lr_mult=0.1, decay_mult=1.0),
205
+ }))
206
+ optimizer_config = dict(grad_clip=dict(max_norm=0.1, norm_type=2))
207
+
208
+ # learning policy
209
+ lr_config = dict(policy='step', step=40)
210
+ runner = dict(type='EpochBasedRunner', max_epochs=60)
211
+
212
+ project_name = 'psgformer'
213
+ expt_name = 'psgtr_r50_psg_0.5_scale_mask'
214
+ work_dir = f'./work_dirs/{expt_name}'
215
+ checkpoint_config = dict(interval=2, max_keep_ckpts=10)
216
+
217
+ log_config = dict(
218
+ interval=50,
219
+ hooks=[
220
+ dict(type='TextLoggerHook'),
221
+ # dict(type='TensorboardLoggerHook'),
222
+ dict(
223
+ type='WandbLoggerHook',
224
+ init_kwargs=dict(
225
+ project=project_name,
226
+ name=expt_name,
227
+ # config=work_dir + "/cfg.yaml"
228
+ ),
229
+ )
230
+ ],
231
+ )
232
+
233
+ load_from = 'work_dirs/checkpoints/detr_pan_r50.pth'
OpenPSG/configs/psgtr/psgtr_r50_psg_inference.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _base_ = [
2
+ './psgtr_r50_psg.py'
3
+ ]
4
+
5
+ img_norm_cfg = dict(mean=[123.675, 116.28, 103.53],
6
+ std=[58.395, 57.12, 57.375],
7
+ to_rgb=True)
8
+ pipeline = [
9
+ dict(type='LoadImageFromFile'),
10
+ dict(
11
+ type='MultiScaleFlipAug',
12
+ img_scale=(1333, 800),
13
+ flip=False,
14
+ transforms=[
15
+ dict(type='Resize', keep_ratio=True),
16
+ dict(type='RandomFlip'),
17
+ dict(type='Normalize', **img_norm_cfg),
18
+ dict(type='Pad', size_divisor=32),
19
+ # NOTE: Do not convert the img to a DataContainer (DC).
20
+ dict(type='ImageToTensor', keys=['img']),
21
+ dict(type='Collect', keys=['img']),
22
+
23
+ ],
24
+ ),
25
+ ]
26
+
27
+ data = dict(
28
+ test=dict(
29
+ pipeline=pipeline,
30
+ ),
31
+ )
OpenPSG/configs/vctree/panoptic_fpn_r101_fpn_1x_predcls_psg.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _base_ = './panoptic_fpn_r50_fpn_1x_predcls_psg.py'
2
+
3
+ model = dict(backbone=dict(
4
+ depth=101,
5
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet101')))
6
+
7
+ # Log config
8
+ project_name = 'openpsg'
9
+ expt_name = 'vctree_panoptic_fpn_r101_fpn_1x_predcls_psg'
10
+ work_dir = f'./work_dirs/{expt_name}'
11
+
12
+ log_config = dict(
13
+ interval=50,
14
+ hooks=[
15
+ dict(type='TextLoggerHook'),
16
+ # dict(type='TensorboardLoggerHook')
17
+ dict(
18
+ type='WandbLoggerHook',
19
+ init_kwargs=dict(
20
+ project=project_name,
21
+ name=expt_name,
22
+ # config=work_dir + "/cfg.yaml"
23
+ ),
24
+ ),
25
+ ],
26
+ )
27
+
28
+ load_from = 'work_dirs/checkpoints/panoptic_fpn_r101_fpn_1x_coco_20210820_193950-ab9157a2.pth'
OpenPSG/configs/vctree/panoptic_fpn_r101_fpn_1x_sgdet_psg.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _base_ = './panoptic_fpn_r50_fpn_1x_sgdet_psg.py'
2
+
3
+ model = dict(backbone=dict(
4
+ depth=101,
5
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet101')))
6
+
7
+ # Log config
8
+ project_name = 'openpsg'
9
+ expt_name = 'vctree_panoptic_fpn_r101_fpn_1x_sgdet_psg'
10
+ work_dir = f'./work_dirs/{expt_name}'
11
+
12
+ log_config = dict(
13
+ interval=50,
14
+ hooks=[
15
+ dict(type='TextLoggerHook'),
16
+ # dict(type='TensorboardLoggerHook')
17
+ dict(
18
+ type='WandbLoggerHook',
19
+ init_kwargs=dict(
20
+ project=project_name,
21
+ name=expt_name,
22
+ # config=work_dir + "/cfg.yaml"
23
+ ),
24
+ ),
25
+ ],
26
+ )
27
+
28
+ load_from = 'work_dirs/checkpoints/panoptic_fpn_r101_fpn_1x_coco_20210820_193950-ab9157a2.pth'
OpenPSG/configs/vctree/panoptic_fpn_r50_fpn_1x_predcls_psg.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _base_ = [
2
+ '../motifs/panoptic_fpn_r50_fpn_1x_predcls_psg.py',
3
+ ]
4
+
5
+ model = dict(relation_head=dict(
6
+ type='VCTreeHead',
7
+ head_config=dict(
8
+ # NOTE: Evaluation type
9
+ use_gt_box=True,
10
+ use_gt_label=True,
11
+ ),
12
+ ))
13
+
14
+ evaluation = dict(interval=1,
15
+ metric='predcls',
16
+ relation_mode=True,
17
+ classwise=True)
18
+
19
+ # Change batch size and learning rate
20
+ data = dict(samples_per_gpu=16,
21
+ workers_per_gpu=0) # FIXME: Is this the problem?
22
+ # optimizer = dict(lr=0.001)
23
+
24
+ # Log config
25
+ project_name = 'openpsg'
26
+ expt_name = 'vctree_panoptic_fpn_r50_fpn_1x_predcls_psg'
27
+ work_dir = f'./work_dirs/{expt_name}'
28
+
29
+ log_config = dict(
30
+ interval=50,
31
+ hooks=[
32
+ dict(type='TextLoggerHook'),
33
+ # dict(type='TensorboardLoggerHook')
34
+ dict(
35
+ type='WandbLoggerHook',
36
+ init_kwargs=dict(
37
+ project=project_name,
38
+ name=expt_name,
39
+ # config=work_dir + "/cfg.yaml"
40
+ ),
41
+ ),
42
+ ],
43
+ )
OpenPSG/configs/vctree/panoptic_fpn_r50_fpn_1x_sgdet_psg.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# VCTree sgdet experiment: starts from the Motifs R50 predcls config, then
# turns off ground-truth boxes/labels and switches the bbox head.
_base_ = ['../motifs/panoptic_fpn_r50_fpn_1x_predcls_psg.py']

model = {
    'relation_head': {
        'type': 'VCTreeHead',
        'head_config': {
            # NOTE: Evaluation type
            'use_gt_box': False,
            'use_gt_label': False,
        },
    },
    'roi_head': {
        'bbox_head': {'type': 'SceneGraphBBoxHead'},
    },
}

evaluation = {
    'interval': 1,
    'metric': 'sgdet',
    'relation_mode': True,
    'classwise': True,
    'iou_thrs': 0.5,
    'detection_method': 'pan_seg',
}

# Change batch size and learning rate
data = {
    'samples_per_gpu': 16,
    # 'workers_per_gpu': 2,
}
# optimizer = dict(lr=0.003)

# Log config
project_name = 'openpsg'
expt_name = 'vctree_panoptic_fpn_r50_fpn_1x_sgdet_psg'
work_dir = './work_dirs/' + expt_name

# Text logging plus a Weights & Biases run named after the experiment,
# reported every 50 iterations.
log_config = {
    'interval': 50,
    'hooks': [
        {'type': 'TextLoggerHook'},
        # {'type': 'TensorboardLoggerHook'},
        {
            'type': 'WandbLoggerHook',
            'init_kwargs': {
                'project': project_name,
                'name': expt_name,
                # config=work_dir + "/cfg.yaml"
            },
        },
    ],
}
README.md CHANGED
@@ -1,8 +1,8 @@
1
  ---
2
  title: OpenPSG
3
- emoji: 🐠
4
- colorFrom: green
5
- colorTo: gray
6
  sdk: gradio
7
  sdk_version: 3.1.4
8
  app_file: app.py
 
1
  ---
2
  title: OpenPSG
3
+ emoji: 🖼️🏙️🌄🌉
4
+ colorFrom: yellow
5
+ colorTo: blue
6
  sdk: gradio
7
  sdk_version: 3.1.4
8
  app_file: app.py
app.py CHANGED
@@ -1,15 +1,135 @@
1
- import numpy as np
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  import gradio as gr
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
 
4
- def sepia(input_img):
5
- sepia_filter = np.array([
6
- [0.393, 0.769, 0.189],
7
- [0.349, 0.686, 0.168],
8
- [0.272, 0.534, 0.131]
9
- ])
10
- sepia_img = input_img.dot(sepia_filter.T)
11
- sepia_img /= sepia_img.max()
12
- return sepia_img
13
-
14
- demo = gr.Interface(sepia, gr.Image(shape=(200, 200)), "image")
15
- demo.launch(share=True)
 
1
+ #!/usr/bin/env python
2
+
3
+ from __future__ import annotations
4
+
5
+ import argparse
6
+ import os
7
+ import pathlib
8
+ import subprocess
9
+ import tarfile
10
+
11
# When the SYSTEM environment variable is 'spaces', pin mmcv-full and a
# headless OpenCV build before the heavy imports below run.
if os.environ.get('SYSTEM') == 'spaces':
    import mim

    # Replace whatever mmcv-full is installed with the pinned version.
    mim.uninstall('mmcv-full', confirm_yes=True)
    mim.install('mmcv-full==1.5.2', is_yes=True)

    # Remove both OpenCV wheels, then install only the pinned headless build.
    subprocess.call(['pip', 'uninstall', '-y', 'opencv-python'])
    subprocess.call(['pip', 'uninstall', '-y', 'opencv-python-headless'])
    subprocess.call(['pip', 'install', 'opencv-python-headless==4.5.5.64'])
20
+
21
+ import cv2
22
  import gradio as gr
23
+ import numpy as np
24
+
25
+ from mmdet.apis import init_detector, inference_detector
26
+ from utils import show_result
27
+ import mmcv
28
+ from mmcv import Config
29
+ import os.path as osp
30
+
31
# Markdown shown at the top of the demo page (title, blurb, overview image).
DESCRIPTION = (
    '# OpenPSG\n'
    '\n'
    'This is an official demo for [OpenPSG](https://github.com/Jingkang50/OpenPSG).\n'
    '<img id="overview" alt="overview" src="https://camo.githubusercontent.com/880346b66831a8212074787ba9a2301b4d700bd8f765ca11e4845ac0ab34c230/68747470733a2f2f6c6976652e737461746963666c69636b722e636f6d2f36353533352f35323139333837393637375f373531613465306237395f6b2e6a7067" />\n'
)
# Markdown shown at the bottom of the demo page (visitor-counter badge).
FOOTER = (
    '<img id="visitor-badge" '
    'src="https://visitor-badge.glitch.me/badge?page_id=c-liangyu.openpsg" '
    'alt="visitor badge" />'
)
37
+
38
+
39
def parse_args() -> argparse.Namespace:
    """Parse the demo's command-line options.

    Returns:
        Namespace with ``device`` (default ``'cpu'``), ``theme``, ``share``,
        ``port`` and ``enable_queue`` (``True`` unless ``--disable-queue``
        is given).
    """
    cli = argparse.ArgumentParser()
    cli.add_argument('--device', default='cpu', type=str)
    cli.add_argument('--theme', type=str)
    cli.add_argument('--share', action='store_true')
    cli.add_argument('--port', type=int)
    # The flag is phrased negatively but stored positively: passing it sets
    # ``enable_queue`` to False, otherwise it stays True.
    cli.add_argument('--disable-queue',
                     action='store_false',
                     dest='enable_queue')
    namespace = cli.parse_args()
    return namespace
49
+
50
+
51
def update_input_image(image: np.ndarray) -> dict:
    """Return an image-component update, shrinking oversized inputs.

    Args:
        image: The current image array, or ``None`` when the component
            holds no image.

    Returns:
        The result of ``gr.Image.update`` carrying ``None`` or the image.
        The image is resized only when the larger of its first two
        dimensions exceeds 1500; both axes use the same scale factor.
    """
    if image is not None:
        longest_side = max(image.shape[:2])
        shrink = 1500 / longest_side
        if shrink < 1:
            image = cv2.resize(image, None, fx=shrink, fy=shrink)
    # ``image`` is still None here when nothing was provided.
    return gr.Image.update(value=image)
58
+
59
+
60
def set_example_image(example: list) -> dict:
    """Return an image-component update holding the first item of ``example``."""
    chosen = example[0]
    return gr.Image.update(value=chosen)
62
+
63
+
64
def infer(model, input_image, num_rel):
    """Run the detector on an image and return the rendered result.

    Args:
        model: Detector passed straight to ``inference_detector``.
        input_image: Image to analyse; also handed to ``show_result``.
        num_rel: Forwarded to ``show_result`` as ``num_rel``.

    Returns:
        Whatever ``show_result`` returns for this image and prediction.
    """
    prediction = inference_detector(model, input_image)
    # NOTE(review): the meaning of ``is_one_stage`` / ``show`` is defined by
    # ``utils.show_result``, which is not visible here.
    render_options = dict(is_one_stage=True, num_rel=num_rel, show=True)
    return show_result(input_image, prediction, **render_options)
72
+
73
+
74
def main():
    """Build the Gradio demo around a loaded detector and launch it.

    Loads the checkpoint and config from their fixed paths under
    ``OpenPSG/``, lays out the input/result widgets, wires the events and
    starts the server with the parsed command-line options.
    """
    args = parse_args()

    model_ckt = 'OpenPSG/checkpoints/epoch_60.pth'
    cfg = Config.fromfile('OpenPSG/configs/psgtr/psgtr_r50_psg_inference.py')

    model = init_detector(cfg, model_ckt, device=args.device)

    def run_inference(image, relation_count):
        # The detector is a plain Python object, not a Gradio component, so
        # it cannot be listed in an event's ``inputs``; capture it here.
        return infer(model, image, relation_count)

    with gr.Blocks(theme=args.theme, css='style.css') as demo:
        gr.Markdown(DESCRIPTION)

        with gr.Row():
            with gr.Column():
                with gr.Row():
                    input_image = gr.Image(label='Input Image', type='numpy')
                with gr.Group():
                    with gr.Row():
                        num_rel = gr.Slider(5,
                                            100,
                                            step=5,
                                            value=20,
                                            label='Number of Relations')
                with gr.Row():
                    run_button = gr.Button(value='Run')
            with gr.Column():
                with gr.Row():
                    result = gr.Gallery(label='Result', type='numpy')

        with gr.Row():
            # Every .jpg found under ./images becomes a clickable example.
            paths = sorted(pathlib.Path('images').rglob('*.jpg'))
            example_images = gr.Dataset(components=[input_image],
                                        samples=[[path.as_posix()]
                                                 for path in paths])

        gr.Markdown(FOOTER)

        input_image.change(fn=update_input_image,
                           inputs=input_image,
                           outputs=input_image)

        # Pass the image and the slider value; the model is supplied by the
        # closure above so ``infer`` receives all three of its arguments.
        run_button.click(fn=run_inference,
                         inputs=[input_image, num_rel],
                         outputs=result)

        example_images.click(fn=set_example_image,
                             inputs=example_images,
                             outputs=input_image)

    demo.launch(
        enable_queue=args.enable_queue,
        server_port=args.port,
        share=args.share,
    )


if __name__ == '__main__':
    main()
 
 
 
 
 
 
 
 
 
 
fake_gan.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # another demo
2
+ # https://huggingface.co/spaces/dalle-mini/dalle-mini/blob/21944e2a8508568387951fc66a30e90f1d58819d/app/gradio/app.py
3
+
4
+ # This demo needs to be run from the repo folder.
5
+ # python demo/fake_gan/run.py
6
+ import os
7
+ import random
8
+ import time
9
+
10
+ import gradio as gr
11
+
12
+
13
def fake_gan(count, *args):
    """Pretend to generate images by sampling fixed Unsplash URLs.

    Args:
        count: Number of images to return; converted with ``int``.
        *args: Accepted and ignored.

    Returns:
        A list of ``int(count)`` URLs, each drawn independently (so repeats
        are possible) from the five candidates below.
    """
    # Pause for one second before returning anything.
    time.sleep(1)
    candidates = [
        "https://images.unsplash.com/photo-1507003211169-0a1dd7228f2d?ixlib=rb-1.2.1&ixid=MnwxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8&auto=format&fit=crop&w=387&q=80",
        "https://images.unsplash.com/photo-1554151228-14d9def656e4?ixlib=rb-1.2.1&ixid=MnwxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8&auto=format&fit=crop&w=386&q=80",
        "https://images.unsplash.com/photo-1542909168-82c3e7fdca5c?ixlib=rb-1.2.1&ixid=MnwxMjA3fDB8MHxzZWFyY2h8MXx8aHVtYW4lMjBmYWNlfGVufDB8fDB8fA%3D%3D&w=1000&q=80",
        "https://images.unsplash.com/photo-1546456073-92b9f0a8d413?ixlib=rb-1.2.1&ixid=MnwxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8&auto=format&fit=crop&w=387&q=80",
        "https://images.unsplash.com/photo-1601412436009-d964bd02edbc?ixlib=rb-1.2.1&ixid=MnwxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8&auto=format&fit=crop&w=464&q=80",
    ]
    picked = []
    for _ in range(int(count)):
        picked.append(random.choice(candidates))
    return picked
28
+
29
+
30
# Example image path, resolved relative to this file.
cheetah = os.path.join(os.path.dirname(__file__), "files/cheetah1.jpg")

# Interface around ``fake_gan``: six inputs in, one gallery out.
demo = gr.Interface(
    fn=fake_gan,
    inputs=[
        gr.Number(label="Generation Count"),
        gr.Image(label="Initial Image (optional)"),
        gr.Slider(0, 50, 25, label="TV_scale (for smoothness)"),
        gr.Slider(0, 50, 25, label="Range_Scale (out of range RBG)"),
        gr.Number(label="Seed"),
        gr.Number(label="Respacing"),
    ],
    outputs=gr.Gallery(label="Generated Images"),
    title="FD-GAN",
    description="This is a fake demo of a GAN. In reality, the images are randomly chosen from Unsplash.",
    # Each example row fills the count, the image and the first slider;
    # the remaining three inputs are left as None.
    examples=[
        [generation_count, cheetah, tv_scale, None, None, None]
        for generation_count, tv_scale in (
            (2, 12),
            (1, 2),
            (4, 42),
            (5, 23),
            (4, 11),
            (3, 1),
        )
    ],
)

if __name__ == "__main__":
    demo.launch()
images/cooking.jpg ADDED
images/forrest-gump.jpg ADDED
images/friends.jpg ADDED
images/mbappe.jpg ADDED
images/messi.jpg ADDED