hnthai committed on
Commit
a9bd37f
·
1 Parent(s): 6ccaa09

first commit

Browse files
Pose_Video ADDED
@@ -0,0 +1 @@
 
 
1
+ Subproject commit 6ccaa09c24051b0d9d4e8a9b9297e06dbc9102e6
app.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import cv2
2
+ import gradio as gr
3
+ from mmpose.apis import (inference_top_down_pose_model, init_pose_model,
4
+ vis_pose_result, process_mmdet_results)
5
+ from mmdet.apis import inference_detector, init_detector
6
+ import mediapy
7
+
8
# Detector: Faster R-CNN (ResNet-50 + FPN) config and checkpoint, used to
# produce the bounding boxes that the pose model consumes.
det_config = 'configs/faster_rcnn_r50_fpn_1x_coco.py'
det_checkpoint = 'faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth'

# Pose estimator: top-down HRNet-W48 heatmap model (256x192 input) config
# and checkpoint.
pose_config = 'configs/topdown_heatmap_hrnet_w48_coco_256x192.py'
pose_checkpoint = 'hrnet_w48_coco_256x192-b9e0b3ab_20200708.pth'

# Both models are built once at import time and kept on the CPU; `predict`
# reuses them for every request.
pose_model = init_pose_model(
    pose_config,
    pose_checkpoint,
    device='cpu',
)
det_model = init_detector(
    det_config,
    det_checkpoint,
    device='cpu',
)
17
+
18
+
19
# Upper bound on the number of frames processed per request; frames beyond
# this limit are ignored so a single request cannot run unbounded.
max_num_frames = 120


def predict(video_path):
    """Overlay estimated human poses on the first frames of a video.

    For each of the first ``max_num_frames`` frames, people are detected with
    ``det_model``, their keypoints are estimated with ``pose_model``, and the
    result is drawn on the frame. The annotated frames are encoded into a new
    MP4 file.

    Args:
        video_path: Path of the input video file, as handed over by the
            Gradio ``Video`` input component.

    Returns:
        Path of the rendered MP4 file (a freshly created temporary file).

    Raises:
        gr.Error: If no video was supplied, the file cannot be opened, or no
            frame could be decoded from it.
    """
    # Function-scope import keeps this block self-contained.
    import tempfile

    if not video_path:
        raise gr.Error('Please provide an input video.')

    cap = cv2.VideoCapture(video_path)
    frames = []
    try:
        if not cap.isOpened():
            raise gr.Error('Could not open the input video.')

        fps = cap.get(cv2.CAP_PROP_FPS)
        dataset = pose_model.cfg.data.test.type

        for _ in range(max_num_frames):
            ok, frame = cap.read()
            if not ok:
                break

            # OpenCV decodes frames as BGR, which is the channel order the
            # mmdet/mmpose inference APIs expect for ndarray input, so the
            # frame is passed through unchanged.
            mmdet_results = inference_detector(det_model, frame)
            # cat_id=1 selects the first detector class; presumably "person"
            # for this COCO-trained detector.
            person_results = process_mmdet_results(mmdet_results, cat_id=1)
            pose_results, _ = inference_top_down_pose_model(
                pose_model,
                frame,
                person_results,
                bbox_thr=0.3,
                format='xyxy',
                dataset=dataset)
            vis_result = vis_pose_result(
                pose_model,
                frame,
                pose_results,
                dataset=dataset,
                show=False)

            # mediapy writes RGB images, so convert only the rendered frame.
            frames.append(cv2.cvtColor(vis_result, cv2.COLOR_BGR2RGB))
    finally:
        # Release the capture handle even if inference fails mid-video.
        cap.release()

    if not frames:
        raise gr.Error('No frames could be read from the input video.')

    # Some containers report 0 or NaN as frame rate; `not fps > 0` catches
    # both. Fall back to a sane default so encoding does not fail.
    if not fps > 0:
        fps = 30.0

    # A unique output file per call avoids one request overwriting another
    # request's result. The file is intentionally not deleted here because
    # the caller still has to read it.
    with tempfile.NamedTemporaryFile(suffix='.mp4', delete=False) as out_file:
        out_path = out_file.name

    mediapy.write_video(out_path, frames, fps=fps)
    return out_path
58
+
59
# Text shown in the Gradio page; description and article are intentionally
# left blank.
title = "Pose Estimation video"
description = ""
article = ""

# Sample clip(s) offered in the UI's "Examples" section.
example_list = ['examples/000001_mpiinew_test.mp4']

# Build the demo: one video in, one annotated video out, both handled by
# `predict`.
demo = gr.Interface(
    fn=predict,
    inputs=gr.Video(label='Input Video'),
    outputs=gr.Video(label='Result'),
    examples=example_list,
    title=title,
    description=description,
    article=article,
)

# Enable request queuing, then start the server without the API page.
demo.queue()
demo.launch(show_api=False)
configs/faster_rcnn_r50_fpn_1x_coco.py ADDED
@@ -0,0 +1,228 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model = dict(
2
+ type='FasterRCNN',
3
+ backbone=dict(
4
+ type='ResNet',
5
+ depth=50,
6
+ num_stages=4,
7
+ out_indices=(0, 1, 2, 3),
8
+ frozen_stages=1,
9
+ norm_cfg=dict(type='BN', requires_grad=True),
10
+ norm_eval=True,
11
+ style='pytorch',
12
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
13
+ neck=dict(
14
+ type='FPN',
15
+ in_channels=[256, 512, 1024, 2048],
16
+ out_channels=256,
17
+ num_outs=5),
18
+ rpn_head=dict(
19
+ type='RPNHead',
20
+ in_channels=256,
21
+ feat_channels=256,
22
+ anchor_generator=dict(
23
+ type='AnchorGenerator',
24
+ scales=[8],
25
+ ratios=[0.5, 1.0, 2.0],
26
+ strides=[4, 8, 16, 32, 64]),
27
+ bbox_coder=dict(
28
+ type='DeltaXYWHBBoxCoder',
29
+ target_means=[0.0, 0.0, 0.0, 0.0],
30
+ target_stds=[1.0, 1.0, 1.0, 1.0]),
31
+ loss_cls=dict(
32
+ type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
33
+ loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
34
+ roi_head=dict(
35
+ type='StandardRoIHead',
36
+ bbox_roi_extractor=dict(
37
+ type='SingleRoIExtractor',
38
+ roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
39
+ out_channels=256,
40
+ featmap_strides=[4, 8, 16, 32]),
41
+ bbox_head=dict(
42
+ type='Shared2FCBBoxHead',
43
+ in_channels=256,
44
+ fc_out_channels=1024,
45
+ roi_feat_size=7,
46
+ num_classes=80,
47
+ bbox_coder=dict(
48
+ type='DeltaXYWHBBoxCoder',
49
+ target_means=[0.0, 0.0, 0.0, 0.0],
50
+ target_stds=[0.1, 0.1, 0.2, 0.2]),
51
+ reg_class_agnostic=False,
52
+ loss_cls=dict(
53
+ type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
54
+ loss_bbox=dict(type='L1Loss', loss_weight=1.0))),
55
+ train_cfg=dict(
56
+ rpn=dict(
57
+ assigner=dict(
58
+ type='MaxIoUAssigner',
59
+ pos_iou_thr=0.7,
60
+ neg_iou_thr=0.3,
61
+ min_pos_iou=0.3,
62
+ match_low_quality=True,
63
+ ignore_iof_thr=-1),
64
+ sampler=dict(
65
+ type='RandomSampler',
66
+ num=256,
67
+ pos_fraction=0.5,
68
+ neg_pos_ub=-1,
69
+ add_gt_as_proposals=False),
70
+ allowed_border=-1,
71
+ pos_weight=-1,
72
+ debug=False),
73
+ rpn_proposal=dict(
74
+ nms_pre=2000,
75
+ max_per_img=1000,
76
+ nms=dict(type='nms', iou_threshold=0.7),
77
+ min_bbox_size=0),
78
+ rcnn=dict(
79
+ assigner=dict(
80
+ type='MaxIoUAssigner',
81
+ pos_iou_thr=0.5,
82
+ neg_iou_thr=0.5,
83
+ min_pos_iou=0.5,
84
+ match_low_quality=False,
85
+ ignore_iof_thr=-1),
86
+ sampler=dict(
87
+ type='RandomSampler',
88
+ num=512,
89
+ pos_fraction=0.25,
90
+ neg_pos_ub=-1,
91
+ add_gt_as_proposals=True),
92
+ pos_weight=-1,
93
+ debug=False)),
94
+ test_cfg=dict(
95
+ rpn=dict(
96
+ nms_pre=1000,
97
+ max_per_img=1000,
98
+ nms=dict(type='nms', iou_threshold=0.7),
99
+ min_bbox_size=0),
100
+ rcnn=dict(
101
+ score_thr=0.05,
102
+ nms=dict(type='nms', iou_threshold=0.5),
103
+ max_per_img=100)))
104
+ dataset_type = 'CocoDataset'
105
+ data_root = 'data/coco/'
106
+ img_norm_cfg = dict(
107
+ mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
108
+ train_pipeline = [
109
+ dict(type='LoadImageFromFile'),
110
+ dict(type='LoadAnnotations', with_bbox=True),
111
+ dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
112
+ dict(type='RandomFlip', flip_ratio=0.5),
113
+ dict(
114
+ type='Normalize',
115
+ mean=[123.675, 116.28, 103.53],
116
+ std=[58.395, 57.12, 57.375],
117
+ to_rgb=True),
118
+ dict(type='Pad', size_divisor=32),
119
+ dict(type='DefaultFormatBundle'),
120
+ dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
121
+ ]
122
+ test_pipeline = [
123
+ dict(type='LoadImageFromFile'),
124
+ dict(
125
+ type='MultiScaleFlipAug',
126
+ img_scale=(1333, 800),
127
+ flip=False,
128
+ transforms=[
129
+ dict(type='Resize', keep_ratio=True),
130
+ dict(type='RandomFlip'),
131
+ dict(
132
+ type='Normalize',
133
+ mean=[123.675, 116.28, 103.53],
134
+ std=[58.395, 57.12, 57.375],
135
+ to_rgb=True),
136
+ dict(type='Pad', size_divisor=32),
137
+ dict(type='ImageToTensor', keys=['img']),
138
+ dict(type='Collect', keys=['img'])
139
+ ])
140
+ ]
141
+ data = dict(
142
+ samples_per_gpu=2,
143
+ workers_per_gpu=2,
144
+ train=dict(
145
+ type='CocoDataset',
146
+ ann_file='data/coco/annotations/instances_train2017.json',
147
+ img_prefix='data/coco/train2017/',
148
+ pipeline=[
149
+ dict(type='LoadImageFromFile'),
150
+ dict(type='LoadAnnotations', with_bbox=True),
151
+ dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
152
+ dict(type='RandomFlip', flip_ratio=0.5),
153
+ dict(
154
+ type='Normalize',
155
+ mean=[123.675, 116.28, 103.53],
156
+ std=[58.395, 57.12, 57.375],
157
+ to_rgb=True),
158
+ dict(type='Pad', size_divisor=32),
159
+ dict(type='DefaultFormatBundle'),
160
+ dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
161
+ ]),
162
+ val=dict(
163
+ type='CocoDataset',
164
+ ann_file='data/coco/annotations/instances_val2017.json',
165
+ img_prefix='data/coco/val2017/',
166
+ pipeline=[
167
+ dict(type='LoadImageFromFile'),
168
+ dict(
169
+ type='MultiScaleFlipAug',
170
+ img_scale=(1333, 800),
171
+ flip=False,
172
+ transforms=[
173
+ dict(type='Resize', keep_ratio=True),
174
+ dict(type='RandomFlip'),
175
+ dict(
176
+ type='Normalize',
177
+ mean=[123.675, 116.28, 103.53],
178
+ std=[58.395, 57.12, 57.375],
179
+ to_rgb=True),
180
+ dict(type='Pad', size_divisor=32),
181
+ dict(type='ImageToTensor', keys=['img']),
182
+ dict(type='Collect', keys=['img'])
183
+ ])
184
+ ]),
185
+ test=dict(
186
+ type='CocoDataset',
187
+ ann_file='data/coco/annotations/instances_val2017.json',
188
+ img_prefix='data/coco/val2017/',
189
+ pipeline=[
190
+ dict(type='LoadImageFromFile'),
191
+ dict(
192
+ type='MultiScaleFlipAug',
193
+ img_scale=(1333, 800),
194
+ flip=False,
195
+ transforms=[
196
+ dict(type='Resize', keep_ratio=True),
197
+ dict(type='RandomFlip'),
198
+ dict(
199
+ type='Normalize',
200
+ mean=[123.675, 116.28, 103.53],
201
+ std=[58.395, 57.12, 57.375],
202
+ to_rgb=True),
203
+ dict(type='Pad', size_divisor=32),
204
+ dict(type='ImageToTensor', keys=['img']),
205
+ dict(type='Collect', keys=['img'])
206
+ ])
207
+ ]))
208
+ evaluation = dict(interval=1, metric='bbox')
209
+ optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
210
+ optimizer_config = dict(grad_clip=None)
211
+ lr_config = dict(
212
+ policy='step',
213
+ warmup='linear',
214
+ warmup_iters=500,
215
+ warmup_ratio=0.001,
216
+ step=[8, 11])
217
+ runner = dict(type='EpochBasedRunner', max_epochs=12)
218
+ checkpoint_config = dict(interval=1)
219
+ log_config = dict(interval=50, hooks=[dict(type='TextLoggerHook')])
220
+ custom_hooks = [dict(type='NumClassCheckHook')]
221
+ dist_params = dict(backend='nccl')
222
+ log_level = 'INFO'
223
+ load_from = None
224
+ resume_from = None
225
+ workflow = [('train', 1)]
226
+ opencv_num_threads = 0
227
+ mp_start_method = 'fork'
228
+ auto_scale_lr = dict(enable=False, base_batch_size=16)
configs/topdown_heatmap_hrnet_w48_coco_256x192.py ADDED
@@ -0,0 +1,1129 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ checkpoint_config = dict(interval=10)
2
+ log_config = dict(interval=50, hooks=[dict(type='TextLoggerHook')])
3
+ log_level = 'INFO'
4
+ load_from = None
5
+ resume_from = None
6
+ dist_params = dict(backend='nccl')
7
+ workflow = [('train', 1)]
8
+ opencv_num_threads = 0
9
+ mp_start_method = 'fork'
10
+ dataset_info = dict(
11
+ dataset_name='coco',
12
+ paper_info=dict(
13
+ author=
14
+ 'Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence',
15
+ title='Microsoft coco: Common objects in context',
16
+ container='European conference on computer vision',
17
+ year='2014',
18
+ homepage='http://cocodataset.org/'),
19
+ keypoint_info=dict({
20
+ 0:
21
+ dict(name='nose', id=0, color=[51, 153, 255], type='upper', swap=''),
22
+ 1:
23
+ dict(
24
+ name='left_eye',
25
+ id=1,
26
+ color=[51, 153, 255],
27
+ type='upper',
28
+ swap='right_eye'),
29
+ 2:
30
+ dict(
31
+ name='right_eye',
32
+ id=2,
33
+ color=[51, 153, 255],
34
+ type='upper',
35
+ swap='left_eye'),
36
+ 3:
37
+ dict(
38
+ name='left_ear',
39
+ id=3,
40
+ color=[51, 153, 255],
41
+ type='upper',
42
+ swap='right_ear'),
43
+ 4:
44
+ dict(
45
+ name='right_ear',
46
+ id=4,
47
+ color=[51, 153, 255],
48
+ type='upper',
49
+ swap='left_ear'),
50
+ 5:
51
+ dict(
52
+ name='left_shoulder',
53
+ id=5,
54
+ color=[0, 255, 0],
55
+ type='upper',
56
+ swap='right_shoulder'),
57
+ 6:
58
+ dict(
59
+ name='right_shoulder',
60
+ id=6,
61
+ color=[255, 128, 0],
62
+ type='upper',
63
+ swap='left_shoulder'),
64
+ 7:
65
+ dict(
66
+ name='left_elbow',
67
+ id=7,
68
+ color=[0, 255, 0],
69
+ type='upper',
70
+ swap='right_elbow'),
71
+ 8:
72
+ dict(
73
+ name='right_elbow',
74
+ id=8,
75
+ color=[255, 128, 0],
76
+ type='upper',
77
+ swap='left_elbow'),
78
+ 9:
79
+ dict(
80
+ name='left_wrist',
81
+ id=9,
82
+ color=[0, 255, 0],
83
+ type='upper',
84
+ swap='right_wrist'),
85
+ 10:
86
+ dict(
87
+ name='right_wrist',
88
+ id=10,
89
+ color=[255, 128, 0],
90
+ type='upper',
91
+ swap='left_wrist'),
92
+ 11:
93
+ dict(
94
+ name='left_hip',
95
+ id=11,
96
+ color=[0, 255, 0],
97
+ type='lower',
98
+ swap='right_hip'),
99
+ 12:
100
+ dict(
101
+ name='right_hip',
102
+ id=12,
103
+ color=[255, 128, 0],
104
+ type='lower',
105
+ swap='left_hip'),
106
+ 13:
107
+ dict(
108
+ name='left_knee',
109
+ id=13,
110
+ color=[0, 255, 0],
111
+ type='lower',
112
+ swap='right_knee'),
113
+ 14:
114
+ dict(
115
+ name='right_knee',
116
+ id=14,
117
+ color=[255, 128, 0],
118
+ type='lower',
119
+ swap='left_knee'),
120
+ 15:
121
+ dict(
122
+ name='left_ankle',
123
+ id=15,
124
+ color=[0, 255, 0],
125
+ type='lower',
126
+ swap='right_ankle'),
127
+ 16:
128
+ dict(
129
+ name='right_ankle',
130
+ id=16,
131
+ color=[255, 128, 0],
132
+ type='lower',
133
+ swap='left_ankle')
134
+ }),
135
+ skeleton_info=dict({
136
+ 0:
137
+ dict(link=('left_ankle', 'left_knee'), id=0, color=[0, 255, 0]),
138
+ 1:
139
+ dict(link=('left_knee', 'left_hip'), id=1, color=[0, 255, 0]),
140
+ 2:
141
+ dict(link=('right_ankle', 'right_knee'), id=2, color=[255, 128, 0]),
142
+ 3:
143
+ dict(link=('right_knee', 'right_hip'), id=3, color=[255, 128, 0]),
144
+ 4:
145
+ dict(link=('left_hip', 'right_hip'), id=4, color=[51, 153, 255]),
146
+ 5:
147
+ dict(link=('left_shoulder', 'left_hip'), id=5, color=[51, 153, 255]),
148
+ 6:
149
+ dict(link=('right_shoulder', 'right_hip'), id=6, color=[51, 153, 255]),
150
+ 7:
151
+ dict(
152
+ link=('left_shoulder', 'right_shoulder'),
153
+ id=7,
154
+ color=[51, 153, 255]),
155
+ 8:
156
+ dict(link=('left_shoulder', 'left_elbow'), id=8, color=[0, 255, 0]),
157
+ 9:
158
+ dict(
159
+ link=('right_shoulder', 'right_elbow'), id=9, color=[255, 128, 0]),
160
+ 10:
161
+ dict(link=('left_elbow', 'left_wrist'), id=10, color=[0, 255, 0]),
162
+ 11:
163
+ dict(link=('right_elbow', 'right_wrist'), id=11, color=[255, 128, 0]),
164
+ 12:
165
+ dict(link=('left_eye', 'right_eye'), id=12, color=[51, 153, 255]),
166
+ 13:
167
+ dict(link=('nose', 'left_eye'), id=13, color=[51, 153, 255]),
168
+ 14:
169
+ dict(link=('nose', 'right_eye'), id=14, color=[51, 153, 255]),
170
+ 15:
171
+ dict(link=('left_eye', 'left_ear'), id=15, color=[51, 153, 255]),
172
+ 16:
173
+ dict(link=('right_eye', 'right_ear'), id=16, color=[51, 153, 255]),
174
+ 17:
175
+ dict(link=('left_ear', 'left_shoulder'), id=17, color=[51, 153, 255]),
176
+ 18:
177
+ dict(
178
+ link=('right_ear', 'right_shoulder'), id=18, color=[51, 153, 255])
179
+ }),
180
+ joint_weights=[
181
+ 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.2, 1.2, 1.5, 1.5, 1.0, 1.0, 1.2,
182
+ 1.2, 1.5, 1.5
183
+ ],
184
+ sigmas=[
185
+ 0.026, 0.025, 0.025, 0.035, 0.035, 0.079, 0.079, 0.072, 0.072, 0.062,
186
+ 0.062, 0.107, 0.107, 0.087, 0.087, 0.089, 0.089
187
+ ])
188
+ evaluation = dict(interval=10, metric='mAP', save_best='AP')
189
+ optimizer = dict(type='Adam', lr=0.0005)
190
+ optimizer_config = dict(grad_clip=None)
191
+ lr_config = dict(
192
+ policy='step',
193
+ warmup='linear',
194
+ warmup_iters=500,
195
+ warmup_ratio=0.001,
196
+ step=[170, 200])
197
+ total_epochs = 210
198
+ channel_cfg = dict(
199
+ num_output_channels=17,
200
+ dataset_joints=17,
201
+ dataset_channel=[[
202
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
203
+ ]],
204
+ inference_channel=[
205
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
206
+ ])
207
+ model = dict(
208
+ type='TopDown',
209
+ pretrained=
210
+ 'https://download.openmmlab.com/mmpose/pretrain_models/hrnet_w48-8ef0771d.pth',
211
+ backbone=dict(
212
+ type='HRNet',
213
+ in_channels=3,
214
+ extra=dict(
215
+ stage1=dict(
216
+ num_modules=1,
217
+ num_branches=1,
218
+ block='BOTTLENECK',
219
+ num_blocks=(4, ),
220
+ num_channels=(64, )),
221
+ stage2=dict(
222
+ num_modules=1,
223
+ num_branches=2,
224
+ block='BASIC',
225
+ num_blocks=(4, 4),
226
+ num_channels=(48, 96)),
227
+ stage3=dict(
228
+ num_modules=4,
229
+ num_branches=3,
230
+ block='BASIC',
231
+ num_blocks=(4, 4, 4),
232
+ num_channels=(48, 96, 192)),
233
+ stage4=dict(
234
+ num_modules=3,
235
+ num_branches=4,
236
+ block='BASIC',
237
+ num_blocks=(4, 4, 4, 4),
238
+ num_channels=(48, 96, 192, 384)))),
239
+ keypoint_head=dict(
240
+ type='TopdownHeatmapSimpleHead',
241
+ in_channels=48,
242
+ out_channels=17,
243
+ num_deconv_layers=0,
244
+ extra=dict(final_conv_kernel=1),
245
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
246
+ train_cfg=dict(),
247
+ test_cfg=dict(
248
+ flip_test=True,
249
+ post_process='default',
250
+ shift_heatmap=True,
251
+ modulate_kernel=11))
252
+ data_cfg = dict(
253
+ image_size=[192, 256],
254
+ heatmap_size=[48, 64],
255
+ num_output_channels=17,
256
+ num_joints=17,
257
+ dataset_channel=[[
258
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
259
+ ]],
260
+ inference_channel=[
261
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
262
+ ],
263
+ soft_nms=False,
264
+ nms_thr=1.0,
265
+ oks_thr=0.9,
266
+ vis_thr=0.2,
267
+ use_gt_bbox=False,
268
+ det_bbox_thr=0.0,
269
+ bbox_file=
270
+ 'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json'
271
+ )
272
+ train_pipeline = [
273
+ dict(type='LoadImageFromFile'),
274
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
275
+ dict(
276
+ type='TopDownHalfBodyTransform',
277
+ num_joints_half_body=8,
278
+ prob_half_body=0.3),
279
+ dict(
280
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
281
+ dict(type='TopDownAffine'),
282
+ dict(type='ToTensor'),
283
+ dict(
284
+ type='NormalizeTensor',
285
+ mean=[0.485, 0.456, 0.406],
286
+ std=[0.229, 0.224, 0.225]),
287
+ dict(type='TopDownGenerateTarget', sigma=2),
288
+ dict(
289
+ type='Collect',
290
+ keys=['img', 'target', 'target_weight'],
291
+ meta_keys=[
292
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
293
+ 'rotation', 'bbox_score', 'flip_pairs'
294
+ ])
295
+ ]
296
+ val_pipeline = [
297
+ dict(type='LoadImageFromFile'),
298
+ dict(type='TopDownAffine'),
299
+ dict(type='ToTensor'),
300
+ dict(
301
+ type='NormalizeTensor',
302
+ mean=[0.485, 0.456, 0.406],
303
+ std=[0.229, 0.224, 0.225]),
304
+ dict(
305
+ type='Collect',
306
+ keys=['img'],
307
+ meta_keys=[
308
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
309
+ 'flip_pairs'
310
+ ])
311
+ ]
312
+ test_pipeline = [
313
+ dict(type='LoadImageFromFile'),
314
+ dict(type='TopDownAffine'),
315
+ dict(type='ToTensor'),
316
+ dict(
317
+ type='NormalizeTensor',
318
+ mean=[0.485, 0.456, 0.406],
319
+ std=[0.229, 0.224, 0.225]),
320
+ dict(
321
+ type='Collect',
322
+ keys=['img'],
323
+ meta_keys=[
324
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
325
+ 'flip_pairs'
326
+ ])
327
+ ]
328
+ data_root = 'data/coco'
329
+ data = dict(
330
+ samples_per_gpu=32,
331
+ workers_per_gpu=2,
332
+ val_dataloader=dict(samples_per_gpu=32),
333
+ test_dataloader=dict(samples_per_gpu=32),
334
+ train=dict(
335
+ type='TopDownCocoDataset',
336
+ ann_file='data/coco/annotations/person_keypoints_train2017.json',
337
+ img_prefix='data/coco/train2017/',
338
+ data_cfg=dict(
339
+ image_size=[192, 256],
340
+ heatmap_size=[48, 64],
341
+ num_output_channels=17,
342
+ num_joints=17,
343
+ dataset_channel=[[
344
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
345
+ ]],
346
+ inference_channel=[
347
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
348
+ ],
349
+ soft_nms=False,
350
+ nms_thr=1.0,
351
+ oks_thr=0.9,
352
+ vis_thr=0.2,
353
+ use_gt_bbox=False,
354
+ det_bbox_thr=0.0,
355
+ bbox_file=
356
+ 'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json'
357
+ ),
358
+ pipeline=[
359
+ dict(type='LoadImageFromFile'),
360
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
361
+ dict(
362
+ type='TopDownHalfBodyTransform',
363
+ num_joints_half_body=8,
364
+ prob_half_body=0.3),
365
+ dict(
366
+ type='TopDownGetRandomScaleRotation',
367
+ rot_factor=40,
368
+ scale_factor=0.5),
369
+ dict(type='TopDownAffine'),
370
+ dict(type='ToTensor'),
371
+ dict(
372
+ type='NormalizeTensor',
373
+ mean=[0.485, 0.456, 0.406],
374
+ std=[0.229, 0.224, 0.225]),
375
+ dict(type='TopDownGenerateTarget', sigma=2),
376
+ dict(
377
+ type='Collect',
378
+ keys=['img', 'target', 'target_weight'],
379
+ meta_keys=[
380
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center',
381
+ 'scale', 'rotation', 'bbox_score', 'flip_pairs'
382
+ ])
383
+ ],
384
+ dataset_info=dict(
385
+ dataset_name='coco',
386
+ paper_info=dict(
387
+ author=
388
+ 'Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence',
389
+ title='Microsoft coco: Common objects in context',
390
+ container='European conference on computer vision',
391
+ year='2014',
392
+ homepage='http://cocodataset.org/'),
393
+ keypoint_info=dict({
394
+ 0:
395
+ dict(
396
+ name='nose',
397
+ id=0,
398
+ color=[51, 153, 255],
399
+ type='upper',
400
+ swap=''),
401
+ 1:
402
+ dict(
403
+ name='left_eye',
404
+ id=1,
405
+ color=[51, 153, 255],
406
+ type='upper',
407
+ swap='right_eye'),
408
+ 2:
409
+ dict(
410
+ name='right_eye',
411
+ id=2,
412
+ color=[51, 153, 255],
413
+ type='upper',
414
+ swap='left_eye'),
415
+ 3:
416
+ dict(
417
+ name='left_ear',
418
+ id=3,
419
+ color=[51, 153, 255],
420
+ type='upper',
421
+ swap='right_ear'),
422
+ 4:
423
+ dict(
424
+ name='right_ear',
425
+ id=4,
426
+ color=[51, 153, 255],
427
+ type='upper',
428
+ swap='left_ear'),
429
+ 5:
430
+ dict(
431
+ name='left_shoulder',
432
+ id=5,
433
+ color=[0, 255, 0],
434
+ type='upper',
435
+ swap='right_shoulder'),
436
+ 6:
437
+ dict(
438
+ name='right_shoulder',
439
+ id=6,
440
+ color=[255, 128, 0],
441
+ type='upper',
442
+ swap='left_shoulder'),
443
+ 7:
444
+ dict(
445
+ name='left_elbow',
446
+ id=7,
447
+ color=[0, 255, 0],
448
+ type='upper',
449
+ swap='right_elbow'),
450
+ 8:
451
+ dict(
452
+ name='right_elbow',
453
+ id=8,
454
+ color=[255, 128, 0],
455
+ type='upper',
456
+ swap='left_elbow'),
457
+ 9:
458
+ dict(
459
+ name='left_wrist',
460
+ id=9,
461
+ color=[0, 255, 0],
462
+ type='upper',
463
+ swap='right_wrist'),
464
+ 10:
465
+ dict(
466
+ name='right_wrist',
467
+ id=10,
468
+ color=[255, 128, 0],
469
+ type='upper',
470
+ swap='left_wrist'),
471
+ 11:
472
+ dict(
473
+ name='left_hip',
474
+ id=11,
475
+ color=[0, 255, 0],
476
+ type='lower',
477
+ swap='right_hip'),
478
+ 12:
479
+ dict(
480
+ name='right_hip',
481
+ id=12,
482
+ color=[255, 128, 0],
483
+ type='lower',
484
+ swap='left_hip'),
485
+ 13:
486
+ dict(
487
+ name='left_knee',
488
+ id=13,
489
+ color=[0, 255, 0],
490
+ type='lower',
491
+ swap='right_knee'),
492
+ 14:
493
+ dict(
494
+ name='right_knee',
495
+ id=14,
496
+ color=[255, 128, 0],
497
+ type='lower',
498
+ swap='left_knee'),
499
+ 15:
500
+ dict(
501
+ name='left_ankle',
502
+ id=15,
503
+ color=[0, 255, 0],
504
+ type='lower',
505
+ swap='right_ankle'),
506
+ 16:
507
+ dict(
508
+ name='right_ankle',
509
+ id=16,
510
+ color=[255, 128, 0],
511
+ type='lower',
512
+ swap='left_ankle')
513
+ }),
514
+ skeleton_info=dict({
515
+ 0:
516
+ dict(
517
+ link=('left_ankle', 'left_knee'), id=0, color=[0, 255, 0]),
518
+ 1:
519
+ dict(link=('left_knee', 'left_hip'), id=1, color=[0, 255, 0]),
520
+ 2:
521
+ dict(
522
+ link=('right_ankle', 'right_knee'),
523
+ id=2,
524
+ color=[255, 128, 0]),
525
+ 3:
526
+ dict(
527
+ link=('right_knee', 'right_hip'),
528
+ id=3,
529
+ color=[255, 128, 0]),
530
+ 4:
531
+ dict(
532
+ link=('left_hip', 'right_hip'), id=4, color=[51, 153,
533
+ 255]),
534
+ 5:
535
+ dict(
536
+ link=('left_shoulder', 'left_hip'),
537
+ id=5,
538
+ color=[51, 153, 255]),
539
+ 6:
540
+ dict(
541
+ link=('right_shoulder', 'right_hip'),
542
+ id=6,
543
+ color=[51, 153, 255]),
544
+ 7:
545
+ dict(
546
+ link=('left_shoulder', 'right_shoulder'),
547
+ id=7,
548
+ color=[51, 153, 255]),
549
+ 8:
550
+ dict(
551
+ link=('left_shoulder', 'left_elbow'),
552
+ id=8,
553
+ color=[0, 255, 0]),
554
+ 9:
555
+ dict(
556
+ link=('right_shoulder', 'right_elbow'),
557
+ id=9,
558
+ color=[255, 128, 0]),
559
+ 10:
560
+ dict(
561
+ link=('left_elbow', 'left_wrist'),
562
+ id=10,
563
+ color=[0, 255, 0]),
564
+ 11:
565
+ dict(
566
+ link=('right_elbow', 'right_wrist'),
567
+ id=11,
568
+ color=[255, 128, 0]),
569
+ 12:
570
+ dict(
571
+ link=('left_eye', 'right_eye'),
572
+ id=12,
573
+ color=[51, 153, 255]),
574
+ 13:
575
+ dict(link=('nose', 'left_eye'), id=13, color=[51, 153, 255]),
576
+ 14:
577
+ dict(link=('nose', 'right_eye'), id=14, color=[51, 153, 255]),
578
+ 15:
579
+ dict(
580
+ link=('left_eye', 'left_ear'), id=15, color=[51, 153,
581
+ 255]),
582
+ 16:
583
+ dict(
584
+ link=('right_eye', 'right_ear'),
585
+ id=16,
586
+ color=[51, 153, 255]),
587
+ 17:
588
+ dict(
589
+ link=('left_ear', 'left_shoulder'),
590
+ id=17,
591
+ color=[51, 153, 255]),
592
+ 18:
593
+ dict(
594
+ link=('right_ear', 'right_shoulder'),
595
+ id=18,
596
+ color=[51, 153, 255])
597
+ }),
598
+ joint_weights=[
599
+ 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.2, 1.2, 1.5, 1.5, 1.0,
600
+ 1.0, 1.2, 1.2, 1.5, 1.5
601
+ ],
602
+ sigmas=[
603
+ 0.026, 0.025, 0.025, 0.035, 0.035, 0.079, 0.079, 0.072, 0.072,
604
+ 0.062, 0.062, 0.107, 0.107, 0.087, 0.087, 0.089, 0.089
605
+ ])),
606
+ val=dict(
607
+ type='TopDownCocoDataset',
608
+ ann_file='data/coco/annotations/person_keypoints_val2017.json',
609
+ img_prefix='data/coco/val2017/',
610
+ data_cfg=dict(
611
+ image_size=[192, 256],
612
+ heatmap_size=[48, 64],
613
+ num_output_channels=17,
614
+ num_joints=17,
615
+ dataset_channel=[[
616
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
617
+ ]],
618
+ inference_channel=[
619
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
620
+ ],
621
+ soft_nms=False,
622
+ nms_thr=1.0,
623
+ oks_thr=0.9,
624
+ vis_thr=0.2,
625
+ use_gt_bbox=False,
626
+ det_bbox_thr=0.0,
627
+ bbox_file=
628
+ 'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json'
629
+ ),
630
+ pipeline=[
631
+ dict(type='LoadImageFromFile'),
632
+ dict(type='TopDownAffine'),
633
+ dict(type='ToTensor'),
634
+ dict(
635
+ type='NormalizeTensor',
636
+ mean=[0.485, 0.456, 0.406],
637
+ std=[0.229, 0.224, 0.225]),
638
+ dict(
639
+ type='Collect',
640
+ keys=['img'],
641
+ meta_keys=[
642
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
643
+ 'flip_pairs'
644
+ ])
645
+ ],
646
+ dataset_info=dict(
647
+ dataset_name='coco',
648
+ paper_info=dict(
649
+ author=
650
+ 'Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence',
651
+ title='Microsoft coco: Common objects in context',
652
+ container='European conference on computer vision',
653
+ year='2014',
654
+ homepage='http://cocodataset.org/'),
655
+ keypoint_info=dict({
656
+ 0:
657
+ dict(
658
+ name='nose',
659
+ id=0,
660
+ color=[51, 153, 255],
661
+ type='upper',
662
+ swap=''),
663
+ 1:
664
+ dict(
665
+ name='left_eye',
666
+ id=1,
667
+ color=[51, 153, 255],
668
+ type='upper',
669
+ swap='right_eye'),
670
+ 2:
671
+ dict(
672
+ name='right_eye',
673
+ id=2,
674
+ color=[51, 153, 255],
675
+ type='upper',
676
+ swap='left_eye'),
677
+ 3:
678
+ dict(
679
+ name='left_ear',
680
+ id=3,
681
+ color=[51, 153, 255],
682
+ type='upper',
683
+ swap='right_ear'),
684
+ 4:
685
+ dict(
686
+ name='right_ear',
687
+ id=4,
688
+ color=[51, 153, 255],
689
+ type='upper',
690
+ swap='left_ear'),
691
+ 5:
692
+ dict(
693
+ name='left_shoulder',
694
+ id=5,
695
+ color=[0, 255, 0],
696
+ type='upper',
697
+ swap='right_shoulder'),
698
+ 6:
699
+ dict(
700
+ name='right_shoulder',
701
+ id=6,
702
+ color=[255, 128, 0],
703
+ type='upper',
704
+ swap='left_shoulder'),
705
+ 7:
706
+ dict(
707
+ name='left_elbow',
708
+ id=7,
709
+ color=[0, 255, 0],
710
+ type='upper',
711
+ swap='right_elbow'),
712
+ 8:
713
+ dict(
714
+ name='right_elbow',
715
+ id=8,
716
+ color=[255, 128, 0],
717
+ type='upper',
718
+ swap='left_elbow'),
719
+ 9:
720
+ dict(
721
+ name='left_wrist',
722
+ id=9,
723
+ color=[0, 255, 0],
724
+ type='upper',
725
+ swap='right_wrist'),
726
+ 10:
727
+ dict(
728
+ name='right_wrist',
729
+ id=10,
730
+ color=[255, 128, 0],
731
+ type='upper',
732
+ swap='left_wrist'),
733
+ 11:
734
+ dict(
735
+ name='left_hip',
736
+ id=11,
737
+ color=[0, 255, 0],
738
+ type='lower',
739
+ swap='right_hip'),
740
+ 12:
741
+ dict(
742
+ name='right_hip',
743
+ id=12,
744
+ color=[255, 128, 0],
745
+ type='lower',
746
+ swap='left_hip'),
747
+ 13:
748
+ dict(
749
+ name='left_knee',
750
+ id=13,
751
+ color=[0, 255, 0],
752
+ type='lower',
753
+ swap='right_knee'),
754
+ 14:
755
+ dict(
756
+ name='right_knee',
757
+ id=14,
758
+ color=[255, 128, 0],
759
+ type='lower',
760
+ swap='left_knee'),
761
+ 15:
762
+ dict(
763
+ name='left_ankle',
764
+ id=15,
765
+ color=[0, 255, 0],
766
+ type='lower',
767
+ swap='right_ankle'),
768
+ 16:
769
+ dict(
770
+ name='right_ankle',
771
+ id=16,
772
+ color=[255, 128, 0],
773
+ type='lower',
774
+ swap='left_ankle')
775
+ }),
776
+ skeleton_info=dict({
777
+ 0:
778
+ dict(
779
+ link=('left_ankle', 'left_knee'), id=0, color=[0, 255, 0]),
780
+ 1:
781
+ dict(link=('left_knee', 'left_hip'), id=1, color=[0, 255, 0]),
782
+ 2:
783
+ dict(
784
+ link=('right_ankle', 'right_knee'),
785
+ id=2,
786
+ color=[255, 128, 0]),
787
+ 3:
788
+ dict(
789
+ link=('right_knee', 'right_hip'),
790
+ id=3,
791
+ color=[255, 128, 0]),
792
+ 4:
793
+ dict(
794
+ link=('left_hip', 'right_hip'), id=4, color=[51, 153,
795
+ 255]),
796
+ 5:
797
+ dict(
798
+ link=('left_shoulder', 'left_hip'),
799
+ id=5,
800
+ color=[51, 153, 255]),
801
+ 6:
802
+ dict(
803
+ link=('right_shoulder', 'right_hip'),
804
+ id=6,
805
+ color=[51, 153, 255]),
806
+ 7:
807
+ dict(
808
+ link=('left_shoulder', 'right_shoulder'),
809
+ id=7,
810
+ color=[51, 153, 255]),
811
+ 8:
812
+ dict(
813
+ link=('left_shoulder', 'left_elbow'),
814
+ id=8,
815
+ color=[0, 255, 0]),
816
+ 9:
817
+ dict(
818
+ link=('right_shoulder', 'right_elbow'),
819
+ id=9,
820
+ color=[255, 128, 0]),
821
+ 10:
822
+ dict(
823
+ link=('left_elbow', 'left_wrist'),
824
+ id=10,
825
+ color=[0, 255, 0]),
826
+ 11:
827
+ dict(
828
+ link=('right_elbow', 'right_wrist'),
829
+ id=11,
830
+ color=[255, 128, 0]),
831
+ 12:
832
+ dict(
833
+ link=('left_eye', 'right_eye'),
834
+ id=12,
835
+ color=[51, 153, 255]),
836
+ 13:
837
+ dict(link=('nose', 'left_eye'), id=13, color=[51, 153, 255]),
838
+ 14:
839
+ dict(link=('nose', 'right_eye'), id=14, color=[51, 153, 255]),
840
+ 15:
841
+ dict(
842
+ link=('left_eye', 'left_ear'), id=15, color=[51, 153,
843
+ 255]),
844
+ 16:
845
+ dict(
846
+ link=('right_eye', 'right_ear'),
847
+ id=16,
848
+ color=[51, 153, 255]),
849
+ 17:
850
+ dict(
851
+ link=('left_ear', 'left_shoulder'),
852
+ id=17,
853
+ color=[51, 153, 255]),
854
+ 18:
855
+ dict(
856
+ link=('right_ear', 'right_shoulder'),
857
+ id=18,
858
+ color=[51, 153, 255])
859
+ }),
860
+ joint_weights=[
861
+ 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.2, 1.2, 1.5, 1.5, 1.0,
862
+ 1.0, 1.2, 1.2, 1.5, 1.5
863
+ ],
864
+ sigmas=[
865
+ 0.026, 0.025, 0.025, 0.035, 0.035, 0.079, 0.079, 0.072, 0.072,
866
+ 0.062, 0.062, 0.107, 0.107, 0.087, 0.087, 0.089, 0.089
867
+ ])),
868
+ test=dict(
869
+ type='TopDownCocoDataset',
870
+ ann_file='data/coco/annotations/person_keypoints_val2017.json',
871
+ img_prefix='data/coco/val2017/',
872
+ data_cfg=dict(
873
+ image_size=[192, 256],
874
+ heatmap_size=[48, 64],
875
+ num_output_channels=17,
876
+ num_joints=17,
877
+ dataset_channel=[[
878
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
879
+ ]],
880
+ inference_channel=[
881
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
882
+ ],
883
+ soft_nms=False,
884
+ nms_thr=1.0,
885
+ oks_thr=0.9,
886
+ vis_thr=0.2,
887
+ use_gt_bbox=False,
888
+ det_bbox_thr=0.0,
889
+ bbox_file=
890
+ 'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json'
891
+ ),
892
+ pipeline=[
893
+ dict(type='LoadImageFromFile'),
894
+ dict(type='TopDownAffine'),
895
+ dict(type='ToTensor'),
896
+ dict(
897
+ type='NormalizeTensor',
898
+ mean=[0.485, 0.456, 0.406],
899
+ std=[0.229, 0.224, 0.225]),
900
+ dict(
901
+ type='Collect',
902
+ keys=['img'],
903
+ meta_keys=[
904
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
905
+ 'flip_pairs'
906
+ ])
907
+ ],
908
+ dataset_info=dict(
909
+ dataset_name='coco',
910
+ paper_info=dict(
911
+ author=
912
+ 'Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence',
913
+ title='Microsoft coco: Common objects in context',
914
+ container='European conference on computer vision',
915
+ year='2014',
916
+ homepage='http://cocodataset.org/'),
917
+ keypoint_info=dict({
918
+ 0:
919
+ dict(
920
+ name='nose',
921
+ id=0,
922
+ color=[51, 153, 255],
923
+ type='upper',
924
+ swap=''),
925
+ 1:
926
+ dict(
927
+ name='left_eye',
928
+ id=1,
929
+ color=[51, 153, 255],
930
+ type='upper',
931
+ swap='right_eye'),
932
+ 2:
933
+ dict(
934
+ name='right_eye',
935
+ id=2,
936
+ color=[51, 153, 255],
937
+ type='upper',
938
+ swap='left_eye'),
939
+ 3:
940
+ dict(
941
+ name='left_ear',
942
+ id=3,
943
+ color=[51, 153, 255],
944
+ type='upper',
945
+ swap='right_ear'),
946
+ 4:
947
+ dict(
948
+ name='right_ear',
949
+ id=4,
950
+ color=[51, 153, 255],
951
+ type='upper',
952
+ swap='left_ear'),
953
+ 5:
954
+ dict(
955
+ name='left_shoulder',
956
+ id=5,
957
+ color=[0, 255, 0],
958
+ type='upper',
959
+ swap='right_shoulder'),
960
+ 6:
961
+ dict(
962
+ name='right_shoulder',
963
+ id=6,
964
+ color=[255, 128, 0],
965
+ type='upper',
966
+ swap='left_shoulder'),
967
+ 7:
968
+ dict(
969
+ name='left_elbow',
970
+ id=7,
971
+ color=[0, 255, 0],
972
+ type='upper',
973
+ swap='right_elbow'),
974
+ 8:
975
+ dict(
976
+ name='right_elbow',
977
+ id=8,
978
+ color=[255, 128, 0],
979
+ type='upper',
980
+ swap='left_elbow'),
981
+ 9:
982
+ dict(
983
+ name='left_wrist',
984
+ id=9,
985
+ color=[0, 255, 0],
986
+ type='upper',
987
+ swap='right_wrist'),
988
+ 10:
989
+ dict(
990
+ name='right_wrist',
991
+ id=10,
992
+ color=[255, 128, 0],
993
+ type='upper',
994
+ swap='left_wrist'),
995
+ 11:
996
+ dict(
997
+ name='left_hip',
998
+ id=11,
999
+ color=[0, 255, 0],
1000
+ type='lower',
1001
+ swap='right_hip'),
1002
+ 12:
1003
+ dict(
1004
+ name='right_hip',
1005
+ id=12,
1006
+ color=[255, 128, 0],
1007
+ type='lower',
1008
+ swap='left_hip'),
1009
+ 13:
1010
+ dict(
1011
+ name='left_knee',
1012
+ id=13,
1013
+ color=[0, 255, 0],
1014
+ type='lower',
1015
+ swap='right_knee'),
1016
+ 14:
1017
+ dict(
1018
+ name='right_knee',
1019
+ id=14,
1020
+ color=[255, 128, 0],
1021
+ type='lower',
1022
+ swap='left_knee'),
1023
+ 15:
1024
+ dict(
1025
+ name='left_ankle',
1026
+ id=15,
1027
+ color=[0, 255, 0],
1028
+ type='lower',
1029
+ swap='right_ankle'),
1030
+ 16:
1031
+ dict(
1032
+ name='right_ankle',
1033
+ id=16,
1034
+ color=[255, 128, 0],
1035
+ type='lower',
1036
+ swap='left_ankle')
1037
+ }),
1038
+ skeleton_info=dict({
1039
+ 0:
1040
+ dict(
1041
+ link=('left_ankle', 'left_knee'), id=0, color=[0, 255, 0]),
1042
+ 1:
1043
+ dict(link=('left_knee', 'left_hip'), id=1, color=[0, 255, 0]),
1044
+ 2:
1045
+ dict(
1046
+ link=('right_ankle', 'right_knee'),
1047
+ id=2,
1048
+ color=[255, 128, 0]),
1049
+ 3:
1050
+ dict(
1051
+ link=('right_knee', 'right_hip'),
1052
+ id=3,
1053
+ color=[255, 128, 0]),
1054
+ 4:
1055
+ dict(
1056
+ link=('left_hip', 'right_hip'), id=4, color=[51, 153,
1057
+ 255]),
1058
+ 5:
1059
+ dict(
1060
+ link=('left_shoulder', 'left_hip'),
1061
+ id=5,
1062
+ color=[51, 153, 255]),
1063
+ 6:
1064
+ dict(
1065
+ link=('right_shoulder', 'right_hip'),
1066
+ id=6,
1067
+ color=[51, 153, 255]),
1068
+ 7:
1069
+ dict(
1070
+ link=('left_shoulder', 'right_shoulder'),
1071
+ id=7,
1072
+ color=[51, 153, 255]),
1073
+ 8:
1074
+ dict(
1075
+ link=('left_shoulder', 'left_elbow'),
1076
+ id=8,
1077
+ color=[0, 255, 0]),
1078
+ 9:
1079
+ dict(
1080
+ link=('right_shoulder', 'right_elbow'),
1081
+ id=9,
1082
+ color=[255, 128, 0]),
1083
+ 10:
1084
+ dict(
1085
+ link=('left_elbow', 'left_wrist'),
1086
+ id=10,
1087
+ color=[0, 255, 0]),
1088
+ 11:
1089
+ dict(
1090
+ link=('right_elbow', 'right_wrist'),
1091
+ id=11,
1092
+ color=[255, 128, 0]),
1093
+ 12:
1094
+ dict(
1095
+ link=('left_eye', 'right_eye'),
1096
+ id=12,
1097
+ color=[51, 153, 255]),
1098
+ 13:
1099
+ dict(link=('nose', 'left_eye'), id=13, color=[51, 153, 255]),
1100
+ 14:
1101
+ dict(link=('nose', 'right_eye'), id=14, color=[51, 153, 255]),
1102
+ 15:
1103
+ dict(
1104
+ link=('left_eye', 'left_ear'), id=15, color=[51, 153,
1105
+ 255]),
1106
+ 16:
1107
+ dict(
1108
+ link=('right_eye', 'right_ear'),
1109
+ id=16,
1110
+ color=[51, 153, 255]),
1111
+ 17:
1112
+ dict(
1113
+ link=('left_ear', 'left_shoulder'),
1114
+ id=17,
1115
+ color=[51, 153, 255]),
1116
+ 18:
1117
+ dict(
1118
+ link=('right_ear', 'right_shoulder'),
1119
+ id=18,
1120
+ color=[51, 153, 255])
1121
+ }),
1122
+ joint_weights=[
1123
+ 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.2, 1.2, 1.5, 1.5, 1.0,
1124
+ 1.0, 1.2, 1.2, 1.5, 1.5
1125
+ ],
1126
+ sigmas=[
1127
+ 0.026, 0.025, 0.025, 0.035, 0.035, 0.079, 0.079, 0.072, 0.072,
1128
+ 0.062, 0.062, 0.107, 0.107, 0.087, 0.087, 0.089, 0.089
1129
+ ])))
examples/000001_mpiinew_test.mp4 ADDED
Binary file (159 kB). View file
 
faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:047c8118fc5ca88ba5ae1fab72f2cd6b070501fe3af2f3cba5cfa9a89b44b03e
3
+ size 167287506
hrnet_w48_coco_256x192-b9e0b3ab_20200708.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b9e0b3ab0439cb68e166c7543e59d2587cd8d7e9acf5ea62a8378eeb82fb50e5
3
+ size 255011654
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ mediapy
2
+ numpy==1.23.5
3
+ torch==1.11.0
4
+ torchvision==0.12.0
5
+ openmim==0.1.5
6
+ mmdet==2.24.1
7
+ mmpose==0.25.1