zhuoyang20 Skhaki committed on
Commit
509aa94
·
verified ·
1 Parent(s): e1e22df

Add Mask2Former-L low-resolution checkpoint (#3)

Browse files

- Add Mask2Former-L low-resolution checkpoint (883257b3bebea41cb483bcd5a0113fb54a2a2b33)


Co-authored-by: Samir Khaki <[email protected]>

mask2former-swinl-8xb2-512x1024-90k/mask2former-swinl-8xb2-512x1024-90k.py ADDED
@@ -0,0 +1,607 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ auto_scale_lr = dict(base_batch_size=16, enable=False)
2
+ backbone_embed_multi = dict(decay_mult=0.0, lr_mult=0.1)
3
+ backbone_norm_multi = dict(decay_mult=0.0, lr_mult=0.1)
4
+ crop_size = (
5
+ 256,
6
+ 512,
7
+ )
8
+ custom_keys = dict({
9
+ 'absolute_pos_embed':
10
+ dict(decay_mult=0.0, lr_mult=0.1),
11
+ 'backbone':
12
+ dict(decay_mult=1.0, lr_mult=0.1),
13
+ 'backbone.norm':
14
+ dict(decay_mult=0.0, lr_mult=0.1),
15
+ 'backbone.patch_embed.norm':
16
+ dict(decay_mult=0.0, lr_mult=0.1),
17
+ 'backbone.stages.0.blocks.0.norm':
18
+ dict(decay_mult=0.0, lr_mult=0.1),
19
+ 'backbone.stages.0.blocks.1.norm':
20
+ dict(decay_mult=0.0, lr_mult=0.1),
21
+ 'backbone.stages.0.downsample.norm':
22
+ dict(decay_mult=0.0, lr_mult=0.1),
23
+ 'backbone.stages.1.blocks.0.norm':
24
+ dict(decay_mult=0.0, lr_mult=0.1),
25
+ 'backbone.stages.1.blocks.1.norm':
26
+ dict(decay_mult=0.0, lr_mult=0.1),
27
+ 'backbone.stages.1.downsample.norm':
28
+ dict(decay_mult=0.0, lr_mult=0.1),
29
+ 'backbone.stages.2.blocks.0.norm':
30
+ dict(decay_mult=0.0, lr_mult=0.1),
31
+ 'backbone.stages.2.blocks.1.norm':
32
+ dict(decay_mult=0.0, lr_mult=0.1),
33
+ 'backbone.stages.2.blocks.10.norm':
34
+ dict(decay_mult=0.0, lr_mult=0.1),
35
+ 'backbone.stages.2.blocks.11.norm':
36
+ dict(decay_mult=0.0, lr_mult=0.1),
37
+ 'backbone.stages.2.blocks.12.norm':
38
+ dict(decay_mult=0.0, lr_mult=0.1),
39
+ 'backbone.stages.2.blocks.13.norm':
40
+ dict(decay_mult=0.0, lr_mult=0.1),
41
+ 'backbone.stages.2.blocks.14.norm':
42
+ dict(decay_mult=0.0, lr_mult=0.1),
43
+ 'backbone.stages.2.blocks.15.norm':
44
+ dict(decay_mult=0.0, lr_mult=0.1),
45
+ 'backbone.stages.2.blocks.16.norm':
46
+ dict(decay_mult=0.0, lr_mult=0.1),
47
+ 'backbone.stages.2.blocks.17.norm':
48
+ dict(decay_mult=0.0, lr_mult=0.1),
49
+ 'backbone.stages.2.blocks.2.norm':
50
+ dict(decay_mult=0.0, lr_mult=0.1),
51
+ 'backbone.stages.2.blocks.3.norm':
52
+ dict(decay_mult=0.0, lr_mult=0.1),
53
+ 'backbone.stages.2.blocks.4.norm':
54
+ dict(decay_mult=0.0, lr_mult=0.1),
55
+ 'backbone.stages.2.blocks.5.norm':
56
+ dict(decay_mult=0.0, lr_mult=0.1),
57
+ 'backbone.stages.2.blocks.6.norm':
58
+ dict(decay_mult=0.0, lr_mult=0.1),
59
+ 'backbone.stages.2.blocks.7.norm':
60
+ dict(decay_mult=0.0, lr_mult=0.1),
61
+ 'backbone.stages.2.blocks.8.norm':
62
+ dict(decay_mult=0.0, lr_mult=0.1),
63
+ 'backbone.stages.2.blocks.9.norm':
64
+ dict(decay_mult=0.0, lr_mult=0.1),
65
+ 'backbone.stages.2.downsample.norm':
66
+ dict(decay_mult=0.0, lr_mult=0.1),
67
+ 'backbone.stages.3.blocks.0.norm':
68
+ dict(decay_mult=0.0, lr_mult=0.1),
69
+ 'backbone.stages.3.blocks.1.norm':
70
+ dict(decay_mult=0.0, lr_mult=0.1),
71
+ 'level_embed':
72
+ dict(decay_mult=0.0, lr_mult=1.0),
73
+ 'query_embed':
74
+ dict(decay_mult=0.0, lr_mult=1.0),
75
+ 'query_feat':
76
+ dict(decay_mult=0.0, lr_mult=1.0),
77
+ 'relative_position_bias_table':
78
+ dict(decay_mult=0.0, lr_mult=0.1)
79
+ })
80
+ data_preprocessor = dict(
81
+ bgr_to_rgb=True,
82
+ mean=[
83
+ 123.675,
84
+ 116.28,
85
+ 103.53,
86
+ ],
87
+ pad_val=0,
88
+ seg_pad_val=255,
89
+ size=(
90
+ 256,
91
+ 512,
92
+ ),
93
+ std=[
94
+ 58.395,
95
+ 57.12,
96
+ 57.375,
97
+ ],
98
+ test_cfg=dict(size_divisor=32),
99
+ type='SegDataPreProcessor')
100
+ data_root = '/dataset/cityscapes/'
101
+ dataset_type = 'CityscapesDataset'
102
+ default_hooks = dict(
103
+ checkpoint=dict(
104
+ by_epoch=False, interval=5000, save_best='mIoU',
105
+ type='CheckpointHook'),
106
+ logger=dict(interval=50, log_metric_by_epoch=False, type='LoggerHook'),
107
+ param_scheduler=dict(type='ParamSchedulerHook'),
108
+ sampler_seed=dict(type='DistSamplerSeedHook'),
109
+ timer=dict(type='IterTimerHook'),
110
+ visualization=dict(type='SegVisualizationHook'))
111
+ default_scope = 'mmseg'
112
+ depths = [
113
+ 2,
114
+ 2,
115
+ 18,
116
+ 2,
117
+ ]
118
+ embed_multi = dict(decay_mult=0.0, lr_mult=1.0)
119
+ env_cfg = dict(
120
+ cudnn_benchmark=True,
121
+ dist_cfg=dict(backend='nccl'),
122
+ mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0))
123
+ img_ratios = [
124
+ 0.5,
125
+ 0.75,
126
+ 1.0,
127
+ 1.25,
128
+ 1.5,
129
+ 1.75,
130
+ ]
131
+ launcher = 'pytorch'
132
+ load_from = 'work_dirs/mask2former-swinl-8xb2-512x1024-90k/mask2former-swinl-8xb2-512x1024-90k_ckpt.pth'
133
+ log_level = 'INFO'
134
+ log_processor = dict(by_epoch=False)
135
+ model = dict(
136
+ backbone=dict(
137
+ attn_drop_rate=0.0,
138
+ depths=[
139
+ 2,
140
+ 2,
141
+ 18,
142
+ 2,
143
+ ],
144
+ drop_path_rate=0.3,
145
+ drop_rate=0.0,
146
+ embed_dims=192,
147
+ frozen_stages=-1,
148
+ init_cfg=dict(
149
+ checkpoint=
150
+ 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/swin/swin_large_patch4_window12_384_22k_20220412-6580f57d.pth',
151
+ type='Pretrained'),
152
+ mlp_ratio=4,
153
+ num_heads=[
154
+ 6,
155
+ 12,
156
+ 24,
157
+ 48,
158
+ ],
159
+ out_indices=(
160
+ 0,
161
+ 1,
162
+ 2,
163
+ 3,
164
+ ),
165
+ patch_norm=True,
166
+ pretrain_img_size=384,
167
+ qk_scale=None,
168
+ qkv_bias=True,
169
+ type='SwinTransformer',
170
+ window_size=12,
171
+ with_cp=False),
172
+ data_preprocessor=dict(
173
+ bgr_to_rgb=True,
174
+ mean=[
175
+ 123.675,
176
+ 116.28,
177
+ 103.53,
178
+ ],
179
+ pad_val=0,
180
+ seg_pad_val=255,
181
+ size=(
182
+ 256,
183
+ 512,
184
+ ),
185
+ std=[
186
+ 58.395,
187
+ 57.12,
188
+ 57.375,
189
+ ],
190
+ test_cfg=dict(size_divisor=32),
191
+ type='SegDataPreProcessor'),
192
+ decode_head=dict(
193
+ align_corners=False,
194
+ enforce_decoder_input_project=False,
195
+ feat_channels=256,
196
+ in_channels=[
197
+ 192,
198
+ 384,
199
+ 768,
200
+ 1536,
201
+ ],
202
+ loss_cls=dict(
203
+ class_weight=[
204
+ 1.0,
205
+ 1.0,
206
+ 1.0,
207
+ 1.0,
208
+ 1.0,
209
+ 1.0,
210
+ 1.0,
211
+ 1.0,
212
+ 1.0,
213
+ 1.0,
214
+ 1.0,
215
+ 1.0,
216
+ 1.0,
217
+ 1.0,
218
+ 1.0,
219
+ 1.0,
220
+ 1.0,
221
+ 1.0,
222
+ 1.0,
223
+ 0.1,
224
+ ],
225
+ loss_weight=2.0,
226
+ reduction='mean',
227
+ type='mmdet.CrossEntropyLoss',
228
+ use_sigmoid=False),
229
+ loss_dice=dict(
230
+ activate=True,
231
+ eps=1.0,
232
+ loss_weight=5.0,
233
+ naive_dice=True,
234
+ reduction='mean',
235
+ type='mmdet.DiceLoss',
236
+ use_sigmoid=True),
237
+ loss_mask=dict(
238
+ loss_weight=5.0,
239
+ reduction='mean',
240
+ type='mmdet.CrossEntropyLoss',
241
+ use_sigmoid=True),
242
+ num_classes=19,
243
+ num_queries=100,
244
+ num_transformer_feat_level=3,
245
+ out_channels=256,
246
+ pixel_decoder=dict(
247
+ act_cfg=dict(type='ReLU'),
248
+ encoder=dict(
249
+ init_cfg=None,
250
+ layer_cfg=dict(
251
+ ffn_cfg=dict(
252
+ act_cfg=dict(inplace=True, type='ReLU'),
253
+ embed_dims=256,
254
+ feedforward_channels=1024,
255
+ ffn_drop=0.0,
256
+ num_fcs=2),
257
+ self_attn_cfg=dict(
258
+ batch_first=True,
259
+ dropout=0.0,
260
+ embed_dims=256,
261
+ im2col_step=64,
262
+ init_cfg=None,
263
+ norm_cfg=None,
264
+ num_heads=8,
265
+ num_levels=3,
266
+ num_points=4)),
267
+ num_layers=6),
268
+ init_cfg=None,
269
+ norm_cfg=dict(num_groups=32, type='GN'),
270
+ num_outs=3,
271
+ positional_encoding=dict(normalize=True, num_feats=128),
272
+ type='mmdet.MSDeformAttnPixelDecoder'),
273
+ positional_encoding=dict(normalize=True, num_feats=128),
274
+ strides=[
275
+ 4,
276
+ 8,
277
+ 16,
278
+ 32,
279
+ ],
280
+ train_cfg=dict(
281
+ assigner=dict(
282
+ match_costs=[
283
+ dict(type='mmdet.ClassificationCost', weight=2.0),
284
+ dict(
285
+ type='mmdet.CrossEntropyLossCost',
286
+ use_sigmoid=True,
287
+ weight=5.0),
288
+ dict(
289
+ eps=1.0,
290
+ pred_act=True,
291
+ type='mmdet.DiceCost',
292
+ weight=5.0),
293
+ ],
294
+ type='mmdet.HungarianAssigner'),
295
+ importance_sample_ratio=0.75,
296
+ num_points=12544,
297
+ oversample_ratio=3.0,
298
+ sampler=dict(type='mmdet.MaskPseudoSampler')),
299
+ transformer_decoder=dict(
300
+ init_cfg=None,
301
+ layer_cfg=dict(
302
+ cross_attn_cfg=dict(
303
+ attn_drop=0.0,
304
+ batch_first=True,
305
+ dropout_layer=None,
306
+ embed_dims=256,
307
+ num_heads=8,
308
+ proj_drop=0.0),
309
+ ffn_cfg=dict(
310
+ act_cfg=dict(inplace=True, type='ReLU'),
311
+ add_identity=True,
312
+ dropout_layer=None,
313
+ embed_dims=256,
314
+ feedforward_channels=2048,
315
+ ffn_drop=0.0,
316
+ num_fcs=2),
317
+ self_attn_cfg=dict(
318
+ attn_drop=0.0,
319
+ batch_first=True,
320
+ dropout_layer=None,
321
+ embed_dims=256,
322
+ num_heads=8,
323
+ proj_drop=0.0)),
324
+ num_layers=9,
325
+ return_intermediate=True),
326
+ type='Mask2FormerHead'),
327
+ test_cfg=dict(mode='whole'),
328
+ train_cfg=dict(),
329
+ type='EncoderDecoder')
330
+ num_classes = 19
331
+ optim_wrapper = dict(
332
+ clip_grad=dict(max_norm=0.01, norm_type=2),
333
+ optimizer=dict(
334
+ betas=(
335
+ 0.9,
336
+ 0.999,
337
+ ),
338
+ eps=1e-08,
339
+ lr=0.0001,
340
+ type='AdamW',
341
+ weight_decay=0.05),
342
+ paramwise_cfg=dict(
343
+ custom_keys=dict({
344
+ 'absolute_pos_embed':
345
+ dict(decay_mult=0.0, lr_mult=0.1),
346
+ 'backbone':
347
+ dict(decay_mult=1.0, lr_mult=0.1),
348
+ 'backbone.norm':
349
+ dict(decay_mult=0.0, lr_mult=0.1),
350
+ 'backbone.patch_embed.norm':
351
+ dict(decay_mult=0.0, lr_mult=0.1),
352
+ 'backbone.stages.0.blocks.0.norm':
353
+ dict(decay_mult=0.0, lr_mult=0.1),
354
+ 'backbone.stages.0.blocks.1.norm':
355
+ dict(decay_mult=0.0, lr_mult=0.1),
356
+ 'backbone.stages.0.downsample.norm':
357
+ dict(decay_mult=0.0, lr_mult=0.1),
358
+ 'backbone.stages.1.blocks.0.norm':
359
+ dict(decay_mult=0.0, lr_mult=0.1),
360
+ 'backbone.stages.1.blocks.1.norm':
361
+ dict(decay_mult=0.0, lr_mult=0.1),
362
+ 'backbone.stages.1.downsample.norm':
363
+ dict(decay_mult=0.0, lr_mult=0.1),
364
+ 'backbone.stages.2.blocks.0.norm':
365
+ dict(decay_mult=0.0, lr_mult=0.1),
366
+ 'backbone.stages.2.blocks.1.norm':
367
+ dict(decay_mult=0.0, lr_mult=0.1),
368
+ 'backbone.stages.2.blocks.10.norm':
369
+ dict(decay_mult=0.0, lr_mult=0.1),
370
+ 'backbone.stages.2.blocks.11.norm':
371
+ dict(decay_mult=0.0, lr_mult=0.1),
372
+ 'backbone.stages.2.blocks.12.norm':
373
+ dict(decay_mult=0.0, lr_mult=0.1),
374
+ 'backbone.stages.2.blocks.13.norm':
375
+ dict(decay_mult=0.0, lr_mult=0.1),
376
+ 'backbone.stages.2.blocks.14.norm':
377
+ dict(decay_mult=0.0, lr_mult=0.1),
378
+ 'backbone.stages.2.blocks.15.norm':
379
+ dict(decay_mult=0.0, lr_mult=0.1),
380
+ 'backbone.stages.2.blocks.16.norm':
381
+ dict(decay_mult=0.0, lr_mult=0.1),
382
+ 'backbone.stages.2.blocks.17.norm':
383
+ dict(decay_mult=0.0, lr_mult=0.1),
384
+ 'backbone.stages.2.blocks.2.norm':
385
+ dict(decay_mult=0.0, lr_mult=0.1),
386
+ 'backbone.stages.2.blocks.3.norm':
387
+ dict(decay_mult=0.0, lr_mult=0.1),
388
+ 'backbone.stages.2.blocks.4.norm':
389
+ dict(decay_mult=0.0, lr_mult=0.1),
390
+ 'backbone.stages.2.blocks.5.norm':
391
+ dict(decay_mult=0.0, lr_mult=0.1),
392
+ 'backbone.stages.2.blocks.6.norm':
393
+ dict(decay_mult=0.0, lr_mult=0.1),
394
+ 'backbone.stages.2.blocks.7.norm':
395
+ dict(decay_mult=0.0, lr_mult=0.1),
396
+ 'backbone.stages.2.blocks.8.norm':
397
+ dict(decay_mult=0.0, lr_mult=0.1),
398
+ 'backbone.stages.2.blocks.9.norm':
399
+ dict(decay_mult=0.0, lr_mult=0.1),
400
+ 'backbone.stages.2.downsample.norm':
401
+ dict(decay_mult=0.0, lr_mult=0.1),
402
+ 'backbone.stages.3.blocks.0.norm':
403
+ dict(decay_mult=0.0, lr_mult=0.1),
404
+ 'backbone.stages.3.blocks.1.norm':
405
+ dict(decay_mult=0.0, lr_mult=0.1),
406
+ 'level_embed':
407
+ dict(decay_mult=0.0, lr_mult=1.0),
408
+ 'query_embed':
409
+ dict(decay_mult=0.0, lr_mult=1.0),
410
+ 'query_feat':
411
+ dict(decay_mult=0.0, lr_mult=1.0),
412
+ 'relative_position_bias_table':
413
+ dict(decay_mult=0.0, lr_mult=0.1)
414
+ }),
415
+ norm_decay_mult=0.0),
416
+ type='OptimWrapper')
417
+ optimizer = dict(
418
+ betas=(
419
+ 0.9,
420
+ 0.999,
421
+ ),
422
+ eps=1e-08,
423
+ lr=0.0001,
424
+ type='AdamW',
425
+ weight_decay=0.05)
426
+ param_scheduler = [
427
+ dict(
428
+ begin=0,
429
+ by_epoch=False,
430
+ end=90000,
431
+ eta_min=0,
432
+ power=0.9,
433
+ type='PolyLR'),
434
+ ]
435
+ pretrained = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/swin/swin_large_patch4_window12_384_22k_20220412-6580f57d.pth'
436
+ resume = False
437
+ test_cfg = dict(type='TestLoop')
438
+ test_dataloader = dict(
439
+ batch_size=1,
440
+ dataset=dict(
441
+ data_prefix=dict(
442
+ img_path='leftImg8bit/val', seg_map_path='gtFine/val'),
443
+ data_root='/dataset/cityscapes/',
444
+ pipeline=[
445
+ dict(type='LoadImageFromFile'),
446
+ dict(keep_ratio=True, scale=(
447
+ 2048,
448
+ 1024,
449
+ ), type='Resize'),
450
+ dict(type='LoadAnnotations'),
451
+ dict(type='PackSegInputs'),
452
+ ],
453
+ type='CityscapesDataset'),
454
+ num_workers=4,
455
+ persistent_workers=True,
456
+ sampler=dict(shuffle=False, type='DefaultSampler'))
457
+ test_evaluator = dict(
458
+ iou_metrics=[
459
+ 'mIoU',
460
+ ], type='IoUMetric')
461
+ test_pipeline = [
462
+ dict(type='LoadImageFromFile'),
463
+ dict(keep_ratio=True, scale=(
464
+ 2048,
465
+ 1024,
466
+ ), type='Resize'),
467
+ dict(type='LoadAnnotations'),
468
+ dict(type='PackSegInputs'),
469
+ ]
470
+ train_cfg = dict(max_iters=90000, type='IterBasedTrainLoop', val_interval=5000)
471
+ train_dataloader = dict(
472
+ batch_size=2,
473
+ dataset=dict(
474
+ data_prefix=dict(
475
+ img_path='leftImg8bit/train', seg_map_path='gtFine/train'),
476
+ data_root='/dataset/cityscapes/',
477
+ pipeline=[
478
+ dict(type='LoadImageFromFile'),
479
+ dict(type='LoadAnnotations'),
480
+ dict(
481
+ max_size=4096,
482
+ resize_type='ResizeShortestEdge',
483
+ scales=[
484
+ 512,
485
+ 614,
486
+ 716,
487
+ 819,
488
+ 921,
489
+ 1024,
490
+ 1126,
491
+ 1228,
492
+ 1331,
493
+ 1433,
494
+ 1536,
495
+ 1638,
496
+ 1740,
497
+ 1843,
498
+ 1945,
499
+ 2048,
500
+ ],
501
+ type='RandomChoiceResize'),
502
+ dict(
503
+ cat_max_ratio=0.75, crop_size=(
504
+ 256,
505
+ 512,
506
+ ), type='RandomCrop'),
507
+ dict(prob=0.5, type='RandomFlip'),
508
+ dict(type='PhotoMetricDistortion'),
509
+ dict(type='PackSegInputs'),
510
+ ],
511
+ type='CityscapesDataset'),
512
+ num_workers=2,
513
+ persistent_workers=True,
514
+ sampler=dict(shuffle=True, type='InfiniteSampler'))
515
+ train_pipeline = [
516
+ dict(type='LoadImageFromFile'),
517
+ dict(type='LoadAnnotations'),
518
+ dict(
519
+ max_size=4096,
520
+ resize_type='ResizeShortestEdge',
521
+ scales=[
522
+ 512,
523
+ 614,
524
+ 716,
525
+ 819,
526
+ 921,
527
+ 1024,
528
+ 1126,
529
+ 1228,
530
+ 1331,
531
+ 1433,
532
+ 1536,
533
+ 1638,
534
+ 1740,
535
+ 1843,
536
+ 1945,
537
+ 2048,
538
+ ],
539
+ type='RandomChoiceResize'),
540
+ dict(cat_max_ratio=0.75, crop_size=(
541
+ 256,
542
+ 512,
543
+ ), type='RandomCrop'),
544
+ dict(prob=0.5, type='RandomFlip'),
545
+ dict(type='PhotoMetricDistortion'),
546
+ dict(type='PackSegInputs'),
547
+ ]
548
+ tta_model = dict(type='SegTTAModel')
549
+ tta_pipeline = [
550
+ dict(backend_args=None, type='LoadImageFromFile'),
551
+ dict(
552
+ transforms=[
553
+ [
554
+ dict(keep_ratio=True, scale_factor=0.5, type='Resize'),
555
+ dict(keep_ratio=True, scale_factor=0.75, type='Resize'),
556
+ dict(keep_ratio=True, scale_factor=1.0, type='Resize'),
557
+ dict(keep_ratio=True, scale_factor=1.25, type='Resize'),
558
+ dict(keep_ratio=True, scale_factor=1.5, type='Resize'),
559
+ dict(keep_ratio=True, scale_factor=1.75, type='Resize'),
560
+ ],
561
+ [
562
+ dict(direction='horizontal', prob=0.0, type='RandomFlip'),
563
+ dict(direction='horizontal', prob=1.0, type='RandomFlip'),
564
+ ],
565
+ [
566
+ dict(type='LoadAnnotations'),
567
+ ],
568
+ [
569
+ dict(type='PackSegInputs'),
570
+ ],
571
+ ],
572
+ type='TestTimeAug'),
573
+ ]
574
+ val_cfg = dict(type='ValLoop')
575
+ val_dataloader = dict(
576
+ batch_size=1,
577
+ dataset=dict(
578
+ data_prefix=dict(
579
+ img_path='leftImg8bit/val', seg_map_path='gtFine/val'),
580
+ data_root='/dataset/cityscapes/',
581
+ pipeline=[
582
+ dict(type='LoadImageFromFile'),
583
+ dict(keep_ratio=True, scale=(
584
+ 2048,
585
+ 1024,
586
+ ), type='Resize'),
587
+ dict(type='LoadAnnotations'),
588
+ dict(type='PackSegInputs'),
589
+ ],
590
+ type='CityscapesDataset'),
591
+ num_workers=4,
592
+ persistent_workers=True,
593
+ sampler=dict(shuffle=False, type='DefaultSampler'))
594
+ val_evaluator = dict(
595
+ iou_metrics=[
596
+ 'mIoU',
597
+ ], type='IoUMetric')
598
+ vis_backends = [
599
+ dict(type='LocalVisBackend'),
600
+ ]
601
+ visualizer = dict(
602
+ name='visualizer',
603
+ type='SegLocalVisualizer',
604
+ vis_backends=[
605
+ dict(type='LocalVisBackend'),
606
+ ])
607
+ work_dir = './work_dirs/mask2former-swinl-8xb2-512x1024-90k'
mask2former-swinl-8xb2-512x1024-90k/mask2former-swinl-8xb2-512x1024-90k_ckpt.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b332dc95f3ec098a27016be86893bdfabaab08b5eaaca3cf2e3fe085afe1a142
3
+ size 2610352060