<!DOCTYPE html>
<html lang="en">
<head>
<title>L2ID 2022</title>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">
<link href="https://fonts.googleapis.com/css?family=B612+Mono|Cabin:400,700&display=swap" rel="stylesheet">
<link rel="stylesheet" href="fonts/icomoon/style.css">
<link rel="stylesheet" href="https://stackpath.bootstrapcdn.com/bootstrap/4.4.1/css/bootstrap.min.css"
integrity="sha384-Vkoo8x4CGsO3+Hhxv8T/Q5PaXtkKtu6ug5TOeNV6gBiFeWPGFN9MuhOf23Q9Ifjh" crossorigin="anonymous">
<link rel="stylesheet" href="css/jquery-ui.css">
<link rel="stylesheet" href="css/owl.carousel.min.css">
<link rel="stylesheet" href="css/owl.theme.default.min.css">
<link rel="stylesheet" href="css/owl.theme.default.min.css">
<link rel="stylesheet" href="css/jquery.fancybox.min.css">
<link rel="stylesheet" href="fonts/flaticon/font/flaticon.css">
<link rel="stylesheet" href="css/aos.css">
<link href="css/jquery.mb.YTPlayer.min.css" media="all" rel="stylesheet" type="text/css">
<link rel="stylesheet" href="css/style.css">
<!-- HTML5 shim and Respond.js for IE8 support of HTML5 elements and media queries -->
<!--[if lt IE 9]>
<script src="https://oss.maxcdn.com/html5shiv/3.7.2/html5shiv.min.js"></script>
<script src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js"></script>
<![endif]-->
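<!-- Google Analytics (analytics.js) loader: registers tracking ID UA-88572407-1 and sends a pageview -->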
<script>
(function (i, s, o, g, r, a, m) {
i['GoogleAnalyticsObject'] = r;
i[r] = i[r] || function () {
(i[r].q = i[r].q || []).push(arguments)
}, i[r].l = 1 * new Date();
a = s.createElement(o),
m = s.getElementsByTagName(o)[0];
a.async = 1;
a.src = g;
m.parentNode.insertBefore(a, m)
})(window, document, 'script', 'https://www.google-analytics.com/analytics.js', 'ga');
ga('create', 'UA-88572407-1', 'auto');
ga('send', 'pageview');
</script>
</head>
<body data-spy="scroll" data-target=".site-navbar-target" data-offset="300">
<div class="site-wrap">
<div class="site-mobile-menu site-navbar-target">
<div class="site-mobile-menu-header">
<div class="site-mobile-menu-close mt-3">
<span class="icon-close2 js-menu-toggle"></span>
</div>
</div>
<div class="site-mobile-menu-body"></div>
</div>
<div class="header-top">
<div class="container">
<div class="row align-items-center">
<div class="col-12 col-lg-6 d-flex">
<a href="index.html" class="site-logo">
Learning from Limited and Imperfect Data (L2ID)
</a>
<a href="#"
class="ml-auto d-inline-block d-lg-none site-menu-toggle js-menu-toggle text-black"><span
class="icon-menu h3"></span></a>
</div>
<div class="col-12 col-lg-6 ml-auto d-flex">
<div class="ml-md-auto top-social d-none d-lg-inline-block">
<a href="#" class="d-inline-block p-3"> </a>
<a href="#" class="d-inline-block p-3"> </a>
<a href="#" class="d-inline-block p-3"> </a>
</div>
</div>
<!-- <div class="col-6 d-block d-lg-none text-right">-->
</div>
</div>
</div>
<div class="site-navbar py-2 js-sticky-header site-navbar-target d-none pl-0 d-lg-block" role="banner">
<div class="container">
<div class="d-flex align-items-center">
<div class="mr-auto">
<nav class="site-navigation position-relative text-right" role="navigation">
<ul class="site-menu main-menu js-clone-nav mr-auto d-none pl-0 d-lg-block">
<li class="active">
<a href="index.html" class="nav-link text-left">Home</a>
</li>
<li>
<a href="index.html#dates" class="nav-link text-left">Important dates</a>
</li>
<!--<li>
<a href="index.html#schedule" class="nav-link text-left">Schedule</a>
</li>-->
<!--<li>
<a href="index.html#speakers" class="nav-link text-left">Speakers</a>
</li>-->
<!--<li class="nav-item dropdown">
<a class="nav-link dropdown-toggle" id="navbarDropdown"
role="button" data-toggle="dropdown" aria-haspopup="true" aria-expanded="false">
Challenges
</a>
<div class="dropdown-menu" aria-labelledby="navbarDropdown">
<a class="dropdown-item" href="challenge_localization.html">Localization</a>
<a class="dropdown-item" href="challenge_classification.html">Classification</a>
</div>
</li>-->
<li>
<a href="index.html#people" class="nav-link text-left">Organizers</a>
</li>
<!-- <li class="nav-item dropdown">
<a class="nav-link dropdown-toggle" href="challenge.html" id="navbarDropdown" role="button" data-toggle="dropdown" aria-haspopup="true" aria-expanded="false">
Challenge
</a>
<div class="dropdown-menu" aria-labelledby="navbarDropdown">
<a class="dropdown-item" href="challenge.html#challenge1">Object semantic segmentation with image-level supervision</a>
<a class="dropdown-item" href="challenge.html#challenge2">Scene parsing with point-based supervision</a>
</div>
</li> -->
<li class="nav-item dropdown">
<a class="nav-link dropdown-toggle" id="navbarDropdown"
role="button" data-toggle="dropdown" aria-haspopup="true" aria-expanded="false">
Previous
</a>
<div class="dropdown-menu" aria-labelledby="navbarDropdown">
<a class="dropdown-item" href="https://lidchallenge.github.io/LID2019/">LID 2019</a>
<a class="dropdown-item" href="https://lidchallenge.github.io/">LID 2020</a>
<a class="dropdown-item" href="https://www.learning-with-limited-labels.com"> VL3 2020</a>
<a class="dropdown-item" href="https://l2id.github.io/"> L2ID 2021</a>
</div>
</li>
</ul>
</nav>
</div>
</div>
</div>
</div>
</div>
<div class="site-blocks-cover overlay inner-page-cover" style="background-image: url('images/intro-background2.jpg');"
data-stellar-background-ratio="0.5">
<div class="container">
<div class="row align-items-center justify-content-center">
<div class="col-md-10 text-center" data-aos="fade-up">
<h3> Learning from Limited and Imperfect Data (L2ID)</h3>
<h5> A joint workshop combining Learning from Imperfect Data (LID) and Visual Learning with Limited Labels (VL3)</h5>
<h3> Oct. 23, 2022, Tel-Aviv, Israel (Online)</h3>
</div>
</div>
</div>
</div>
<div class="site-section">
<div class="container">
<div class="row">
<div class="col-lg-12">
<div class="section-title">
<h2>Introduction</h2>
</div>
<div class="trend-entry d-flex">
<div class="trend-contents">
<p>
Learning from limited or imperfect data (L^2ID) refers to a variety of studies that attempt to address challenging pattern recognition tasks by learning from limited, weak, or noisy supervision. Supervised learning methods, including deep convolutional neural networks, have significantly improved performance on many computer vision problems, thanks to the rise of large-scale annotated datasets and advances in computing hardware. However, these supervised approaches are notoriously "data hungry", which often makes them impractical in real-world industrial applications. The scarcity of labeled data becomes even more severe for visual classes whose annotation requires expert knowledge (e.g., medical imaging), classes that rarely occur, or object detection and instance segmentation tasks where labeling requires more effort. To address this problem, many directions, e.g., weakly supervised learning, few-shot learning, self-/semi-supervised learning, cross-domain few-shot learning, and domain adaptation, have been explored to improve robustness in this scenario.
The goal of this workshop, which builds on the successful CVPR 2021 L2ID workshop, is to bring together researchers across several computer vision and machine learning communities to navigate the complex landscape of methods that enable moving beyond fully supervised learning towards limited and imperfect label settings.
Topics that are of special interest (though submissions are not limited to these):
</p>
<ul>
<li>Few-shot learning for image classification, object detection, etc.</li>
<li>Cross-domain few-shot learning</li>
<li>Weakly-/semi-supervised learning algorithms</li>
<li>Zero-shot learning</li>
<li>Learning in the “long-tail” scenario</li>
<li>Self-supervised learning and unsupervised representation learning</li>
<li>Learning with noisy data</li>
<li>Any-shot learning – transitioning between few-shot, mid-shot, and many-shot training</li>
<li>Optimal data and source selection for effective meta-training with a known or unknown set of target categories</li>
<li>Data augmentation</li>
<li>New datasets and metrics to evaluate the benefit of such methods</li>
<li>Real-world applications such as object semantic segmentation/detection/localization, scene parsing, and video processing (e.g. action recognition, event detection, and object tracking)</li>
</ul>
<h3> Workshop Paper Submission Information </h3>
<br>Submissions should be formatted according to the ECCV 2022 guidelines and uploaded through the <a href="https://cmt3.research.microsoft.com/L2ID2022/Submission/Index"><span class="link">L2ID CMT Site</span></a>.<br>
Submitted papers can have one of the following formats:
<ul>
<li>Extended Abstracts of max 4 pages (not eligible for proceedings)</li>
<li>Papers of the same length as ECCV submissions (eligible for proceedings)</li>
</ul>
We encourage authors who want to present and discuss their ongoing work to choose the Extended Abstract format. According to the ECCV rules, extended abstracts do not count as archival publications. If you do not plan to include your paper in the proceedings (there is a checkbox for this), you can use the CVPR format and limit it to four pages so that it can later be submitted to conferences without counting as a dual/double submission.
</div>
</div>
</div>
<div class="col-lg-12" id="speakers" style="padding-top:80px;margin-top:-80px;">
<div class="section-title">
<h2>Speakers</h2>
</div>
<div class="row justify-content-md-center">
<div class="col-md-3">
<div class="card">
<a href="https://laoreja.github.io/">
<img src="https://laoreja.github.io/images/me_200.png" height="175" width="175"/>
<div class="post-meta">
<span class="d-block"><font size=4>Xiuye Gu</font> </span>
</div>
</a>
</div>
</div>
<div class="col-md-3">
<div class="card">
<a href="https://www.microsoft.com/en-us/research/people/yucheng1/">
<img src="https://www.microsoft.com/en-us/research/uploads/prod/2022/01/21641170378_.pic_.jpg" height="175" width="175"/>
<div class="post-meta">
<span class="d-block"><font size=4>Yu Cheng</font></span>
</div>
</a>
</div>
</div>
<div class="col-md-3">
<div class="card">
<a href="https://sites.google.com/site/yinfeiyang/">
<img src="images/yinfei.jpg" height="175" width="175"/>
<div class="post-meta">
<span class="d-block"><font size=4>Yinfei Yang</font> </span>
</div>
</a>
</div>
</div>
<div class="col-md-3">
<div class="card">
<a href="https://researcher.watson.ibm.com/researcher/view.php?person=il-LEONIDKA">
<img src="https://s3.us.cloud-object-storage.appdomain.cloud/res-photos/8276.jpg" height="175" width="175"/>
<div class="post-meta">
<span class="d-block"><font size=4>Leonid Karlinsky</font> </span>
</div>
</a>
</div>
</div>
<div class="col-md-3">
<div class="card">
<a href="https://pages.cs.wisc.edu/~sharonli/">
<img src="https://pages.cs.wisc.edu/~sharonli/images/yixuanli-2019.JPG" height="175" width="175"/>
<div class="post-meta">
<span class="d-block"><font size=4>Sharon Yixuan Li</font> </span>
</div>
</a>
</div>
</div>
<div class="col-md-3">
<div class="card">
<a href="http://home.bharathh.info/">
<img src="images/bharath.jpg" height="175" width="175"/>
<div class="post-meta">
<span class="d-block"><font size=4>Bharath Hariharan</font> </span>
</div>
</a>
</div>
</div>
<div class="col-md-3">
<div class="card">
<a href="https://imisra.github.io/">
<img src="https://imisra.github.io/img/me-bos2.jpg" height="175" width="175"/>
<div class="post-meta">
<span class="d-block"><font size=4>Ishan Misra</font> </span>
</div>
</a>
</div>
</div>
<div class="col-md-3">
<div class="card">
<a href="https://sites.google.com/it-caesar.de/homepage/">
<img src="https://avatars.githubusercontent.com/u/39502217?v=4" height="175" width="175"/>
<div class="post-meta">
<span class="d-block"><font size=4>Holger Caesar</font> </span>
</div>
</a>
</div>
</div>
</div>
<br><br>
<div class="trend-entry d-flex">
<table align="center" class="table table-hover">
<thead>
<tr>
<th scope="col"> Speaker</th>
<th scope="col"> Talk</th>
</tr>
</thead>
<tbody>
<tr>
<td>
Xiuye is a research engineer at Google Research. Her research interests are in computer vision, with a current focus on open-vocabulary recognition. She was an AI resident at Google Research working with Tsung-Yi Lin and Yin Cui. Before that, she received her M.S. in Computer Science from Stanford University in 2020. She was a visiting scholar working with Prof. Yong Jae Lee. She received her B.E. in CS from Zhejiang University in 2017, where she worked with Prof. Deng Cai.
</td>
<td>
<a><em>Open-Vocabulary Detection and Segmentation</em></a>
<br><a>Existing visual recognition models often only work on the closed-set categories available in the training sets. In our recent work, we aim at going beyond this limitation. We design an open-vocabulary object detection method, ViLD, and an open-vocabulary image segmentation model, OpenSeg, where the models detect objects or segment images with categories described by arbitrary texts. The two models address open-vocabulary recognition from two different perspectives: ViLD distills the knowledge from a pretrained open-vocabulary classification model (teacher) into a two-stage detector (student); OpenSeg learns the open-vocabulary capacity from weakly-supervised learning on image caption datasets, where the model learns visual-semantic alignments by aligning the words in a caption to predicted masks. Both models learn the localization ability from class-agnostic training on base categories using very different network architectures. ViLD achieves 26.3 APr and 27.6 AP on LVIS and COCO's novel categories, respectively. It also directly transfers to other detection datasets without finetuning. Trained on COCO and Localized Narratives, OpenSeg directly transfers to ADE20K (847 and 150 categories) and Pascal Context (459 and 59 categories) with superior performance.</a>
</td>
</tr>
<tr>
<td>
Yu Cheng is a Principal Researcher at Microsoft Research. Before joining Microsoft, he was a Research Staff Member at IBM Research & MIT-IBM Watson AI Lab. He got a Ph.D. degree from Northwestern University in 2015 and a bachelor’s degree from Tsinghua University in 2010. His research covers deep learning in general, with specific interests in model compression and efficiency, deep generative models, and adversarial robustness. Currently, he focuses on productionizing these techniques to solve challenging problems in CV, NLP, and multimodal learning. Yu is serving (or has served) as an area chair for CVPR, NeurIPS, AAAI, IJCAI, ACMMM, WACV, and ECCV.
</td>
<td>
<a><em>Towards data efficient vision-language (VL) models</em></a>
<br><a>Language transformers have shown remarkable performance on natural language understanding tasks. However, these gigantic VL models are hard to deploy for real-world applications due to their impractically huge model size and the requirement for downstream fine-tuning data. In this talk, I will first present FewVLM, a few-shot prompt-based learner on vision-language tasks. FewVLM is trained with both prefix language modeling and masked language modeling and utilizes simple prompts to improve zero/few-shot performance on VQA and image captioning. Then I will introduce Grounded-FewVLM, a new version that learns object grounding and localization in pre-training and can adapt to diverse grounding tasks. The models have been evaluated on various zero-/few-shot VL tasks and the results show that they consistently surpass the state-of-the-art few-shot methods.</a>
</td>
</tr>
<tr>
<td>
Yinfei Yang is a research manager at Apple AI/ML working on general visual intelligence. Previously he was a staff research scientist at Google Research, working on various NLP and computer vision problems. Before Google, he worked at Redfin and Amazon as a research engineer on machine learning and computer vision problems. Prior to that, he was a graduate student in computer science at UPenn, where he received his master's in computer science. His research focuses on image and text representation learning for retrieval and transfer tasks. He is generally interested in problems in computer vision, natural language processing, or their combination.
</td>
<td>
<a><em>Learning Visual and Vision-Language Model With Noisy Image Text Pairs</em></a>
<br><a>Pre-training has become a key tool in state-of-the-art computer vision and language machine learning models. The benefits of very large-scale supervised pre-training were first demonstrated by BERT in the language community, and by the BiT and ViT models in the vision community. However, popular vision-language datasets like Conceptual Captions and MSCOCO usually involve a non-trivial data collection and cleaning process, which limits the size of the datasets and hence the scale at which image-text models can be trained. In recent work, researchers leverage noisy datasets of billions of image alt-text pairs mined from the Web for pre-training. The resulting models have shown incredible performance on various visual and vision-language tasks, including image-text retrieval, captioning, visual question answering, etc. In addition, researchers also show that the visual representations learned from noisy text supervision achieve state-of-the-art results on various vision tasks, including image classification, semantic segmentation, object detection, etc.</a>
</td>
</tr>
<tr>
<td>
Leonid Karlinsky is a Principal Research Scientist (STSM) in the MIT-IBM lab. Prior to that Leonid led the
AI Vision research group in the Multimedia department @ IBM Research AI. Leonid joined IBM Research
in July 2015. Before joining IBM, he served as a research scientist in Applied Materials, Elbit, and FDNA.
He is actively publishing, reviewing, and performing occasional chair duties at ECCV, ICCV, CVPR, ICLR,
AAAI, WACV, and NeurIPS, and served as an IMVC steering committee member for the past 6 years.
During his time at IBM, Leonid has co-authored over 30 research papers in the areas of augmented
reality, medical applications, self-supervised, cross-domain, multi-modal, and few-shot learning. He
received his PhD degree at the Weizmann Institute of Science, supervised by Prof. Shimon Ullman.
</td>
<td>
<a><em>Different facets of limited supervision – on coarse- / weakly- / cross-domain- / and self- supervised learning</em></a>
<br><a>Limited supervision can assume many interesting and practical forms beyond (the very popular) classical few-shot learning. In this talk I will touch upon four of our recent works covering a range of alternative limited-supervision tasks. We will consider learning with weak supervision (incomplete or noisy labeling, such as image-level class labels for training a few-shot detector or image-level captions for training a zero-shot grounding model); coarse-to-fine few-shot learning – where pre-training annotations are coarse (e.g. broad vehicle types such as car, truck, bus, etc.) while the target novel classes for few-shot learning are fine-grained (e.g. specific models of cars); self-supervised cross-domain learning – where we want to semantically align learned representations between different domains without any labels in either domain; and self-supervised classification – discovering novel classes without any supervision.</a>
</td>
</tr>
<tr>
<td>
Sharon Yixuan Li is an Assistant Professor in the Department of Computer Sciences at the University of Wisconsin Madison. Her broad research interests are in deep learning and machine learning. Her research focuses on learning and inference under distributional shifts and open-world machine learning. Previously she was a postdoc research fellow in the Computer Science department at Stanford AI Lab. She completed her Ph.D. from Cornell University in 2017, where she was advised by John E. Hopcroft. She led the organization of the ICML workshop on Uncertainty and Robustness in Deep Learning in 2019 and 2020. She is the recipient of several awards, including the Facebook Research Award, Amazon Research Award, and was named Forbes 30Under30 in Science.
</td>
<td>
<a><em>How to Handle Data Shifts? Challenges, Research Progress and Path Forward</em></a>
<br><a>The real world is open and full of unknowns, presenting significant challenges for machine learning systems that must reliably handle diverse, and sometimes anomalous inputs. Out-of-distribution (OOD) uncertainty arises when a machine learning model sees a test-time input that differs from its training data, and thus should not be predicted by the model. As machine learning is used for more safety-critical domains, the ability to handle out-of-distribution data is central in building open-world learning systems. In this talk, I will talk about challenges, research progress, and future opportunities in detecting OOD samples for safe and reliable predictions in an open world.</a>
</td>
</tr>
<tr>
<td>
Bharath Hariharan is an assistant professor of Computer Science at Cornell University, where he works on all things computer vision, focusing on problems where data challenges prevail. He is a recipient of the NSF CAREER award as well as the PAMI Young Researcher award.
</td>
<td>
<a><em>When life gives you lemons: Making lemonade from limited labels</em></a>
<br><a>Many research directions have been proposed for dealing with the limited availability of labeled data in many domains, including zero-shot learning, few-shot learning, semi-supervised learning and self-supervised learning. However, I argue that in spite of the volume of research in these paradigms, existing approaches discard vital domain knowledge that can prove useful in learning.</a>
<br>
<br> <a>I will show two case studies where thinking about where the data comes from in the problem domain leads to substantial improvements in accuracy. The first case study will look at the domain of self-driving, and will show how leveraging domain knowledge can allow systems to automatically discover objects and train detectors with no labels at all. The second study will look at zero-shot learning, where digging deeper into the provenance of class descriptions yields surprising and useful insight.</a>
</td>
</tr>
<tr>
<td>
Ishan Misra is a Research Scientist at FAIR, Meta AI, where he works on computer vision. His interests are primarily in learning visual representations with limited supervision, using self-supervised and weakly supervised learning. For his work in self-supervised learning, Ishan was featured in MIT Tech Review’s list of 35 Innovators Under 35, compiled globally across all areas of technology. You can hear about his work at length on Lex Fridman’s podcast.
</td>
<td>
<a><em>General purpose visual recognition across modalities with limited supervision</em></a>
<br><a>Modern computer vision models are good at specialized tasks. Given the right architecture and the right supervision, supervised learning can yield great specialist models. However, specialist models also have severe limitations — they can only do what they are trained for and require copious amounts of pristine supervision for it. In this talk, I’ll focus on two limitations: specialist models cannot work on tasks beyond what they saw training labels for, or on new types of visual data. I’ll present our recent efforts that design better architectures, training paradigms and loss functions to address these issues.</a>
<br>
<br><a>Our first work, called Omnivore, presents a single model that can operate on images, videos, and single-view 3D data. Omnivore leads to shared representations across visual modalities, without using paired input data. Omnivore can also be trained in a self-supervised manner. I’ll conclude the talk with Detic, a simple way to train large-vocabulary detectors using image-level labels, which leads to a 20,000+ class detector.</a>
</td>
</tr>
<tr>
<td>
Dr. Holger Caesar is an Assistant Professor in the Intelligent Vehicles group at TU Delft in the Netherlands. Holger's research interests are in the area of autonomous vehicle perception and prediction, with a particular focus on the scalability of learning and annotation approaches. Previously Holger was a Principal Research Scientist at the autonomous vehicle company Motional (formerly nuTonomy), where he started 3 teams with 20+ members focused on Data Annotation, Autolabeling, and Data Mining. Holger also developed the influential autonomous driving datasets nuScenes and nuPlan and contributed to the commonly used PointPillars baseline for 3D object detection from lidar data. He received his PhD in Computer Vision from the University of Edinburgh in Scotland under Prof. Dr. Vittorio Ferrari and studied in Germany and Switzerland (KIT Karlsruhe, EPF Lausanne, ETH Zurich).
</td>
<td>
<a><em>Autonomous vehicles from imperfect and limited labels</em></a>
<br><a>The past decade has seen enormous progress in autonomous vehicle performance due to new sensors, large-scale datasets and ever deeper models. Yet this progress is fueled by human annotators manually labeling every object in painstaking detail. Newly released datasets now focus on more specific subproblems rather than fully labelling ever larger amounts of data. In this talk I will describe how we developed an Offline Perception system to autolabel a 250x larger dataset called nuPlan. This dataset serves as the world's first real-world ML planning benchmark. By combining real-world data with a closed-loop simulation framework, we get the best of both worlds - realism and reactivity. I will discuss the role of imperfect (perception) data in planning and prediction and highlight the importance of up-to-date maps. I conclude that it is essential to detect these imperfections, quantify their impact and develop robust models that are able to learn from this data.</a>
</td>
</tr>
</tbody>
</table>
</div>
</div>
<br><br>
<div class="col-lg-12" id="schedule">
<p style="text-align: center">All talks and Q&A are now available at the <a href="https://www.youtube.com/channel/UC2of8xmN_238SaqJB3ryv0w/playlists"> <span class="link">YouTube Channel</a>. Please feel free to contact us if you have any suggestions to improve our workshop!    <strong>l2idcvpr@gmail.com</strong> </p>
<div class="section-title">
<h2>Schedule</h2>
<h3>(All times in IDT, Israel Daylight Time)</h3>
</div>
<div class="trend-entry d-flex">
<table align="center" class="table table-hover">
<thead>
<tr>
<th scope="col"> Date </th>
<th scope="col"> Speaker</th>
<th scope="col"> Topic</th>
</tr>
</thead>
<tbody>
<tr>
<td>9:00-9:10</td>
<td>Organizers</td>
<td>Introduction and opening</td>
</tr>
<tr>
<td>9:10-9:40</td>
<td>Bharath Hariharan</td>
<td>When life gives you lemons: Making lemonade from limited labels</td>
</tr>
<tr>
<td>9:40-10:10</td>
<td>Ishan Misra</td>
<td>General purpose visual recognition across modalities with limited supervision</td>
</tr>
<tr>
<td>10:10-10:40</td>
<td>Leonid Karlinsky</td>
<td>Different facets of limited supervision – on coarse- / weakly- / cross-domain- / and self- supervised learning</td>
</tr>
<tr>
<td>10:40-11:00</td>
<td>Boyi Li</td>
<td>SITTA: Single Image Texture Translation for Data Augmentation</td>
</tr>
<tr>
<td>11:00-11:20</td>
<td>Yabiao Wang</td>
<td>Learning from Noisy Labels with Coarse-to-Fine Sample Credibility Modeling</td>
</tr>
<tr>
<td>11:20-11:40</td>
<td>Rabab Abdelfattah</td>
<td>PLMCL: Partial-Label Momentum Curriculum Learning for Multi-label Image Classification</td>
</tr>
<tr>
<td>Online Only</td>
<td>Jiageng Zhu</td>
<td>SW-VAE: Weakly Supervised Learn Disentangled Representation Via Latent Factor Swapping</td>
</tr>
<tr>
<td>Online Only</td>
<td>Vadim Sushko</td>
<td>One-Shot Synthesis of Images and Segmentation Masks</td>
</tr>
<tr>
<td>Online Only</td>
<td>Ruiwen Li</td>
<td>TransCAM: Transformer Attention-based CAM Refinement for Weakly Supervised Semantic Segmentation</td>
</tr>
<tr>
<td>11:40-12:00</td>
<td>Quoc-Huy Tran</td>
<td>Timestamp-Supervised Action Segmentation with Graph Convolutional Networks</td>
</tr>
<tr>
<td>12:00-12:30</td>
<td>Noel, Zsolt, Kyle</td>
<td>Live Q&A / Panel Discussion</td>
</tr>
<tr>
<td>12:30-13:00</td>
<td>Xiuye Gu</td>
<td>Open-Vocabulary Detection and Segmentation</td>
</tr>
<tr>
<td>13:00-13:30</td>
<td>Yinfei Yang</td>
<td>Learning Visual and Vision-Language Model With Noisy Image Text Pairs</td>
</tr>
<tr>
<td>13:30-14:00</td>
<td>Yu Cheng</td>
<td>Towards data efficient vision-language (VL) models</td>
</tr>
<tr>
<td>14:00-14:20</td>
<td>Nir Zabari</td>
<td>Open-Vocabulary Semantic Segmentation using Test-Time Distillation</td>
</tr>
<tr>
<td>14:20-14:40</td>
<td>Niv Cohen</td>
<td>"This is my unicorn, Fluffy": Personalizing frozen vision-language representations</td>
</tr>
<tr>
<td>14:40-15:10</td>
<td>Noel, Zsolt, Kyle</td>
<td>Live Q&A / Panel Discussion</td>
</tr>
<tr>
<td>15:10-15:40</td>
<td>Sharon Li</td>
<td>How to Handle Data Shifts? Challenges, Research Progress and Path Forward</td>
</tr>
<tr>
<td>15:40-16:10</td>
<td>Holger Caesar</td>
<td>Autonomous vehicles from imperfect and limited labels</td>
</tr>
<tr>
<td>16:10-16:30</td>
<td>Niv Cohen</td>
<td>Out-of-Distribution Detection Without Class Labels</td>
</tr>
<tr>
<td>Online Only</td>
<td>Jongjin Park</td>
<td>OpenCoS: Contrastive Semi-supervised Learning for Handling Open-set Unlabeled Data</td>
</tr>
<tr>
<td>Online Only</td>
<td>Andong Tan</td>
<td>Unsupervised Domain Adaptive Object Detection with Class Label Shift Weighted Local Features</td>
</tr>
<tr>
<td>Online Only</td>
<td>Abhay Rawat</td>
<td>Semi-Supervised Domain Adaptation by Similarity based Pseudo-label Injection</td>
</tr>
<tr>
<td>Online Only</td>
<td>SangYun Lee</td>
<td>Learning Multiple Probabilistic Degradation Generators for Unsupervised Real World Image Super Resolution</td>
</tr>
<tr>
<td>16:30-17:00</td>
<td>Noel, Zsolt, Kyle</td>
<td>Live Q&A / Panel Discussion</td>
</tr>
</tbody>
</table>
</div>
</div>
</div>
<p><input id="idg-io-safe-browsing-enabled" type="hidden" /></p>
<div class="col-lg-12" id="dates" style="padding-top:150px;margin-top:-80px;">
<div class="section-title">
<h2>Important Dates</h2>
</div>
<div class="trend-entry d-flex">
<table class="table table-striped">
<thead>
<tr>
<th scope="col"> Description</th>
<th scope="col"> Date</th>
</tr>
</thead>
<tbody>
<tr>
<td>Paper submission deadline</td>
<td>July 15th, 2022</td>
</tr>
<tr>
<td>Workshop Date</td>
<td>October 23, 2022</td>
</tr>
<!-- <tr>
<td>Poster</td>
<td>10:00-11:00, June 16, 2021</td>
</tr> -->
</tbody>
</table>
</div>
</div>
<div class="col-lg-12" id="people" style="padding-top:80px;margin-top:-80px;">
<div class="section-title">
<h2>People</h2>
</div>
<div class="row justify-content-md-center">
<div class="col-md-3">
<div class="card">
<img class="rounded-circle" src="images/Noel-Codella-sq.jpg" height="175" width="100%"/>
<div class="post-meta">
<span class="d-block"><span class="d-block"><a href="https://www.linkedin.com/in/noel-c-f-codella-ph-d-1b1b1723/">Noel C. F. Codella</a> </span>
<span class="date-read">ncodella AT microsoft.com </span>
</div>
</div>
</div>
<div class="col-md-3">
<div class="card">
<img class="rounded-circle" , src="images/shuai_zheng.jpg" height="175" width="100%"/>
<div class="post-meta">
<span class="d-block"><a href="https://kylezheng.org/">Shuai (Kyle) Zheng</a> </span>
<span class="date-read">szhengcvpr AT gmail.com</span>
</div>
</div>
</div>
<div class="col-md-3">
<div class="card">
<img class="rounded-circle" src="https://www.cc.gatech.edu/~zk15/images/kira_0.jpg" height="175" width="100%"/>
<div class="post-meta">
<span class="d-block"><a href="https://www.cc.gatech.edu/~zk15/">Zsolt Kira</a> </span>
<span class="date-read">zkira AT gatech.edu</span>
</div>
</div>
</div>
<div class="col-md-3">
<div class="card">
<img class="rounded-circle" src="images/CMMC.jpg" height="175" width="100%"/>
<div class="post-meta">
<span class="d-block"><a href="https://mmcheng.net/cmm/">Ming-Ming Cheng</a> </span>
<span class="date-read">cmm AT nankai.edu.cn</span>
</div>
</div>
</div>
<div class="col-md-3">
<div class="card">
<img class="rounded-circle" src="https://www.cc.gatech.edu/~judy/images/Judy_Hoffman.jpg" height="175" width="100%"/>
<div class="post-meta">
<span class="d-block"><a href="https://www.cc.gatech.edu/~judy/">Judy Hoffman</a> </span>
<span class="date-read"> judy AT gatech.edu</span>
</div>
</div>
</div>
<div class="col-md-3">
<div class="card">
<img class="rounded-circle" src="https://eccv2022.ecva.net/files/2021/12/tommasi.png" height="175" width="100%"/>
<div class="post-meta">
<span class="d-block"><a href="http://www.tatianatommasi.com/">Tatiana Tommasi</a> </span>
<span class="date-read">tatiana.tommasi AT polito.it</span>
</div>
</div>
</div>
<div class="col-md-3">
<div class="card">
<img class="rounded-circle" src="https://xjqi.github.io/pics/cropped_img" height="175" width="100%"/>
<div class="post-meta">
<span class="d-block"><a href="https://xjqi.github.io/">Xiaojuan Qi</a> </span>
<span class="date-read">xjqi AT eee.hku.hk</span>
</div>
</div>
</div>
<div class="col-md-3">
<div class="card">
<img class="rounded-circle" src="https://www.robots.ox.ac.uk/~sadeep/images/sadeep.jpg" height="175" width="100%"/>
<div class="post-meta">
<span class="d-block"><a href="https://www.robots.ox.ac.uk/~sadeep/">Sadeep Jayasumana</a> </span>
<span class="date-read">sadeep.jay AT gmail.com</span>
</div>
</div>
</div>
</div>
<div class="row justify-content-md-center" style="margin-top: 40px">
<div class="col-md-3">
<div class="card">
<img class="rounded-circle" src="https://yunhuiguo.github.io/img/pic.jpg" height="175" width="100%"/>
<div class="post-meta">
<span class="d-block"><a href="https://yunhuiguo.github.io/">Yunhui Guo</a> </span>
<span class="date-read">yug185 AT eng.ucsd.edu</span>
</div>
</div>
</div>
<div class="col-md-3">
<div class="card">
<img class="rounded-circle" src="https://virajprabhu.github.io/images/me4.png" height="175" width="100%"/>
<div class="post-meta">
<span class="d-block"> <a href="https://virajprabhu.github.io/">Prabhu, Viraj Uday</span>
<span class=""date-read>virajp AT gatech.edu</span>
</div>
</div>
</div>
</div>
</div>
</div>
<div class="col-lg-12">
<div style="display:inline-block;width:500px;">
<script type="text/javascript" src="//rc.rev
olvermaps.com/0/0/7.js?i=2hlmeh3dic1&m=0&c=ff0000&cr1=ffffff&br=19&sx=0"
async="async"></script>
</div>
</div>
</div>
</div>
<!-- END section -->
<div class="footer">
<div class="container">
<div class="row">
<div class="col-12">
<div class="copyright">
<p>
<!-- Link back to Colorlib can't be removed. Template is licensed under CC BY 3.0. -->
Copyright ©<script>document.write(new Date().getFullYear());</script>
All rights reserved | This template is made with <i class="icon-heart text-danger"
aria-hidden="true"></i> by <a
href="https://colorlib.com" target="_blank">Colorlib</a>
<!-- Link back to Colorlib can't be removed. Template is licensed under CC BY 3.0. -->
</p>
</div>
</div>
</div>
</div>
</div>
</div>
<!-- .site-wrap -->
<!-- loader -->
<div id="loader" class="show fullscreen">
<svg class="circular" width="48px" height="48px">
<circle class="path-bg" cx="24" cy="24" r="22" fill="none" stroke-width="4" stroke="#eeeeee"/>
<circle class="path" cx="24" cy="24" r="22" fill="none" stroke-width="4" stroke-miterlimit="10"
stroke="#ff5e15"/>
</svg>
</div>
<script src="js/jquery-3.3.1.min.js"></script>
<script src="js/jquery-migrate-3.0.1.min.js"></script>
<script src="js/jquery-ui.js"></script>
<script src="js/popper.min.js"></script>
<script src="js/bootstrap.min.js"></script>
<script src="js/owl.carousel.min.js"></script>
<script src="js/jquery.stellar.min.js"></script>
<script src="js/jquery.countdown.min.js"></script>
<script src="js/bootstrap-datepicker.min.js"></script>
<script src="js/jquery.easing.1.3.js"></script>
<script src="js/aos.js"></script>
<script src="js/jquery.fancybox.min.js"></script>
<script src="js/jquery.sticky.js"></script>
<script src="js/jquery.mb.YTPlayer.min.js"></script>
<script src="js/main.js"></script>
</body>
</html>