{"payload":{"pageCount":3,"repositories":[{"type":"Public","name":"Show-o","owner":"showlab","isFork":false,"description":"Repository for Show-o, One Single Transformer to Unify Multimodal Understanding and Generation.","allTopics":["multimodal","diffusion-models","large-language-models"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":13,"starsCount":754,"forksCount":36,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-09-11T14:11:48.075Z"}},{"type":"Public","name":"Awesome-Video-Diffusion","owner":"showlab","isFork":false,"description":"A curated list of recent diffusion models for video generation, editing, restoration, understanding, etc.","allTopics":["awesome","video-editing","video-understanding","video-generation","diffusion-models","text-to-video","video-restoration","text-to-motion"],"primaryLanguage":null,"pullRequestCount":0,"issueCount":0,"starsCount":3123,"forksCount":188,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-09-11T04:54:31.187Z"}},{"type":"Public","name":"Awesome-Unified-Multimodal-Models","owner":"showlab","isFork":false,"description":"📖 This is a repository for organizing papers, codes and other resources related to unified multimodal models.","allTopics":[],"primaryLanguage":null,"pullRequestCount":0,"issueCount":0,"starsCount":125,"forksCount":0,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-09-09T06:36:51.109Z"}},{"type":"Public","name":"Awesome-MLLM-Hallucination","owner":"showlab","isFork":false,"description":"📖 A curated list of resources dedicated to hallucination of multimodal large language models (MLLM).","allTopics":[],"primaryLanguage":null,"pullRequestCount":0,"issueCount":1,"starsCount":351,"forksCount":10,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-09-05T10:25:20.103Z"}},{"type":"Public","name":"RingID","owner":"showlab","isFork":false,"description":"","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":1,"starsCount":10,"forksCount":0,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-08-30T18:01:21.694Z"}},{"type":"Public","name":"Awesome-GUI-Agent","owner":"showlab","isFork":false,"description":"💻 A curated list of papers and resources for multi-modal Graphical User Interface (GUI) agents.","allTopics":["awesome","graphical-user-interface","ai-assistant","llm-agent","gui-agents"],"primaryLanguage":null,"pullRequestCount":0,"issueCount":1,"starsCount":115,"forksCount":4,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-08-27T17:13:22.454Z"}},{"type":"Public","name":"MovieSeq","owner":"showlab","isFork":false,"description":"[ECCV2024] Learning Video Context as Interleaved Multimodal Sequences","allTopics":[],"primaryLanguage":null,"pullRequestCount":0,"issueCount":1,"starsCount":15,"forksCount":0,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-08-26T00:12:34.749Z"}},{"type":"Public","name":"MotionDirector","owner":"showlab","isFork":false,"description":"[ECCV 2024 Oral] MotionDirector: Motion Customization of Text-to-Video Diffusion 
Models.","allTopics":["video-generation","diffusion-models","text-to-video","text-to-motion","text-to-video-generation","motion-customization"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":19,"starsCount":792,"forksCount":44,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-08-21T03:51:01.631Z"}},{"type":"Public","name":"GUI-Narrator","owner":"showlab","isFork":false,"description":"Repository of GUI Action Narrator","allTopics":[],"primaryLanguage":{"name":"JavaScript","color":"#f1e05a"},"pullRequestCount":0,"issueCount":0,"starsCount":2,"forksCount":0,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-08-16T15:51:23.396Z"}},{"type":"Public","name":"videollm-online","owner":"showlab","isFork":false,"description":"VideoLLM-online: Online Video Large Language Model for Streaming Video (CVPR 2024)","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":8,"starsCount":173,"forksCount":23,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-08-15T18:55:36.819Z"}},{"type":"Public","name":"X-Adapter","owner":"showlab","isFork":false,"description":"[CVPR 2024] X-Adapter: Adding Universal Compatibility of Plugins for Upgraded Diffusion Model","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":4,"issueCount":17,"starsCount":724,"forksCount":43,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-08-14T10:19:32.308Z"}},{"type":"Public","name":"LOVA3","owner":"showlab","isFork":false,"description":"The official repo of \"Learning to Visual Question Answering, Asking and Assessment\"","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":12,"forksCount":1,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-08-05T09:00:26.585Z"}},{"type":"Public","name":"afformer","owner":"showlab","isFork":false,"description":"Affordance Grounding from Demonstration Video to Target Image (CVPR 2023)","allTopics":["deep-learning","pytorch"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":6,"starsCount":37,"forksCount":2,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-07-26T16:23:25.474Z"}},{"type":"Public","name":"BoxDiff","owner":"showlab","isFork":false,"description":"[ICCV 2023] BoxDiff: Text-to-Image Synthesis with Training-Free Box-Constrained Diffusion","allTopics":["text-to-image-synthesis","diffusion-models"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":4,"starsCount":239,"forksCount":14,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-07-21T05:17:42.281Z"}},{"type":"Public","name":"cvpr2024-tutorial-video-diffusion-models","owner":"showlab","isFork":false,"description":"","allTopics":[],"primaryLanguage":{"name":"HTML","color":"#e34c26"},"pullRequestCount":0,"issueCount":0,"starsCount":1,"forksCount":0,"license":"MIT License","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-07-16T08:58:32.765Z"}},{"type":"Public","name":"DragAnything","owner":"showlab","isFork":false,"description":"[ECCV 2024] DragAnything: Motion Control for Anything using Entity 
Representation","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":19,"starsCount":403,"forksCount":13,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-07-02T04:04:48.797Z"}},{"type":"Public","name":"AssistGaze","owner":"showlab","isFork":false,"description":"","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":0,"forksCount":0,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-06-25T14:02:53.113Z"}},{"type":"Public","name":"videogui","owner":"showlab","isFork":false,"description":"official repo of \"VideoGUI: A Benchmark for GUI Automation from Instructional Videos\"","allTopics":["gui","video-language","llm-agent"],"primaryLanguage":{"name":"JavaScript","color":"#f1e05a"},"pullRequestCount":0,"issueCount":0,"starsCount":19,"forksCount":0,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-06-16T17:47:16.220Z"}},{"type":"Public","name":"VisInContext","owner":"showlab","isFork":false,"description":"Official implementation of Leveraging Visual Tokens for Extended Text Contexts in Multi-Modal Learning","allTopics":["efficient","in-context-learning","llm","mllm"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":1,"starsCount":10,"forksCount":1,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-06-06T07:19:18.213Z"}},{"type":"Public","name":"cosmo","owner":"showlab","isFork":false,"description":"","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":2,"issueCount":2,"starsCount":70,"forksCount":4,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-05-10T21:04:11.443Z"}},{"type":"Public","name":"EgoVLP","owner":"showlab","isFork":false,"description":"[NeurIPS2022] Egocentric Video-Language Pretraining","allTopics":["pretraining","video-language","egocentric-vision","pytorch"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":5,"starsCount":222,"forksCount":19,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-05-09T05:52:12.512Z"}},{"type":"Public","name":"UniVTG","owner":"showlab","isFork":false,"description":"[ICCV2023] UniVTG: Towards Unified Video-Language Temporal Grounding","allTopics":["video-summarization","video-grounding","pretraining","moment-retrieval","highlight-detection","video-language"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":19,"starsCount":314,"forksCount":28,"license":"MIT License","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-05-08T15:15:34.433Z"}},{"type":"Public","name":"VisorGPT","owner":"showlab","isFork":false,"description":"[NeurIPS 2023] Customize spatial layouts for conditional image synthesis models, e.g., ControlNet, using GPT","allTopics":["image-generation","gpt","diffusion-models","controlnet"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":4,"starsCount":129,"forksCount":2,"license":"MIT 
License","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-05-04T01:51:17.676Z"}},{"type":"Public","name":"Long-form-Video-Prior","owner":"showlab","isFork":false,"description":"","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":23,"forksCount":0,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-05-03T03:09:13.211Z"}},{"type":"Public","name":"assistgui","owner":"showlab","isFork":false,"description":"","allTopics":[],"primaryLanguage":{"name":"JavaScript","color":"#f1e05a"},"pullRequestCount":0,"issueCount":1,"starsCount":21,"forksCount":1,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-04-16T16:04:38.993Z"}},{"type":"Public","name":"T2VScore","owner":"showlab","isFork":false,"description":"T2VScore: Towards A Better Metric for Text-to-Video Generation","allTopics":[],"primaryLanguage":null,"pullRequestCount":0,"issueCount":3,"starsCount":76,"forksCount":1,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-04-10T04:13:33.125Z"}},{"type":"Public","name":"sparseformer","owner":"showlab","isFork":false,"description":"(ICLR 2024, CVPR 2024) SparseFormer ","allTopics":["computer-vision","transformer","efficient-neural-networks","vision-transformer","sparseformer"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":1,"starsCount":62,"forksCount":1,"license":"MIT License","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-03-30T09:22:01.830Z"}},{"type":"Public","name":"VideoSwap","owner":"showlab","isFork":false,"description":"Code for [CVPR 2024] VideoSwap: Customized Video Subject Swapping with Interactive Semantic Point Correspondence","allTopics":[],"primaryLanguage":null,"pullRequestCount":0,"issueCount":0,"starsCount":343,"forksCount":12,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-03-29T14:51:53.994Z"}},{"type":"Public","name":"Tune-An-Ellipse","owner":"showlab","isFork":false,"description":"[CVPR 2024] Tune-An-Ellipse: CLIP Has Potential to Find What You Want","allTopics":[],"primaryLanguage":null,"pullRequestCount":0,"issueCount":1,"starsCount":6,"forksCount":0,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-03-29T13:15:14.332Z"}},{"type":"Public","name":"magicanimate","owner":"showlab","isFork":false,"description":"","allTopics":[],"primaryLanguage":{"name":"JavaScript","color":"#f1e05a"},"pullRequestCount":0,"issueCount":0,"starsCount":2,"forksCount":1,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-03-25T08:52:04.001Z"}}],"repositoryCount":65,"userInfo":null,"searchable":true,"definitions":[],"typeFilters":[{"id":"all","text":"All"},{"id":"public","text":"Public"},{"id":"source","text":"Sources"},{"id":"fork","text":"Forks"},{"id":"archived","text":"Archived"},{"id":"template","text":"Templates"}],"compactMode":false},"title":"showlab repositories"}