{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,10,29]],"date-time":"2024-10-29T14:41:57Z","timestamp":1730212917023,"version":"3.28.0"},"reference-count":52,"publisher":"IEEE","license":[{"start":{"date-parts":[[2024,6,16]],"date-time":"2024-06-16T00:00:00Z","timestamp":1718496000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,6,16]],"date-time":"2024-06-16T00:00:00Z","timestamp":1718496000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/100000002","name":"NIH","doi-asserted-by":"publisher","award":["R01HD104624-01A1"],"id":[{"id":"10.13039\/100000002","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024,6,16]]},"DOI":"10.1109\/cvpr52733.2024.00622","type":"proceedings-article","created":{"date-parts":[[2024,9,16]],"date-time":"2024-09-16T17:34:53Z","timestamp":1726508093000},"page":"6507-6516","source":"Crossref","is-referenced-by-count":0,"title":["RAVE: Randomized Noise Shuffling for Fast and Consistent Video Editing with Diffusion Models"],"prefix":"10.1109","author":[{"given":"Ozgur","family":"Kara","sequence":"first","affiliation":[{"name":"Georgia Tech"}]},{"given":"Bariscan","family":"Kurtkaya","sequence":"additional","affiliation":[{"name":"KUIS AI Center"}]},{"given":"Hidir","family":"Yesiltepe","sequence":"additional","affiliation":[{"name":"Virginia Tech"}]},{"given":"James M.","family":"Rehg","sequence":"additional","affiliation":[{"name":"Georgia Tech"}]},{"given":"Pinar","family":"Yanardag","sequence":"additional","affiliation":[{"name":"Virginia Tech"}]}],"member":"263","reference":[{"volume-title":"Civitai","year":"2023","key":"ref1"},{"volume-title":"Gridtrick","year":"2023","key":"ref2"},{"volume-title":"Pexels","year":"2023","key":"ref3"},{"volume-title":"Pixabay","year":"2023","key":"ref4"},{"volume-title":"Prolific","year":"2023","key":"ref5"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01767"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1145\/3592450"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00175"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19784-0_41"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.02121"},{"key":"ref11","article-title":"Flatten: optical flow-guided attention for consistent text-to-video editing","author":"Cong","year":"2023","journal-title":"arXiv preprint"},{"article-title":"Diffedit: Diffusion-based semantic image editing with mask guidance","volume-title":"The Eleventh International Conference on Learning Representations","author":"Couairon","key":"ref12"},{"article-title":"An image is worth one word: Personalizing text-to-image generation using textual inversion","volume-title":"The Eleventh International Conference on Learning Representations","author":"Gal","key":"ref13"},{"key":"ref14","article-title":"Tokenflow: Consistent diffusion features for consistent video editing","author":"Geyer","year":"2023","journal-title":"arXiv preprint"},{"article-title":"Prompt-to-prompt image editing with cross-attention control","volume-title":"The Eleventh International Conference on Learning Representations","author":"Hertz","key":"ref15"},{"key":"ref16","first-page":"6840","article-title":"Denoising diffusion probabilistic models","volume":"33","author":"Ho","year":"2020","journal-title":"Advances in neural information processing systems"},{"key":"ref17","article-title":"Imagen video: High definition video generation with diffusion models","author":"Ho","year":"2022","journal-title":"arXiv preprint"},{"key":"ref18","first-page":"8633","article-title":"Video diffusion models","author":"Ho","year":"2022","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00686"},{"article-title":"Cogvideo: Large-scale pretraining for text-to-video generation via transformers","volume-title":"The Eleventh International Conference on Learning Representations","author":"Hong","key":"ref20"},{"volume-title":"Ebsynth: Fast example-based image synthesis and style transfer","year":"2018","author":"Jamriska","key":"ref21"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1145\/3306346.3323006"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1145\/3478513.3480546"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00582"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01462"},{"key":"ref26","first-page":"14317","article-title":"Shape-aware text-driven layered video editing","volume-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","author":"Lee"},{"key":"ref27","article-title":"Magicedit: High-fidelity and temporally coherent video editing","author":"Liew","year":"2023","journal-title":"arXiv preprint"},{"key":"ref28","article-title":"Video-p2p: Video editing with cross-attention control","author":"Liu","year":"2023","journal-title":"arXiv preprint"},{"key":"ref29","article-title":"Sdedit: Guided image synthesis and editing with stochastic differential equations","volume-title":"ICLR","author":"Meng","year":"2021"},{"key":"ref30","first-page":"6038","article-title":"Null-text inversion for editing real images using guided diffusion models","volume-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","author":"Mokady"},{"key":"ref31","article-title":"Dreamix: Video diffusion models are general video editors","author":"Molad","year":"2023","journal-title":"arXiv preprint"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1145\/3588432.3591513"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.85"},{"key":"ref34","first-page":"15932","article-title":"Fatezero: Fusing attentions for zero-shot text-based video editing","volume-title":"Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV)","author":"Chenyang"},{"key":"ref35","first-page":"8748","article-title":"Learning transferable visual models from natural language supervision","volume-title":"International conference on machine learning","author":"Radford"},{"key":"ref36","article-title":"Hierarchical text-conditional image generation with clip latents","author":"Ramesh","year":"2022","journal-title":"arXiv preprint"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-24574-4_28"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.02155"},{"key":"ref40","first-page":"36479","article-title":"Photorealistic text-to-image diffusion models with deep language understanding","volume":"35","author":"Saharia","year":"2022","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref41","article-title":"Edit-a-video: Single video editing with object-aware consistency","author":"Shin","year":"2023","journal-title":"arXiv preprint"},{"article-title":"Make-a-video: Text-to-video generation without text-video data","volume-title":"The Eleventh International Conference on Learning Representations","author":"Singer","key":"ref42"},{"key":"ref43","first-page":"2256","article-title":"Deep unsupervised learning using nonequilibrium thermodynamics","volume-title":"International conference on machine learning","author":"Sohl-Dickstein","year":"2015"},{"article-title":"Denoising diffusion implicit models","volume-title":"International Conference on Learning Representations","author":"Song","key":"ref44"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58536-5_24"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00191"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1145\/3592451"},{"key":"ref48","article-title":"Zero-shot video editing using off-the-shelf image diffusion models","author":"Wang","year":"2023","journal-title":"arXiv preprint"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2003.819861"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00701"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1145\/3610548.3618160"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00355"}],"event":{"name":"2024 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","start":{"date-parts":[[2024,6,16]]},"location":"Seattle, WA, USA","end":{"date-parts":[[2024,6,22]]}},"container-title":["2024 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/10654794\/10654797\/10657005.pdf?arnumber=10657005","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,9,19]],"date-time":"2024-09-19T06:23:50Z","timestamp":1726727030000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10657005\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,6,16]]},"references-count":52,"URL":"https:\/\/doi.org\/10.1109\/cvpr52733.2024.00622","relation":{},"subject":[],"published":{"date-parts":[[2024,6,16]]}}}