{"id":"https://openalex.org/W4309803474","doi":"https://doi.org/10.48550/arxiv.2211.11381","title":"LISA: Localized Image Stylization with Audio via Implicit Neural Representation","display_name":"LISA: Localized Image Stylization with Audio via Implicit Neural Representation","publication_year":2022,"publication_date":"2022-01-01","ids":{"openalex":"https://openalex.org/W4309803474","doi":"https://doi.org/10.48550/arxiv.2211.11381"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2211.11381","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/abs/2211.11381","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100407263","display_name":"Seung Hyun Lee","orcid":"https://orcid.org/0000-0002-1109-6787"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lee, Seung Hyun","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5075621908","display_name":"Chanyoung Kim","orcid":"https://orcid.org/0000-0003-2749-8163"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kim, Chanyoung","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5025693806","display_name":"Wonmin Byeon","orcid":"https://orcid.org/0000-0002-4780-4749"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Byeon, Wonmin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5023718442","display_name":"Sang Ho Yoon","orcid":"https://orcid.org/0000-0003-2771-1593"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yoon, Sang Ho","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5061842716","display_name":"Jinkyu Kim","orcid":"https://orcid.org/0000-0001-6520-2074"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kim, Jinkyu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5077788107","display_name":"Sangpil Kim","orcid":"https://orcid.org/0000-0002-7349-0018"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kim, Sangpil","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.0,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":0,"max":59},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9949,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9949,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9679,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9675,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/representation","display_name":"Representation","score":0.611859},{"id":"https://openalex.org/keywords/scene-statistics","display_name":"Scene statistics","score":0.42407864}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.78662074},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.6967542},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.66134554},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.65713114},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.65614206},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.611859},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.5479857},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.52596736},{"id":"https://openalex.org/C197654239","wikidata":"https://www.wikidata.org/wiki/Q7430757","display_name":"Scene statistics","level":3,"score":0.42407864},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.10704449},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C169760540","wikidata":"https://www.wikidata.org/wiki/Q207011","display_name":"Neuroscience","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2211.11381","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2211.11381","pdf_url":"http://arxiv.org/pdf/2211.11381","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":false,"landing_page_url":"https://api.datacite.org/dois/10.48550/arxiv.2211.11381","pdf_url":null,"source":{"id":"https://openalex.org/S4393179698","display_name":"DataCite API","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I4210145204","host_organization_name":"DataCite","host_organization_lineage":["https://openalex.org/I4210145204"],"host_organization_lineage_names":["DataCite"],"type":"metadata"},"license":null,"license_id":null,"version":null}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2211.11381","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4365135359","https://openalex.org/W4311804456","https://openalex.org/W2951819827","https://openalex.org/W2849310602","https://openalex.org/W2549990292","https://openalex.org/W2345479200","https://openalex.org/W2312145515","https://openalex.org/W2183306018","https://openalex.org/W2081900870","https://openalex.org/W1834370135"],"abstract_inverted_index":{"We":[0,124],"present":[1],"a":[2,33,60,79,82,110,167],"novel":[3],"framework,":[4],"Localized":[5],"Image":[6],"Stylization":[7],"with":[8,114,132,161,199],"Audio":[9],"(LISA)":[10],"which":[11],"performs":[12],"audio-driven":[13],"localized":[14,154],"image":[15,43,52,55,65,96],"stylization.":[16],"Sound":[17],"often":[18],"provides":[19,84],"information":[20],"about":[21],"the":[22,26,37,50,64,90,94,102,133,139,153,162,174,178,192,200],"specific":[23],"context":[24],"of":[25,36,63,169],"scene":[27,38,143,196],"and":[28,97,189],"is":[29,70],"closely":[30],"related":[31],"to":[32,88,137,157],"certain":[34],"part":[35,62],"or":[39,56,105,142,195],"object.":[40],"However,":[41],"existing":[42],"stylization":[44,181],"works":[45],"have":[46],"focused":[47],"on":[48,67,145],"stylizing":[49,101],"entire":[51],"using":[53],"an":[54,85,115],"text":[57],"input.":[58,165,203],"Stylizing":[59],"particular":[61],"based":[66,144],"audio":[68,86,164,202],"input":[69,87,95],"natural":[71],"but":[72],"challenging.":[73],"In":[74],"this":[75],"work,":[76],"we":[77,171],"propose":[78],"framework":[80,176],"that":[81,173],"user":[83],"localize":[89],"sound":[91,146],"source":[92],"in":[93,197],"another":[98],"for":[99],"locally":[100],"target":[103,140,193],"object":[104,141,194],"scene.":[106],"LISA":[107,184],"first":[108],"produces":[109],"delicate":[111],"localization":[112,117,135,187],"map":[113,136],"audio-visual":[116],"network":[118],"by":[119],"leveraging":[120],"CLIP":[121],"embedding":[122],"space.":[123],"then":[125],"utilize":[126],"implicit":[127],"neural":[128],"representation":[129],"(INR)":[130],"along":[131],"predicted":[134],"stylize":[138],"information.":[147],"The":[148],"proposed":[149,175],"INR":[150],"can":[151],"manipulate":[152],"pixel":[155],"values":[156],"be":[158],"semantically":[159],"consistent":[160],"provided":[163],"Through":[166],"series":[168],"experiments,":[170],"show":[172],"outperforms":[177],"other":[179],"audio-guided":[180],"methods.":[182],"Moreover,":[183],"constructs":[185],"concise":[186],"maps":[188],"naturally":[190],"manipulates":[191],"accordance":[198],"given":[201]},"abstract_inverted_index_v3":null,"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4309803474","counts_by_year":[],"updated_date":"2025-03-03T01:51:37.185471","created_date":"2022-11-29"}