{"id":"https://openalex.org/W4320855201","doi":"https://doi.org/10.48550/arxiv.2210.04885","title":"What the DAAM: Interpreting Stable Diffusion Using Cross Attention","display_name":"What the DAAM: Interpreting Stable Diffusion Using Cross Attention","publication_year":2022,"publication_date":"2022-01-01","ids":{"openalex":"https://openalex.org/W4320855201","doi":"https://doi.org/10.48550/arxiv.2210.04885"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2210.04885","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/abs/2210.04885","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101066425","display_name":"Raphael Tang","orcid":"https://orcid.org/0009-0007-2873-892X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tang, Raphael","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5035827708","display_name":"Linqing Liu","orcid":"https://orcid.org/0000-0001-6369-5645"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Linqing","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101710566","display_name":"Akshat Pandey","orcid":"https://orcid.org/0000-0002-2797-4708"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Pandey, Akshat","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101513734","display_name":"Zhiying Jiang","orcid":"https://orcid.org/0000-0001-8840-7056"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jiang, Zhiying","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5068041604","display_name":"Gefei Yang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Gefei","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5011562681","display_name":"Karun Kumar","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kumar, Karun","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5049427071","display_name":"Pontus Stenetorp","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Stenetorp, Pontus","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5082997975","display_name":"Jimmy Lin","orcid":"https://orcid.org/0000-0002-0661-7189"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lin, Jimmy","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5077656815","display_name":"Ferhan T\u00fcre","orcid":"https://orcid.org/0000-0002-5585-157X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ture, Ferhan","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.824796,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":75,"max":78},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.971,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.971,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9556,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9391,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/interpretability","display_name":"Interpretability","score":0.6592415},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.45551082}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.70142233},{"id":"https://openalex.org/C2781067378","wikidata":"https://www.wikidata.org/wiki/Q17027399","display_name":"Interpretability","level":2,"score":0.6592415},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.54587656},{"id":"https://openalex.org/C60048249","wikidata":"https://www.wikidata.org/wiki/Q37437","display_name":"Syntax","level":2,"score":0.50341886},{"id":"https://openalex.org/C160633673","wikidata":"https://www.wikidata.org/wiki/Q355198","display_name":"Pixel","level":2,"score":0.4750885},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.46976945},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.45551082},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.13989237},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2210.04885","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2210.04885","pdf_url":"http://arxiv.org/pdf/2210.04885","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":false,"landing_page_url":"https://api.datacite.org/dois/10.48550/arxiv.2210.04885","pdf_url":null,"source":{"id":"https://openalex.org/S4393179698","display_name":"DataCite API","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I4210145204","host_organization_name":"DataCite","host_organization_lineage":["https://openalex.org/I4210145204"],"host_organization_lineage_names":["DataCite"],"type":"metadata"},"license":null,"license_id":null,"version":null}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2210.04885","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[{"score":0.75,"display_name":"Quality education","id":"https://metadata.un.org/sdg/4"}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4390569940","https://openalex.org/W4388422664","https://openalex.org/W4361193272","https://openalex.org/W4312407344","https://openalex.org/W4310278675","https://openalex.org/W4226258012","https://openalex.org/W2963326959","https://openalex.org/W2905433371","https://openalex.org/W2894289927","https://openalex.org/W2888392564"],"abstract_inverted_index":{"Large-scale":[0],"diffusion":[1,145],"neural":[2],"networks":[3],"represent":[4],"a":[5,24,31,116,148],"substantial":[6],"milestone":[7],"in":[8,47,92],"text-to-image":[9],"generation,":[10],"but":[11],"they":[12],"remain":[13],"poorly":[14],"understood,":[15],"lacking":[16],"interpretability":[17],"analyses.":[18],"In":[19],"this":[20],"paper,":[21],"we":[22,40,108,122,138],"perform":[23],"text-image":[25],"attribution":[26,38,72],"analysis":[27],"on":[28,65,74,118],"Stable":[29],"Diffusion,":[30],"recently":[32],"open-sourced":[33],"model.":[34],"To":[35,135],"produce":[36],"pixel-level":[37],"maps,":[39],"upscale":[41],"and":[42,129],"aggregate":[43],"cross-attention":[44],"word-pixel":[45],"scores":[46],"the":[48,88,93,140],"denoising":[49],"subnetwork,":[50],"naming":[51],"our":[52,136],"method":[53],"DAAM.":[54],"We":[55,82],"evaluate":[56],"its":[57,61,70],"correctness":[58],"by":[59,80],"testing":[60],"semantic":[62,111],"segmentation":[63],"ability":[64],"nouns,":[66],"as":[67,69],"well":[68],"generalized":[71],"quality":[73,128],"all":[75],"parts":[76],"of":[77,90,155],"speech,":[78],"rated":[79],"humans.":[81],"then":[83],"apply":[84],"DAAM":[85],"to":[86,142],"study":[87,109],"role":[89],"syntax":[91],"pixel":[94],"space,":[95],"characterizing":[96],"head--dependent":[97],"heat":[98],"map":[99],"interaction":[100],"patterns":[101],"for":[102],"ten":[103],"common":[104],"dependency":[105],"relations.":[106],"Finally,":[107],"several":[110],"phenomena":[112],"using":[113],"DAAM,":[114],"with":[115],"focus":[117],"feature":[119],"entanglement,":[120],"where":[121],"find":[123],"that":[124],"cohyponyms":[125],"worsen":[126],"generation":[127],"descriptive":[130],"adjectives":[131],"attend":[132],"too":[133],"broadly.":[134],"knowledge,":[137],"are":[139],"first":[141],"interpret":[143],"large":[144],"models":[146],"from":[147],"visuolinguistic":[149],"perspective,":[150],"which":[151],"enables":[152],"future":[153],"lines":[154],"research.":[156],"Our":[157],"code":[158],"is":[159],"at":[160],"https://github.com/castorini/daam.":[161]},"abstract_inverted_index_v3":null,"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4320855201","counts_by_year":[{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":1}],"updated_date":"2025-04-25T12:35:53.523268","created_date":"2023-02-16"}