{"id":"https://openalex.org/W4366308446","doi":"https://doi.org/10.1109/ictai56018.2022.00046","title":"Exploring Heterogeneous Feature Representation for Document Layout Understanding","display_name":"Exploring Heterogeneous Feature Representation for Document Layout Understanding","publication_year":2022,"publication_date":"2022-10-01","ids":{"openalex":"https://openalex.org/W4366308446","doi":"https://doi.org/10.1109/ictai56018.2022.00046"},"language":"en","primary_location":{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/ictai56018.2022.00046","pdf_url":null,"source":null,"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},"type":"article","type_crossref":"proceedings-article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5110832205","display_name":"Guosheng Feng","orcid":null},"institutions":[{"id":"https://openalex.org/I5124864","display_name":"\u00c9cole Polytechnique F\u00e9d\u00e9rale de Lausanne","ror":"https://ror.org/02s376052","country_code":"CH","type":"funder","lineage":["https://openalex.org/I2799323385","https://openalex.org/I5124864"]}],"countries":["CH"],"is_corresponding":false,"raw_author_name":"Guosheng Feng","raw_affiliation_strings":["Ecole Polytechnique Federale de Lausanne (EPFL), Lausanne, Switzerland"],"affiliations":[{"raw_affiliation_string":"Ecole Polytechnique Federale de Lausanne (EPFL), Lausanne, Switzerland","institution_ids":["https://openalex.org/I5124864"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5061321029","display_name":"Danqing Huang","orcid":null},"institutions":[{"id":"https://openalex.org/I4210113369","display_name":"Microsoft Research Asia (China)","ror":"https://ror.org/0300m5276","country_code":"CN","type":"funder","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210113369"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Danqing Huang","raw_affiliation_strings":["Microsoft Research Asia, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Microsoft Research Asia, Beijing, China","institution_ids":["https://openalex.org/I4210113369"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5090151187","display_name":"Chin-Yew Lin","orcid":"https://orcid.org/0000-0002-0798-6365"},"institutions":[{"id":"https://openalex.org/I4210113369","display_name":"Microsoft Research Asia (China)","ror":"https://ror.org/0300m5276","country_code":"CN","type":"funder","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210113369"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chin-Yew Lin","raw_affiliation_strings":["Microsoft Research Asia, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Microsoft Research Asia, Beijing, China","institution_ids":["https://openalex.org/I4210113369"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5035478328","display_name":"Damjan Dakic","orcid":null},"institutions":[],"countries":["RS"],"is_corresponding":false,"raw_author_name":"Damjan Dakic","raw_affiliation_strings":["Microsoft, Belgrade, Serbia"],"affiliations":[{"raw_affiliation_string":"Microsoft, Belgrade, Serbia","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5009517484","display_name":"Milos Milunovic","orcid":null},"institutions":[],"countries":["RS"],"is_corresponding":false,"raw_author_name":"Milos Milunovic","raw_affiliation_strings":["Microsoft, Belgrade, Serbia"],"affiliations":[{"raw_affiliation_string":"Microsoft, Belgrade, Serbia","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103218676","display_name":"Tamara Stankovic","orcid":"https://orcid.org/0009-0007-0644-7398"},"institutions":[],"countries":["RS"],"is_corresponding":false,"raw_author_name":"Tamara Stankovic","raw_affiliation_strings":["Microsoft, Belgrade, Serbia"],"affiliations":[{"raw_affiliation_string":"Microsoft, Belgrade, Serbia","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5063172118","display_name":"Igor Ilic","orcid":"https://orcid.org/0009-0002-5119-7993"},"institutions":[],"countries":["RS"],"is_corresponding":false,"raw_author_name":"Igor Ilic","raw_affiliation_strings":["Microsoft, Belgrade, Serbia"],"affiliations":[{"raw_affiliation_string":"Microsoft, Belgrade, Serbia","institution_ids":[]}]}],"institution_assertions":[],"countries_distinct_count":3,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.13,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.301896,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":59,"max":69},"biblio":{"volume":null,"issue":null,"first_page":"267","last_page":"275"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9999,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9999,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9933,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9926,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/interpretability","display_name":"Interpretability","score":0.749155},{"id":"https://openalex.org/keywords/feature-learning","display_name":"Feature Learning","score":0.7050708},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.58169806},{"id":"https://openalex.org/keywords/representation","display_name":"Representation","score":0.5702479}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.79944646},{"id":"https://openalex.org/C2781067378","wikidata":"https://www.wikidata.org/wiki/Q17027399","display_name":"Interpretability","level":2,"score":0.749155},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.7050708},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.58169806},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.5702479},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.49466082},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4515641},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.089434385},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.0},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/ictai56018.2022.00046","pdf_url":null,"source":null,"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/11","display_name":"Sustainable cities and communities","score":0.63}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":17,"referenced_works":["https://openalex.org/W1522301498","https://openalex.org/W2072747082","https://openalex.org/W2170288539","https://openalex.org/W2187089797","https://openalex.org/W2295598076","https://openalex.org/W2768348081","https://openalex.org/W2986619406","https://openalex.org/W2999615587","https://openalex.org/W3104953317","https://openalex.org/W3113753692","https://openalex.org/W3122890974","https://openalex.org/W3132623428","https://openalex.org/W3174086521","https://openalex.org/W3176664887","https://openalex.org/W3176851559","https://openalex.org/W3204562006","https://openalex.org/W4385245566"],"related_works":["https://openalex.org/W4390569940","https://openalex.org/W4388422664","https://openalex.org/W4310278675","https://openalex.org/W4298151006","https://openalex.org/W4289536128","https://openalex.org/W3164948662","https://openalex.org/W3153597579","https://openalex.org/W2905433371","https://openalex.org/W2905271011","https://openalex.org/W2888392564"],"abstract_inverted_index":{"There":[0],"are":[1],"increasing":[2],"interests":[3],"in":[4,26,31,63,113],"document":[5,33,122],"layout":[6,34,136,144],"representation":[7,94],"learning":[8,131],"and":[9,22,38,45,89,151,168],"understanding.":[10],"Transformer,":[11],"with":[12],"its":[13,46],"great":[14],"power,":[15],"has":[16],"become":[17],"the":[18,64,72,93,114,119,135],"mainstream":[19],"model":[20],"architecture":[21],"achieved":[23],"promising":[24],"results":[25,155],"this":[27,78,97],"area.":[28],"As":[29],"elements":[30],"a":[32,101],"consist":[35],"of":[36,121],"multi-modal":[37],"multi-dimensional":[39],"features":[40,88],"such":[41],"as":[42],"position,":[43],"size,":[44],"text":[47],"content,":[48],"prior":[49],"works":[50],"represent":[51],"each":[52],"element":[53,148],"by":[54,118],"summing":[55],"all":[56],"feature":[57,111],"embeddings":[58],"into":[59,71],"one":[60],"unified":[61],"vector":[62],"input":[65],"layer,":[66],"which":[67],"is":[68],"then":[69],"fed":[70],"self-attention":[73],"for":[74],"element-wise":[75],"interaction.":[76],"However,":[77],"simple":[79],"summation":[80],"would":[81],"potentially":[82],"raise":[83],"mixed":[84],"correlations":[85],"among":[86],"heterogeneous":[87],"bring":[90],"noise":[91],"to":[92,107,133],"learning.":[95],"In":[96],"paper,":[98],"we":[99,127,164],"propose":[100,128],"novel":[102],"two-step":[103],"disentangled":[104],"attention":[105],"mechanism":[106],"allow":[108],"more":[109],"flexible":[110],"interactions":[112],"self-attention.":[115],"Furthermore,":[116],"inspired":[117],"principles":[120],"design":[123],"(e.g.,":[124],"contrast,":[125],"proximity),":[126],"an":[129],"unsupervised":[130],"objective":[132],"constrain":[134],"representations.":[137],"We":[138],"verify":[139],"our":[140,158,173],"approach":[141,159],"on":[142],"two":[143],"understanding":[145],"tasks,":[146],"namely":[147],"role":[149],"labeling":[150],"image":[152],"captioning.":[153],"Experiment":[154],"show":[156],"that":[157],"achieves":[160],"state-of-the-art":[161],"performances.":[162],"Moreover,":[163],"conduct":[165],"extensive":[166],"studies":[167],"observe":[169],"better":[170],"interpretability":[171],"using":[172],"approach.":[174]},"abstract_inverted_index_v3":null,"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4366308446","counts_by_year":[{"year":2024,"cited_by_count":1}],"updated_date":"2025-04-16T05:45:40.503840","created_date":"2023-04-20"}