{"id":"https://openalex.org/W3037361235","doi":"https://doi.org/10.18653/v1/2020.acl-tutorials.6","title":"Multi-modal Information Extraction from Text, Semi-structured, and Tabular Data on the Web","display_name":"Multi-modal Information Extraction from Text, Semi-structured, and Tabular Data on the Web","publication_year":2020,"publication_date":"2020-01-01","ids":{"openalex":"https://openalex.org/W3037361235","doi":"https://doi.org/10.18653/v1/2020.acl-tutorials.6","mag":"3037361235"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2020.acl-tutorials.6","pdf_url":"https://www.aclweb.org/anthology/2020.acl-tutorials.6.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true},"type":"article","type_crossref":"proceedings-article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://www.aclweb.org/anthology/2020.acl-tutorials.6.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101406351","display_name":"Dong Xin","orcid":"https://orcid.org/0000-0002-1414-9354"},"institutions":[{"id":"https://openalex.org/I4210140341","display_name":"Allen Institute","ror":"https://ror.org/03cpe7c52","country_code":"US","type":"nonprofit","lineage":["https://openalex.org/I4210140341"]},{"id":"https://openalex.org/I201448701","display_name":"University of Washington","ror":"https://ror.org/00cvxb145","country_code":"US","type":"education","lineage":["https://openalex.org/I201448701"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Xin Luna Dong","raw_affiliation_strings":["University of Washington","University of Washington Allen Institute for AI"],"affiliations":[{"raw_affiliation_string":"University of Washington Allen Institute for AI","institution_ids":["https://openalex.org/I4210140341","https://openalex.org/I201448701"]},{"raw_affiliation_string":"University of Washington","institution_ids":["https://openalex.org/I201448701"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5082305994","display_name":"Hannaneh Hajishirzi","orcid":"https://orcid.org/0000-0002-1055-6657"},"institutions":[{"id":"https://openalex.org/I201448701","display_name":"University of Washington","ror":"https://ror.org/00cvxb145","country_code":"US","type":"education","lineage":["https://openalex.org/I201448701"]},{"id":"https://openalex.org/I4210140341","display_name":"Allen Institute","ror":"https://ror.org/03cpe7c52","country_code":"US","type":"nonprofit","lineage":["https://openalex.org/I4210140341"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Hannaneh Hajishirzi","raw_affiliation_strings":["University of Washington","University of Washington Allen Institute for AI"],"affiliations":[{"raw_affiliation_string":"University of Washington","institution_ids":["https://openalex.org/I201448701"]},{"raw_affiliation_string":"University of Washington Allen Institute for AI","institution_ids":["https://openalex.org/I4210140341","https://openalex.org/I201448701"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5056881273","display_name":"Colin Lockard","orcid":null},"institutions":[{"id":"https://openalex.org/I4210140341","display_name":"Allen Institute","ror":"https://ror.org/03cpe7c52","country_code":"US","type":"nonprofit","lineage":["https://openalex.org/I4210140341"]},{"id":"https://openalex.org/I201448701","display_name":"University of Washington","ror":"https://ror.org/00cvxb145","country_code":"US","type":"education","lineage":["https://openalex.org/I201448701"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Colin Lockard","raw_affiliation_strings":["University of Washington","University of Washington Allen Institute for AI"],"affiliations":[{"raw_affiliation_string":"University of Washington Allen Institute for AI","institution_ids":["https://openalex.org/I4210140341","https://openalex.org/I201448701"]},{"raw_affiliation_string":"University of Washington","institution_ids":["https://openalex.org/I201448701"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5004318489","display_name":"Prashant Shiralkar","orcid":null},"institutions":[{"id":"https://openalex.org/I201448701","display_name":"University of Washington","ror":"https://ror.org/00cvxb145","country_code":"US","type":"education","lineage":["https://openalex.org/I201448701"]},{"id":"https://openalex.org/I4210140341","display_name":"Allen Institute","ror":"https://ror.org/03cpe7c52","country_code":"US","type":"nonprofit","lineage":["https://openalex.org/I4210140341"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Prashant Shiralkar","raw_affiliation_strings":["University of Washington","University of Washington Allen Institute for AI"],"affiliations":[{"raw_affiliation_string":"University of Washington Allen Institute for AI","institution_ids":["https://openalex.org/I201448701","https://openalex.org/I4210140341"]},{"raw_affiliation_string":"University of Washington","institution_ids":["https://openalex.org/I201448701"]}]}],"institution_assertions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.861,"has_fulltext":true,"fulltext_origin":"pdf","cited_by_count":7,"citation_normalized_percentile":{"value":0.816436,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":82,"max":84},"biblio":{"volume":null,"issue":null,"first_page":"23","last_page":"26"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9996,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9996,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13083","display_name":"Advanced Text Analysis Techniques","score":0.9979,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9968,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/semi-structured-data","display_name":"Semi-structured data","score":0.5794462},{"id":"https://openalex.org/keywords/unstructured-data","display_name":"Unstructured data","score":0.5195761}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8343758},{"id":"https://openalex.org/C195807954","wikidata":"https://www.wikidata.org/wiki/Q1662562","display_name":"Information extraction","level":2,"score":0.68306077},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.6096027},{"id":"https://openalex.org/C40077939","wikidata":"https://www.wikidata.org/wiki/Q2336004","display_name":"Semi-structured data","level":3,"score":0.5794462},{"id":"https://openalex.org/C21959979","wikidata":"https://www.wikidata.org/wiki/Q36774","display_name":"Web page","level":2,"score":0.561389},{"id":"https://openalex.org/C2781252014","wikidata":"https://www.wikidata.org/wiki/Q1141900","display_name":"Unstructured data","level":3,"score":0.5195761},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.43191808},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.41740137},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.36862832},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.32809284},{"id":"https://openalex.org/C75684735","wikidata":"https://www.wikidata.org/wiki/Q858810","display_name":"Big data","level":2,"score":0.2324408},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.16661182},{"id":"https://openalex.org/C5655090","wikidata":"https://www.wikidata.org/wiki/Q192588","display_name":"Relational database","level":2,"score":0.09778884}],"mesh":[],"locations_count":1,"locations":[{"is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2020.acl-tutorials.6","pdf_url":"https://www.aclweb.org/anthology/2020.acl-tutorials.6.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2020.acl-tutorials.6","pdf_url":"https://www.aclweb.org/anthology/2020.acl-tutorials.6.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true},"sustainable_development_goals":[{"score":0.83,"id":"https://metadata.un.org/sdg/4","display_name":"Quality education"}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":22,"referenced_works":["https://openalex.org/W1565102206","https://openalex.org/W2083413555","https://openalex.org/W2092364718","https://openalex.org/W2097874932","https://openalex.org/W2107598941","https://openalex.org/W2111869785","https://openalex.org/W2163072729","https://openalex.org/W2400723282","https://openalex.org/W2604259521","https://openalex.org/W2613881683","https://openalex.org/W2805173585","https://openalex.org/W2805864708","https://openalex.org/W2889133671","https://openalex.org/W2891117443","https://openalex.org/W2898700358","https://openalex.org/W2922714365","https://openalex.org/W2923400109","https://openalex.org/W2935052563","https://openalex.org/W2955858358","https://openalex.org/W2962859618","https://openalex.org/W2962982640","https://openalex.org/W2970467549"],"related_works":["https://openalex.org/W4389912246","https://openalex.org/W4205553786","https://openalex.org/W36911888","https://openalex.org/W2942479669","https://openalex.org/W2405464607","https://openalex.org/W2281126075","https://openalex.org/W2142354878","https://openalex.org/W2034595671","https://openalex.org/W2030910246","https://openalex.org/W1978211327"],"abstract_inverted_index":{"The":[0],"World":[1],"Wide":[2],"Web":[3],"contains":[4],"vast":[5],"quantities":[6],"of":[7,46,76,98,109,137],"textual":[8,156,209,244,278],"information":[9,31,121,267],"in":[10,21,126,169,196,211,260],"several":[11],"forms:":[12],"unstructured":[13,146],"text,":[14],"template-based":[15],"semi-structured":[16,161,284],"webpages":[17],"(which":[18],"present":[19],"data":[20,54,68,189],"key-value":[22],"pairs":[23],"and":[24,26,35,56,101,129,172,183,220,283,292],"lists),":[25],"tables.":[27],"Methods":[28],"for":[29,295],"extracting":[30],"from":[32,48,66],"these":[33,60,134,187,290],"sources":[34],"converting":[36],"it":[37,262],"to":[38,104,132,165,273],"a":[39,44,117,204,226,271],"structured":[40],"form":[41],"have":[42,62,80,191,246],"been":[43,193],"target":[45],"research":[47,200],"the":[49,74,77,107,110,124,127,142,170,197,213,238,266],"natural":[50],"language":[51],"processing":[52],"(NLP),":[53],"mining,":[55],"database":[57],"communities.":[58],"While":[59,186],"researchers":[61],"largely":[63,149,192],"separated":[64],"extraction":[65,228,245,268],"web":[67,175],"into":[69,225],"different":[70,135,188],"problems":[71,83],"based":[72],"on":[73,151,180],"modality":[75],"data,":[78,90],"they":[79],"faced":[81],"similar":[82],"such":[84],"as":[85,255,270],"learning":[86,152,235],"with":[87,106,281],"limited":[88],"labeled":[89],"defining":[91],"(or":[92],"avoiding":[93],"defining)":[94],"ontologies,":[95],"making":[96],"use":[97],"prior":[99],"knowledge,":[100],"scaling":[102],"solutions":[103,130,275],"deal":[105],"size":[108],"Web.":[111],"In":[112],"this":[113,259],"tutorial":[114],"we":[115],"take":[116],"holistic":[118],"view":[119],"toward":[120,208,248],"extraction,":[122,210],"exploring":[123],"commonalities":[125],"challenges":[128],"developed":[131],"address":[133],"forms":[136],"text.":[138],"We":[139,287],"will":[140,288],"explore":[141],"approaches":[143,158,173,291],"targeted":[144,159],"at":[145,160],"text":[147],"that":[148,163,276],"rely":[150,178],"syntactic":[153],"or":[154],"semantic":[155],"patterns,":[157],"documents":[162],"learn":[164],"identify":[166],"structural":[167],"patterns":[168],"template,":[171],"targeting":[174],"tables":[176],"which":[177,212],"heavily":[179],"entity":[181],"linking":[182],"type":[184],"information.":[185,286],"modalities":[190],"considered":[194],"separately":[195],"past,":[198],"recent":[199],"has":[201],"started":[202],"taking":[203],"more":[205],"inclusive":[206],"approach":[207],"multiple":[214],"signals":[215],"offered":[216],"by":[217,232],"textual,":[218],"layout,":[219],"visual":[221,282],"clues":[222],"are":[223],"combined":[224],"single":[227],"model":[229],"made":[230],"possible":[231],"new":[233],"deep":[234],"approaches.":[236],"At":[237],"same":[239],"time,":[240],"trends":[241],"within":[242],"purely":[243],"shifted":[247],"full-document":[249],"understanding":[250],"rather":[251],"than":[252],"considering":[253,265],"sentences":[254],"independent":[256],"units.":[257],"With":[258],"mind,":[261],"is":[263],"worth":[264],"problem":[269],"whole":[272],"motivate":[274],"harness":[277],"semantics":[279],"along":[280],"layout":[285],"discuss":[289],"suggest":[293],"avenues":[294],"future":[296],"work.":[297]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W3037361235","counts_by_year":[{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":5},{"year":2021,"cited_by_count":1}],"updated_date":"2025-01-17T10:45:17.245485","created_date":"2020-07-02"}