{"id":"https://openalex.org/W4385572894","doi":"https://doi.org/10.18653/v1/2022.emnlp-main.26","title":"Translation between Molecules and Natural Language","display_name":"Translation between Molecules and Natural Language","publication_year":2022,"publication_date":"2022-01-01","ids":{"openalex":"https://openalex.org/W4385572894","doi":"https://doi.org/10.18653/v1/2022.emnlp-main.26"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2022.emnlp-main.26","pdf_url":"https://aclanthology.org/2022.emnlp-main.26.pdf","source":{"id":"https://openalex.org/S4363608991","display_name":"Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true},"type":"article","type_crossref":"proceedings-article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://aclanthology.org/2022.emnlp-main.26.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5061547469","display_name":"Carl K. Edwards","orcid":"https://orcid.org/0000-0002-7353-0200"},"institutions":[{"id":"https://openalex.org/I157725225","display_name":"University of Illinois Urbana-Champaign","ror":"https://ror.org/047426m28","country_code":"US","type":"education","lineage":["https://openalex.org/I157725225"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Carl Edwards","raw_affiliation_strings":["University of Illinois Urbana-Champaign"],"affiliations":[{"raw_affiliation_string":"University of Illinois Urbana-Champaign","institution_ids":["https://openalex.org/I157725225"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5036250098","display_name":"Tuan Lai","orcid":"https://orcid.org/0000-0002-1903-6674"},"institutions":[{"id":"https://openalex.org/I157725225","display_name":"University of Illinois Urbana-Champaign","ror":"https://ror.org/047426m28","country_code":"US","type":"education","lineage":["https://openalex.org/I157725225"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Tuan Lai","raw_affiliation_strings":["University of Illinois Urbana-Champaign","X, the Moonshot Factory"],"affiliations":[{"raw_affiliation_string":"University of Illinois Urbana-Champaign","institution_ids":["https://openalex.org/I157725225"]},{"raw_affiliation_string":"X, the Moonshot Factory","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5090930912","display_name":"Kevin Ros","orcid":"https://orcid.org/0009-0007-0961-2694"},"institutions":[{"id":"https://openalex.org/I157725225","display_name":"University of Illinois Urbana-Champaign","ror":"https://ror.org/047426m28","country_code":"US","type":"education","lineage":["https://openalex.org/I157725225"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Kevin Ros","raw_affiliation_strings":["University of Illinois Urbana-Champaign"],"affiliations":[{"raw_affiliation_string":"University of Illinois Urbana-Champaign","institution_ids":["https://openalex.org/I157725225"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5047062065","display_name":"Garrett Honke","orcid":"https://orcid.org/0000-0001-6678-044X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Garrett Honke","raw_affiliation_strings":["X, the Moonshot Factory"],"affiliations":[{"raw_affiliation_string":"X, the Moonshot Factory","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5013240665","display_name":"Kyunghyun Cho","orcid":null},"institutions":[{"id":"https://openalex.org/I57206974","display_name":"New York University","ror":"https://ror.org/0190ak572","country_code":"US","type":"education","lineage":["https://openalex.org/I57206974"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Kyunghyun Cho","raw_affiliation_strings":["Genentech","New York University,"],"affiliations":[{"raw_affiliation_string":"New York University,","institution_ids":["https://openalex.org/I57206974"]},{"raw_affiliation_string":"Genentech","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5075033889","display_name":"Heng Ji","orcid":"https://orcid.org/0000-0002-0464-7966"},"institutions":[{"id":"https://openalex.org/I157725225","display_name":"University of Illinois Urbana-Champaign","ror":"https://ror.org/047426m28","country_code":"US","type":"education","lineage":["https://openalex.org/I157725225"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Heng Ji","raw_affiliation_strings":["University of Illinois Urbana-Champaign"],"affiliations":[{"raw_affiliation_string":"University of Illinois Urbana-Champaign","institution_ids":["https://openalex.org/I157725225"]}]}],"institution_assertions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":19.881,"has_fulltext":true,"fulltext_origin":"pdf","cited_by_count":48,"citation_normalized_percentile":{"value":0.998292,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":99},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10211","display_name":"Computational Drug Discovery Methods","score":0.9925,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10211","display_name":"Computational Drug Discovery Methods","score":0.9925,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":0.9838,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10911","display_name":"Chemical Synthesis and Analysis","score":0.9819,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/natural-language-generation","display_name":"Natural Language Generation","score":0.53685737},{"id":"https://openalex.org/keywords/closed-captioning","display_name":"Closed captioning","score":0.48260725},{"id":"https://openalex.org/keywords/training-set","display_name":"Training set","score":0.43630514}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7786056},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.6001673},{"id":"https://openalex.org/C203005215","wikidata":"https://www.wikidata.org/wiki/Q79798","display_name":"Machine translation","level":2,"score":0.5693133},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.56165296},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.557484},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.53952676},{"id":"https://openalex.org/C2776187449","wikidata":"https://www.wikidata.org/wiki/Q1513879","display_name":"Natural language generation","level":3,"score":0.53685737},{"id":"https://openalex.org/C149364088","wikidata":"https://www.wikidata.org/wiki/Q185917","display_name":"Translation (biology)","level":4,"score":0.49747065},{"id":"https://openalex.org/C157657479","wikidata":"https://www.wikidata.org/wiki/Q2367247","display_name":"Closed captioning","level":3,"score":0.48260725},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.4541745},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.43630514},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.43459722},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.12420574},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.06604755},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.063472986},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0},{"id":"https://openalex.org/C105580179","wikidata":"https://www.wikidata.org/wiki/Q188928","display_name":"Messenger RNA","level":3,"score":0.0},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2022.emnlp-main.26","pdf_url":"https://aclanthology.org/2022.emnlp-main.26.pdf","source":{"id":"https://openalex.org/S4363608991","display_name":"Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true},{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2204.11817","pdf_url":"https://arxiv.org/pdf/2204.11817","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2022.emnlp-main.26","pdf_url":"https://aclanthology.org/2022.emnlp-main.26.pdf","source":{"id":"https://openalex.org/S4363608991","display_name":"Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true},"sustainable_development_goals":[{"display_name":"Quality education","id":"https://metadata.un.org/sdg/4","score":0.85}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":64,"referenced_works":["https://openalex.org/W1614298861","https://openalex.org/W1757990252","https://openalex.org/W1889081078","https://openalex.org/W1895577753","https://openalex.org/W1975147762","https://openalex.org/W1983478747","https://openalex.org/W1988037271","https://openalex.org/W2038702914","https://openalex.org/W2123301721","https://openalex.org/W2152772232","https://openalex.org/W2154652894","https://openalex.org/W2157331557","https://openalex.org/W2168322495","https://openalex.org/W2200017991","https://openalex.org/W2405035126","https://openalex.org/W2405756170","https://openalex.org/W2555897561","https://openalex.org/W2886544065","https://openalex.org/W2896348597","https://openalex.org/W2896457183","https://openalex.org/W2949555952","https://openalex.org/W2952638691","https://openalex.org/W2963138277","https://openalex.org/W2963834202","https://openalex.org/W2963966654","https://openalex.org/W2964113829","https://openalex.org/W2966715458","https://openalex.org/W2969876226","https://openalex.org/W2970771982","https://openalex.org/W2980282514","https://openalex.org/W3001279689","https://openalex.org/W3045928028","https://openalex.org/W3090449556","https://openalex.org/W3091588028","https://openalex.org/W3093934881","https://openalex.org/W3107826490","https://openalex.org/W3109892317","https://openalex.org/W3116865743","https://openalex.org/W3129576130","https://openalex.org/W3146384714","https://openalex.org/W3160021293","https://openalex.org/W3166396011","https://openalex.org/W3182293212","https://openalex.org/W3198473709","https://openalex.org/W3201083857","https://openalex.org/W3209056694","https://openalex.org/W3211951295","https://openalex.org/W4206706211","https://openalex.org/W4212837331","https://openalex.org/W4220902634","https://openalex.org/W4224035735","https://openalex.org/W4224442590","https://openalex.org/W4226133175","https://openalex.org/W4231949702","https://openalex.org/W4247259022","https://openalex.org/W4251095682","https://openalex.org/W4253877692","https://openalex.org/W4285659244","https://openalex.org/W4288089799","https://openalex.org/W4294170691","https://openalex.org/W4296979096","https://openalex.org/W4301206121","https://openalex.org/W4312922092","https://openalex.org/W4385245566"],"related_works":["https://openalex.org/W783305165","https://openalex.org/W4285877427","https://openalex.org/W382594479","https://openalex.org/W3009270862","https://openalex.org/W2972060578","https://openalex.org/W2575772232","https://openalex.org/W2470045054","https://openalex.org/W2152921782","https://openalex.org/W2140902089","https://openalex.org/W2030298461"],"abstract_inverted_index":{"We":[0],"present":[1],"MolT5":[2,23,59],"-":[3],"a":[4,12,81],"self-supervised":[5],"learning":[6],"framework":[7],"for":[8,25,54],"pretraining":[9],"models":[10,61,102],"on":[11,62],"vast":[13],"amount":[14],"of":[15,31,72,90],"unlabeled":[16],"natural":[17],"language":[18],"text":[19],"and":[20,28,39,49,93,110],"molecule":[21,37,43,91,95],"strings.":[22],"allows":[24],"new,":[26],"useful,":[27],"challenging":[29],"analogs":[30],"traditional":[32],"vision-language":[33],"tasks,":[34],"such":[35],"as":[36],"captioning":[38,92],"text-based":[40,94],"de":[41],"novo":[42],"generation":[44],"(altogether:":[45],"translation":[46],"between":[47],"molecules":[48,109],"language),":[50],"which":[51,112],"we":[52,76],"explore":[53],"the":[55,68,88],"first":[56],"time.":[57],"Since":[58],"pretrains":[60],"single-modal":[63],"data,":[64],"it":[65],"helps":[66],"overcome":[67],"chemistry":[69],"domain":[70],"shortcoming":[71],"data":[73],"scarcity.":[74],"Furthermore,":[75],"consider":[77],"several":[78],"metrics,":[79],"including":[80],"new":[82],"cross-modal":[83],"embedding-based":[84],"metric,":[85],"to":[86,105],"evaluate":[87],"tasks":[89],"generation.":[96],"Our":[97],"results":[98],"show":[99],"that":[100],"MolT5-based":[101],"are":[103,116],"able":[104],"generate":[106],"outputs,":[107],"both":[108],"captions,":[111],"in":[113],"many":[114],"cases":[115],"high":[117],"quality.":[118]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4385572894","counts_by_year":[{"year":2024,"cited_by_count":24},{"year":2023,"cited_by_count":20},{"year":2022,"cited_by_count":4}],"updated_date":"2025-01-02T14:35:21.488834","created_date":"2023-08-05"}