{"id":"https://openalex.org/W4393723397","doi":"https://doi.org/10.48550/arxiv.2404.00044","title":"UAlign: Pushing the Limit of Template-free Retrosynthesis Prediction\n with Unsupervised SMILES Alignment","display_name":"UAlign: Pushing the Limit of Template-free Retrosynthesis Prediction\n with Unsupervised SMILES Alignment","publication_year":2024,"publication_date":"2024-03-24","ids":{"openalex":"https://openalex.org/W4393723397","doi":"https://doi.org/10.48550/arxiv.2404.00044"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2404.00044","pdf_url":"https://arxiv.org/pdf/2404.00044","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2404.00044","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5073063939","display_name":"Kaipeng Zeng","orcid":"https://orcid.org/0000-0002-4798-7784"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zeng, Kaipeng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5072455606","display_name":"Xin Zhao","orcid":"https://orcid.org/0000-0001-5796-0449"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhao, Xin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100433666","display_name":"Yu Zhang","orcid":"https://orcid.org/0000-0003-0140-7366"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Yu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102560392","display_name":"Fan Nie","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Nie, Fan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5019708391","display_name":"Xiaokang Yang","orcid":"https://orcid.org/0000-0003-4029-3322"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Xiaokang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5085787425","display_name":"Yaohui Jin","orcid":"https://orcid.org/0000-0001-6158-6277"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jin, Yaohui","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5100956614","display_name":"Yanyan Xu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xu, Yanyan","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.0,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":0,"max":84},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10215","display_name":"Semantic Web and Ontologies","score":0.6854,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10215","display_name":"Semantic Web and Ontologies","score":0.6854,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10862","display_name":"AI in cancer detection","score":0.654,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.6358,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/struct","display_name":"struct","score":0.59657085},{"id":"https://openalex.org/keywords/retrosynthetic-analysis","display_name":"Retrosynthetic analysis","score":0.5906365}],"concepts":[{"id":"https://openalex.org/C151201525","wikidata":"https://www.wikidata.org/wiki/Q177239","display_name":"Limit (mathematics)","level":2,"score":0.74076176},{"id":"https://openalex.org/C2776142590","wikidata":"https://www.wikidata.org/wiki/Q1164699","display_name":"struct","level":2,"score":0.59657085},{"id":"https://openalex.org/C42437451","wikidata":"https://www.wikidata.org/wiki/Q902227","display_name":"Retrosynthetic analysis","level":3,"score":0.5906365},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4864447},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.44065756},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.35757565},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.34933048},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.3202331},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.06917101},{"id":"https://openalex.org/C71240020","wikidata":"https://www.wikidata.org/wiki/Q186011","display_name":"Stereochemistry","level":1,"score":0.044905484},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C35753019","wikidata":"https://www.wikidata.org/wiki/Q644785","display_name":"Total synthesis","level":2,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2404.00044","pdf_url":"https://arxiv.org/pdf/2404.00044","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2404.00044","pdf_url":"https://arxiv.org/pdf/2404.00044","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4389188538","https://openalex.org/W4385784498","https://openalex.org/W4233772642","https://openalex.org/W2890286717","https://openalex.org/W2789619702","https://openalex.org/W2402002076","https://openalex.org/W2341583964","https://openalex.org/W1997223755","https://openalex.org/W1993979952","https://openalex.org/W1443743067"],"abstract_inverted_index":{"Retrosynthesis":[0],"planning":[1,22],"poses":[2],"a":[3,17,26,65,104,109,158],"formidable":[4],"challenge":[5],"in":[6,12,20,28,30,36,50,172],"the":[7,21,85,93,96,118,167,232],"organic":[8],"chemical":[9,58,105,180],"industry,":[10],"particularly":[11],"pharmaceuticals.":[13],"Single-step":[14],"retrosynthesis":[15,70,162],"prediction,":[16],"crucial":[18],"step":[19],"process,":[23],"has":[24],"witnessed":[25],"surge":[27],"interest":[29],"recent":[31,51],"years":[32],"due":[33],"to":[34,116,223],"advancements":[35],"AI":[37],"for":[38,47,69,123,188],"science.":[39],"Various":[40],"deep":[41],"learning-based":[42],"methods":[43,171,214],"have":[44],"been":[45],"proposed":[46],"this":[48],"task":[49],"years,":[52],"incorporating":[53],"diverse":[54],"levels":[55],"of":[56,89,98,120,169,179],"additional":[57],"knowledge":[59],"dependency.":[60],"This":[61],"paper":[62],"introduces":[63],"UAlign,":[64],"template-free":[66,135,141,161,213],"graph-to-sequence":[67,160],"pipeline":[68,164],"prediction.":[71],"By":[72],"combining":[73],"graph":[74,87],"neural":[75],"networks":[76],"and":[77,136,176,215,226],"Transformers,":[78],"our":[79,130],"method":[80,131,142],"can":[81],"more":[82],"effectively":[83],"leverage":[84],"inherent":[86],"structure":[88],"molecules.":[90],"Based":[91],"on":[92],"fact":[94],"that":[95,129,165,208],"majority":[97],"molecule":[99],"structures":[100,122],"remain":[101],"unchanged":[102,121],"during":[103],"reaction,":[106],"we":[107],"propose":[108,183],"simple":[110],"yet":[111],"effective":[112],"SMILES":[113,194,202],"alignment":[114,203],"technique":[115],"facilitate":[117],"reuse":[119],"reactant":[124,193],"generation.":[125],"Extensive":[126,205],"experiments":[127,206],"show":[128],"substantially":[132],"outperforms":[133,211],"state-of-the-art":[134,212],"semi-template-based":[137],"approaches.":[138],"Importantly,":[139],"Our":[140],"achieves":[143],"effectiveness":[144],"comparable":[145],"to,":[146],"or":[147,217],"even":[148,197],"surpasses,":[149],"established":[150],"powerful":[151],"template-based":[152,219],"methods.":[153,204],"Scientific":[154],"contribution:":[155],"We":[156,182],"present":[157],"novel":[159],"prediction":[163],"overcomes":[166],"limitations":[168],"Transformer-based":[170],"molecular":[173],"representation":[174],"learning":[175,186],"insufficient":[177],"utilization":[178],"information.":[181],"an":[184],"unsupervised":[185],"mechanism":[187],"establishing":[189],"product-atom":[190],"correspondence":[191],"with":[192,221],"tokens,":[195],"achieving":[196],"better":[198],"results":[199],"than":[200],"supervised":[201],"demonstrate":[207],"UAlign":[209],"significantly":[210],"rivals":[216],"surpasses":[218],"approaches,":[220],"up":[222],"5\\%":[224],"(top-5)":[225],"5.4\\%":[227],"(top-10)":[228],"increased":[229],"accuracy":[230],"over":[231],"strongest":[233],"baseline.":[234]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4393723397","counts_by_year":[],"updated_date":"2024-12-06T01:13:38.246098","created_date":"2024-04-03"}