{"id":"https://openalex.org/W4403663121","doi":"https://doi.org/10.48550/arxiv.2409.07610","title":"When More Data Hurts: Optimizing Data Coverage While Mitigating\n Diversity Induced Underfitting in an Ultra-Fast Machine-Learned Potential","display_name":"When More Data Hurts: Optimizing Data Coverage While Mitigating\n Diversity Induced Underfitting in an Ultra-Fast Machine-Learned Potential","publication_year":2024,"publication_date":"2024-09-11","ids":{"openalex":"https://openalex.org/W4403663121","doi":"https://doi.org/10.48550/arxiv.2409.07610"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2409.07610","pdf_url":"http://arxiv.org/pdf/2409.07610","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"http://arxiv.org/pdf/2409.07610","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5029545175","display_name":"Jason B. Gibson","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gibson, Jason B.","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5092514203","display_name":"Tesia Janicki","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Janicki, Tesia D.","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5079313839","display_name":"A. C. Hire","orcid":"https://orcid.org/0000-0003-3147-2521"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hire, Ajinkya C.","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5088169017","display_name":"Chris Bishop","orcid":"https://orcid.org/0000-0002-1505-1287"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bishop, Chris","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5083791063","display_name":"J. Matthew D. Lane","orcid":"https://orcid.org/0000-0002-7878-9158"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lane, J. Matthew D.","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5043599635","display_name":"Richard G. Hennig","orcid":"https://orcid.org/0000-0003-4933-7686"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hennig, Richard G.","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.0,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":0,"max":83},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T11801","display_name":"Reservoir Engineering and Simulation Methods","score":0.9106,"subfield":{"id":"https://openalex.org/subfields/2212","display_name":"Ocean Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11801","display_name":"Reservoir Engineering and Simulation Methods","score":0.9106,"subfield":{"id":"https://openalex.org/subfields/2212","display_name":"Ocean Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[],"concepts":[{"id":"https://openalex.org/C2781316041","wikidata":"https://www.wikidata.org/wiki/Q1230584","display_name":"Diversity (politics)","level":2,"score":0.66457045},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5484985},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4117129},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.34687674},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3235644},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.11730927},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2409.07610","pdf_url":"http://arxiv.org/pdf/2409.07610","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2409.07610","pdf_url":"http://arxiv.org/pdf/2409.07610","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4396701345","https://openalex.org/W4396696052","https://openalex.org/W4391913857","https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W2376932109","https://openalex.org/W2358668433","https://openalex.org/W2001405890"],"abstract_inverted_index":{"Machine-learned":[0],"interatomic":[1],"potentials":[2],"(MLIPs)":[3],"are":[4],"becoming":[5],"an":[6],"essential":[7],"tool":[8],"in":[9,43,120,155,197],"materials":[10],"modeling.":[11],"However,":[12],"optimizing":[13],"the":[14,22,44,53,77,82,101,112,133,144,152,179,187],"generation":[15],"of":[16,49,79,111,151,189],"training":[17,45,67,73,102,121,153,191],"data":[18,74,98,103,122,192],"used":[19],"to":[20,87,99,109,193],"parameterize":[21],"MLIPs":[23,31,80],"remains":[24],"a":[25,117,149],"significant":[26],"challenge.":[27],"This":[28,69],"is":[29],"because":[30],"can":[32,131],"fail":[33],"when":[34],"encountering":[35],"local":[36],"enviroments":[37],"too":[38],"different":[39],"from":[40],"those":[41],"present":[42],"data.":[46,68,113],"The":[47],"difficulty":[48],"determining":[50],"\\textit{a":[51],"priori}":[52],"environments":[54],"that":[55,143],"will":[56],"be":[57],"encountered":[58],"during":[59],"molecular":[60],"dynamics":[61],"(MD)":[62],"simulation":[63,138,166],"necessitates":[64],"diverse,":[65],"high-quality":[66],"study":[70],"investigates":[71],"how":[72],"diversity":[75,125,130],"affects":[76],"performance":[78,196],"using":[81],"Ultra-Fast":[83],"Force":[84],"Field":[85],"(UF$^3$)":[86],"model":[88],"amorphous":[89],"silicon":[90],"nitride.":[91],"We":[92],"employ":[93],"expert":[94],"and":[95,104,165],"autonomously":[96],"generated":[97],"create":[100],"fit":[105],"four":[106],"force-field":[107],"variants":[108],"subsets":[110],"Our":[114],"findings":[115],"reveal":[116],"critical":[118],"balance":[119],"diversity:":[123],"insufficient":[124],"hinders":[126],"generalization,":[127],"while":[128],"excessive":[129],"exceed":[132],"MLIP's":[134],"learning":[135],"capacity,":[136],"reducing":[137],"accuracy.":[139],"Specifically,":[140],"we":[141,177],"found":[142],"UF$^3$":[145,175],"variant":[146],"trained":[147],"on":[148],"subset":[150],"data,":[154],"which":[156],"nitrogen-rich":[157],"structures":[158],"were":[159],"removed,":[160],"offered":[161],"vastly":[162],"better":[163],"prediction":[164],"accuracy":[167],"than":[168],"any":[169],"other":[170],"variant.":[171],"By":[172],"comparing":[173],"these":[174],"variants,":[176],"highlight":[178],"nuanced":[180],"requirements":[181],"for":[182],"creating":[183],"accurate":[184],"MLIPs,":[185],"emphasizing":[186],"importance":[188],"application-specific":[190],"achieve":[194],"optimal":[195],"modeling":[198],"complex":[199],"material":[200],"behaviors.":[201]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4403663121","counts_by_year":[],"updated_date":"2025-01-20T06:13:09.807533","created_date":"2024-10-23"}