{"id":"https://openalex.org/W4383473389","doi":"https://doi.org/10.48550/arxiv.2307.02337","title":"FAM: Relative Flatness Aware Minimization","display_name":"FAM: Relative Flatness Aware Minimization","publication_year":2023,"publication_date":"2023-01-01","ids":{"openalex":"https://openalex.org/W4383473389","doi":"https://doi.org/10.48550/arxiv.2307.02337"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2307.02337","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/abs/2307.02337","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5040989782","display_name":"Linara Adilova","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Adilova, Linara","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5012979553","display_name":"Amr Abourayya","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Abourayya, Amr","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101571576","display_name":"Jianning Li","orcid":"https://orcid.org/0000-0002-3782-9547"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Jianning","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5007940140","display_name":"Amin Dada","orcid":"https://orcid.org/0000-0003-4016-7799"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dada, Amin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5086123147","display_name":"Henning Petzka","orcid":"https://orcid.org/0000-0002-6162-8526"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Petzka, Henning","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5010662766","display_name":"Jan Egger","orcid":"https://orcid.org/0000-0002-5225-1982"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Egger, Jan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5017161970","display_name":"Jens Kleesiek","orcid":"https://orcid.org/0000-0001-8686-0682"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kleesiek, Jens","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5080777994","display_name":"Michael Kamp","orcid":"https://orcid.org/0000-0001-6231-0694"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kamp, Michael","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.0,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":0,"max":67},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.997,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.997,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10320","display_name":"Neural Networks and Applications","score":0.9906,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12808","display_name":"Ferroelectric and Negative Capacitance Devices","score":0.9837,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/flatness","display_name":"Flatness (cosmology)","score":0.9193831},{"id":"https://openalex.org/keywords/hessian-matrix","display_name":"Hessian matrix","score":0.6786253},{"id":"https://openalex.org/keywords/minification","display_name":"Minification","score":0.53635913}],"concepts":[{"id":"https://openalex.org/C2778530986","wikidata":"https://www.wikidata.org/wiki/Q5457948","display_name":"Flatness (cosmology)","level":3,"score":0.9193831},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.7049912},{"id":"https://openalex.org/C203616005","wikidata":"https://www.wikidata.org/wiki/Q620495","display_name":"Hessian matrix","level":2,"score":0.6786253},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6172752},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.5616445},{"id":"https://openalex.org/C147764199","wikidata":"https://www.wikidata.org/wiki/Q6865248","display_name":"Minification","level":2,"score":0.53635913},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.45638162},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.39140573},{"id":"https://openalex.org/C28826006","wikidata":"https://www.wikidata.org/wiki/Q33521","display_name":"Applied mathematics","level":1,"score":0.385571},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.3355022},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.28604984},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.105451226},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C26405456","wikidata":"https://www.wikidata.org/wiki/Q338","display_name":"Cosmology","level":2,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2307.02337","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2307.02337","pdf_url":"http://arxiv.org/pdf/2307.02337","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":false,"landing_page_url":"https://api.datacite.org/dois/10.48550/arxiv.2307.02337","pdf_url":null,"source":{"id":"https://openalex.org/S4393179698","display_name":"DataCite API","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4210145204","host_organization_name":"DataCite","host_organization_lineage":["https://openalex.org/I4210145204"],"host_organization_lineage_names":["DataCite"],"type":"metadata"},"license":null,"license_id":null,"version":null}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2307.02337","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4297883503","https://openalex.org/W4283017538","https://openalex.org/W3021699548","https://openalex.org/W2802707792","https://openalex.org/W2611031068","https://openalex.org/W2569979269","https://openalex.org/W2075777916","https://openalex.org/W1996936972","https://openalex.org/W1704347466","https://openalex.org/W1545275724"],"abstract_inverted_index":{"Flatness":[0],"of":[1,53,56,68,75,139,143,161,168,190],"the":[2,54,69,105,136,144,162,166,169,202],"loss":[3,131,163],"curve":[4],"around":[5],"a":[6,76,93,113,140,188],"model":[7],"at":[8,205],"hand":[9],"has":[10,23],"been":[11,24],"shown":[12],"to":[13,101,123,150],"empirically":[14],"correlate":[15],"with":[16,129,155],"its":[17],"generalization":[18,102,186],"ability.":[19],"Optimizing":[20],"for":[21],"flatness":[22,61,81,96,119,181],"proposed":[25],"as":[26,28],"early":[27],"1994":[29],"by":[30,37],"Hochreiter":[31],"and":[32,34,62,103,127,154,192,197],"Schmidthuber,":[33],"was":[35],"followed":[36],"more":[38],"recent":[39],"successful":[40],"sharpness-aware":[41],"optimization":[42],"techniques.":[43],"Their":[44],"widespread":[45],"adoption":[46],"in":[47,64,66,165,187,195],"practice,":[48],"though,":[49],"is":[50,121],"dubious":[51],"because":[52],"lack":[55],"theoretically":[57],"grounded":[58],"connection":[59],"between":[60],"generalization,":[63],"particular":[65,94],"light":[67],"reparameterization":[70,106],"curse":[71],"-":[72],"certain":[73],"reparameterizations":[74],"neural":[77,152],"network":[78],"change":[79,86],"most":[80],"measures":[82],"but":[83],"do":[84],"not":[85],"generalization.":[87],"Recent":[88],"theoretical":[89],"work":[90],"suggests":[91],"that":[92,120,178],"relative":[95,118,180],"measure":[97],"can":[98],"be":[99],"connected":[100],"solves":[104],"curse.":[107],"In":[108,171],"this":[109,117,179],"paper,":[110],"we":[111,176],"derive":[112],"regularizer":[114],"based":[115],"on":[116],"easy":[122],"compute,":[124],"fast,":[125],"efficient,":[126],"works":[128],"arbitrary":[130],"functions.":[132],"It":[133],"requires":[134],"computing":[135],"Hessian":[137],"only":[138],"single":[141],"layer":[142],"network,":[145],"which":[146],"makes":[147],"it":[148,156],"applicable":[149],"large":[151],"networks,":[153],"avoids":[157],"an":[158,172],"expensive":[159],"mapping":[160],"surface":[164],"vicinity":[167],"model.":[170],"extensive":[173],"empirical":[174],"evaluation":[175],"show":[177],"aware":[182],"minimization":[183],"(FAM)":[184],"improves":[185],"multitude":[189],"applications":[191],"models,":[193],"both":[194],"finetuning":[196],"standard":[198],"training.":[199],"We":[200],"make":[201],"code":[203],"available":[204],"github.":[206]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4383473389","counts_by_year":[],"updated_date":"2025-01-06T19:35:29.574292","created_date":"2023-07-07"}