{"id":"https://openalex.org/W3137281154","doi":"https://doi.org/10.1109/bigdata50022.2020.9378212","title":"Improving Model Training by Periodic Sampling over Weight Distributions","display_name":"Improving Model Training by Periodic Sampling over Weight Distributions","publication_year":2020,"publication_date":"2020-12-10","ids":{"openalex":"https://openalex.org/W3137281154","doi":"https://doi.org/10.1109/bigdata50022.2020.9378212","mag":"3137281154"},"language":"en","primary_location":{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata50022.2020.9378212","pdf_url":null,"source":{"id":"https://openalex.org/S4363607718","display_name":"2021 IEEE International Conference on Big Data (Big Data)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},"type":"article","type_crossref":"proceedings-article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":"https://arxiv.org/pdf/1905.05774","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5001997489","display_name":"Samarth Tripathi","orcid":null},"institutions":[],"countries":["US"],"is_corresponding":false,"raw_author_name":"Samarth Tripathi","raw_affiliation_strings":["LG Advanced AI Lab, Santa Clara, CA, USA"],"affiliations":[{"raw_affiliation_string":"LG Advanced AI Lab, Santa Clara, CA, USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5023776843","display_name":"Jiayi Liu","orcid":"https://orcid.org/0009-0002-8928-7564"},"institutions":[],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jiayi Liu","raw_affiliation_strings":["LG Advanced AI Lab, Santa Clara, CA, USA"],"affiliations":[{"raw_affiliation_string":"LG Advanced AI Lab, Santa Clara, CA, USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5058450118","display_name":"Sauptik Dhar","orcid":"https://orcid.org/0000-0002-3555-5466"},"institutions":[],"countries":["US"],"is_corresponding":false,"raw_author_name":"Sauptik Dhar","raw_affiliation_strings":["LG Advanced AI Lab, Santa Clara, CA, USA"],"affiliations":[{"raw_affiliation_string":"LG Advanced AI Lab, Santa Clara, CA, USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5057706634","display_name":"Unmesh Kurup","orcid":"https://orcid.org/0000-0002-3427-0418"},"institutions":[],"countries":["US"],"is_corresponding":false,"raw_author_name":"Unmesh Kurup","raw_affiliation_strings":["LG Advanced AI Lab, Santa Clara, CA, USA"],"affiliations":[{"raw_affiliation_string":"LG Advanced AI Lab, Santa Clara, CA, USA","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101821773","display_name":"Mohak Shah","orcid":"https://orcid.org/0000-0003-0409-1563"},"institutions":[],"countries":["US"],"is_corresponding":false,"raw_author_name":"Mohak Shah","raw_affiliation_strings":["LG Advanced AI Lab, Santa Clara, CA, USA"],"affiliations":[{"raw_affiliation_string":"LG Advanced AI Lab, Santa Clara, CA, USA","institution_ids":[]}]}],"institution_assertions":[],"countries_distinct_count":1,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.0,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":0,"max":60},"biblio":{"volume":null,"issue":null,"first_page":"112","last_page":"122"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9998,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9998,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9997,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.9991,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6938851},{"id":"https://openalex.org/C2777303404","wikidata":"https://www.wikidata.org/wiki/Q759757","display_name":"Convergence (economics)","level":2,"score":0.6709511},{"id":"https://openalex.org/C72169020","wikidata":"https://www.wikidata.org/wiki/Q194404","display_name":"Monotonic function","level":2,"score":0.6547343},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.5929369},{"id":"https://openalex.org/C112972136","wikidata":"https://www.wikidata.org/wiki/Q7595718","display_name":"Stability (learning theory)","level":2,"score":0.56580395},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5546257},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.53487897},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5167743},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.51408964},{"id":"https://openalex.org/C115680565","wikidata":"https://www.wikidata.org/wiki/Q5977448","display_name":"Gradient method","level":2,"score":0.48787308},{"id":"https://openalex.org/C140779682","wikidata":"https://www.wikidata.org/wiki/Q210868","display_name":"Sampling (signal processing)","level":3,"score":0.46142912},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.43210387},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.32632196},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.20524767},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C106131492","wikidata":"https://www.wikidata.org/wiki/Q3072260","display_name":"Filter (signal processing)","level":2,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.0},{"id":"https://openalex.org/C50522688","wikidata":"https://www.wikidata.org/wiki/Q189833","display_name":"Economic growth","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata50022.2020.9378212","pdf_url":null,"source":{"id":"https://openalex.org/S4363607718","display_name":"2021 IEEE International Conference on Big Data (Big Data)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/1905.05774","pdf_url":"https://arxiv.org/pdf/1905.05774","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/1905.05774","pdf_url":"https://arxiv.org/pdf/1905.05774","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":29,"referenced_works":["https://openalex.org/W1516903196","https://openalex.org/W1522301498","https://openalex.org/W1836465849","https://openalex.org/W2117539524","https://openalex.org/W2146502635","https://openalex.org/W2151695970","https://openalex.org/W2156779765","https://openalex.org/W2183341477","https://openalex.org/W2194775991","https://openalex.org/W2483727386","https://openalex.org/W2507296351","https://openalex.org/W2737258237","https://openalex.org/W2766164908","https://openalex.org/W2776855315","https://openalex.org/W2777662428","https://openalex.org/W2810784658","https://openalex.org/W2884822772","https://openalex.org/W2912811302","https://openalex.org/W2949117887","https://openalex.org/W2949341804","https://openalex.org/W2962933129","https://openalex.org/W2963016543","https://openalex.org/W2963163009","https://openalex.org/W2963173418","https://openalex.org/W2963446085","https://openalex.org/W2963446712","https://openalex.org/W2964121744","https://openalex.org/W3118608800","https://openalex.org/W4297685995"],"related_works":["https://openalex.org/W4380682190","https://openalex.org/W4315701745","https://openalex.org/W2945307361","https://openalex.org/W2346559293","https://openalex.org/W2116636209","https://openalex.org/W2102386043","https://openalex.org/W2077314575","https://openalex.org/W2053698688","https://openalex.org/W2005710836","https://openalex.org/W1990290471"],"abstract_inverted_index":{"Optimizing":[0],"deep":[1],"neural":[2,69,145],"networks":[3,70],"using":[4],"gradient-based":[5],"techniques":[6,85,140,185],"such":[7],"as":[8],"SGD":[9],"and":[10,23,64,77,123,127,151,156,180],"Adam":[11],"have":[12],"some":[13],"well":[14],"documented":[15],"problems":[16,35,108],"including":[17],"the":[18,45,117,144],"high":[19],"volatility,":[20],"low":[21],"stability":[22,76],"non-monotonicity":[24],"of":[25,90,106,143,174],"performance":[26,129,164],"improvements":[27,165,179],"during":[28],"training.":[29],"One":[30],"approach":[31],"that":[32,93,166],"addresses":[33],"these":[34,139,178,184],"in":[36,135,158],"convex":[37],"settings":[38,63],"is":[39],"Polyak-Ruppert":[40],"Averaging":[41],"(PRA)":[42],"which":[43],"averages":[44],"model":[46,91],"weights":[47,92],"distributions":[48],"over":[49],"gradient":[50,96,148],"updates.":[51],"But":[52],"this":[53,81],"technique":[54],"(and":[55],"its":[56],"variants)":[57],"do":[58],"not":[59],"scale":[60],"for":[61,67,103],"non-convex":[62],"their":[65],"use":[66],"large":[68],"involves":[71],"a":[72,104,132,159,172],"tradeoff":[73],"between":[74],"increased":[75],"optimal":[78,153],"convergence.":[79],"In":[80],"paper,":[82],"we":[83],"introduce":[84],"centered":[86],"around":[87],"periodic":[88],"sampling":[89],"improve":[94],"upon":[95],"update":[97],"methods":[98,150],"(minibatch":[99],"SGD,":[100],"Momentum,":[101],"Adam)":[102],"variety":[105,173],"vision":[107],"(classification,":[109],"detection,":[110],"segmentation).":[111],"Compared":[112],"to":[113,176],"existing":[114,152],"PRA":[115],"approaches,":[116],"proposed":[118],"algorithms":[119],"provide":[120],"better,":[121],"faster":[122],"more":[124,188],"robust":[125],"convergence":[126],"training":[128,154],"with":[130,163],"only":[131],"slight":[133],"increase":[134],"computation":[136],"time.":[137],"Importantly,":[138],"are":[141,167],"independent":[142],"network":[146],"model,":[147],"optimization":[149],"policies,":[155],"converge":[157],"less":[160],"volatile":[161],"fashion":[162],"approximately":[168],"monotonic.":[169],"We":[170],"conduct":[171],"experiments":[175],"quantify":[177],"identify":[181],"scenarios":[182],"where":[183],"could":[186],"be":[187],"useful.":[189]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W3137281154","counts_by_year":[],"updated_date":"2025-01-03T09:37:20.560039","created_date":"2021-03-29"}