{"id":"https://openalex.org/W4399455261","doi":"https://doi.org/10.48550/arxiv.2406.04197","title":"DICE: Detecting In-distribution Contamination in LLM's Fine-tuning Phase\n for Math Reasoning","display_name":"DICE: Detecting In-distribution Contamination in LLM's Fine-tuning Phase\n for Math Reasoning","publication_year":2024,"publication_date":"2024-06-06","ids":{"openalex":"https://openalex.org/W4399455261","doi":"https://doi.org/10.48550/arxiv.2406.04197"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2406.04197","pdf_url":"http://arxiv.org/pdf/2406.04197","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"http://arxiv.org/pdf/2406.04197","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5111223823","display_name":"S.Y. Tu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tu, Shangqing","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113244348","display_name":"Kejian Zhu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhu, Kejian","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5037091113","display_name":"Yushi Bai","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bai, Yushi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5040604135","display_name":"Zijun Yao","orcid":"https://orcid.org/0000-0003-3647-8770"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yao, Zijun","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5071217955","display_name":"Lei Hou","orcid":"https://orcid.org/0000-0001-8873-8728"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hou, Lei","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5059319053","display_name":"J. B. Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Juanzi","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.0,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":0,"max":77},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T11697","display_name":"Numerical Methods and Algorithms","score":0.8726,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11697","display_name":"Numerical Methods and Algorithms","score":0.8726,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.8176,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.752,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/dice","display_name":"Dice","score":0.8270992}],"concepts":[{"id":"https://openalex.org/C22029948","wikidata":"https://www.wikidata.org/wiki/Q45089","display_name":"Dice","level":2,"score":0.8270992},{"id":"https://openalex.org/C112570922","wikidata":"https://www.wikidata.org/wiki/Q60528603","display_name":"Contamination","level":2,"score":0.7708543},{"id":"https://openalex.org/C110121322","wikidata":"https://www.wikidata.org/wiki/Q865811","display_name":"Distribution (mathematics)","level":2,"score":0.45220566},{"id":"https://openalex.org/C44280652","wikidata":"https://www.wikidata.org/wiki/Q104837","display_name":"Phase (matter)","level":2,"score":0.4328075},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.40357327},{"id":"https://openalex.org/C145420912","wikidata":"https://www.wikidata.org/wiki/Q853077","display_name":"Mathematics education","level":1,"score":0.3562463},{"id":"https://openalex.org/C2777686260","wikidata":"https://www.wikidata.org/wiki/Q144037","display_name":"Calculus (dental)","level":2,"score":0.33968002},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.33445233},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.23768154},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.19190544},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.109861106},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.09777051},{"id":"https://openalex.org/C71924100","wikidata":"https://www.wikidata.org/wiki/Q11190","display_name":"Medicine","level":0,"score":0.0},{"id":"https://openalex.org/C18903297","wikidata":"https://www.wikidata.org/wiki/Q7150","display_name":"Ecology","level":1,"score":0.0},{"id":"https://openalex.org/C199343813","wikidata":"https://www.wikidata.org/wiki/Q12128","display_name":"Dentistry","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2406.04197","pdf_url":"http://arxiv.org/pdf/2406.04197","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2406.04197","pdf_url":"http://arxiv.org/pdf/2406.04197","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4366341510","https://openalex.org/W3104750253","https://openalex.org/W3021239166","https://openalex.org/W2906397153","https://openalex.org/W2586273397","https://openalex.org/W2483429559","https://openalex.org/W2390936256","https://openalex.org/W2385445039","https://openalex.org/W2016385589","https://openalex.org/W2009559548"],"abstract_inverted_index":{"The":[0,206],"advancement":[1],"of":[2,84,108,133,164,198,202],"large":[3],"language":[4],"models":[5],"(LLMs)":[6],"relies":[7],"on":[8,24,48,56,104,174],"evaluation":[9],"using":[10],"public":[11],"benchmarks,":[12],"but":[13],"data":[14,37,49,53,209],"contamination":[15,26,119,143,191],"can":[16],"lead":[17,194],"to":[18,51,86,97,141,195],"overestimated":[19],"performance.":[20],"Previous":[21],"researches":[22],"focus":[23],"detecting":[25,117],"by":[27,168],"determining":[28],"whether":[29],"the":[30,34,81,88,93,105,130,134,154,162,189,199],"model":[31],"has":[32],"seen":[33],"exact":[35],"same":[36],"during":[38],"training.":[39],"In":[40],"this":[41],"work,":[42],"we":[43,64,73,151],"argue":[44],"that":[45,79,109,153,188],"even":[46],"training":[47],"similar":[50,148],"benchmark":[52],"inflates":[54],"performance":[55,163],"in-distribution":[57,71,118,190],"tasks":[58],"without":[59],"improving":[60],"overall":[61],"capacity,":[62],"which":[63,138],"called":[65],"In-distribution":[66],"contamination.":[67,89],"To":[68],"effectively":[69],"detect":[70,142],"contamination,":[72,98],"propose":[74],"DICE,":[75],"a":[76,101],"novel":[77],"method":[78],"leverages":[80],"internal":[82,106],"states":[83,107],"LLMs":[85,122,166],"locate-then-detect":[87],"DICE":[90,136,155],"first":[91],"identifies":[92],"most":[94],"sensitive":[95],"layer":[96],"then":[99],"trains":[100],"classifier":[102],"based":[103],"layer.":[110],"Experiments":[111],"reveal":[112],"DICE's":[113],"high":[114],"accuracy":[115],"in":[116],"across":[120,144],"various":[121],"and":[123,184,208],"math":[124,176],"reasoning":[125,177],"datasets.":[126],"We":[127],"also":[128],"show":[129],"generalization":[131],"capability":[132],"trained":[135],"detector,":[137],"is":[139],"able":[140],"multiple":[145],"benchmarks":[146],"with":[147,161],"distributions.":[149],"Additionally,":[150],"find":[152],"detection":[156],"scores":[157],"are":[158,210],"positively":[159],"correlated":[160],"ten":[165],"fine-tuned":[167],"either":[169],"us":[170],"or":[171],"other":[172],"organizations":[173],"four":[175],"datasets":[178],"(with":[179],"$R^2$":[180],"values":[181],"between":[182],"0.6":[183],"0.75).":[185],"This":[186],"indicates":[187],"problem":[192],"potentially":[193],"an":[196],"overestimation":[197],"true":[200],"capabilities":[201],"many":[203],"existing":[204],"models.":[205],"code":[207],"available":[211],"at":[212],"https://github.com/THU-KEG/DICE.":[213]},"abstract_inverted_index_v3":null,"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4399455261","counts_by_year":[],"updated_date":"2025-04-23T18:13:19.148312","created_date":"2024-06-08"}