{"id":"https://openalex.org/W4400342781","doi":"https://doi.org/10.48550/arxiv.2407.01490","title":"LLM See, LLM Do: Guiding Data Generation to Target Non-Differentiable\n Objectives","display_name":"LLM See, LLM Do: Guiding Data Generation to Target Non-Differentiable\n Objectives","publication_year":2024,"publication_date":"2024-07-01","ids":{"openalex":"https://openalex.org/W4400342781","doi":"https://doi.org/10.48550/arxiv.2407.01490"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2407.01490","pdf_url":"http://arxiv.org/pdf/2407.01490","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"http://arxiv.org/pdf/2407.01490","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100019299","display_name":"Lu\u00edsa Shimabucoro","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shimabucoro, Lu\u00edsa","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5037310413","display_name":"Sebastian Ruder","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ruder, Sebastian","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5048307591","display_name":"Julia Kreutzer","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kreutzer, Julia","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5070153551","display_name":"Marzieh Fadaee","orcid":"https://orcid.org/0000-0002-4447-1213"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Fadaee, Marzieh","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5078850040","display_name":"Sara Hooker","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hooker, Sara","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.0,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":0,"max":78},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T13523","display_name":"Mathematics, Computing, and Information Processing","score":0.7725,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T13523","display_name":"Mathematics, Computing, and Information Processing","score":0.7725,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[],"concepts":[{"id":"https://openalex.org/C202615002","wikidata":"https://www.wikidata.org/wiki/Q783507","display_name":"Differentiable function","level":2,"score":0.62829983},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.46663007},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.21148774},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2407.01490","pdf_url":"http://arxiv.org/pdf/2407.01490","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2407.01490","pdf_url":"http://arxiv.org/pdf/2407.01490","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W4327738859","https://openalex.org/W4286826125","https://openalex.org/W4285277090","https://openalex.org/W3181683615","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W2348722996","https://openalex.org/W2334570605","https://openalex.org/W1633485514"],"abstract_inverted_index":{"The":[0],"widespread":[1],"adoption":[2],"of":[3,33,36,44,51,57,61,141,157,174,209],"synthetic":[4,45,62,88,158,192],"data":[5,14,46,63,89,127,143,193],"raises":[6],"new":[7],"questions":[8],"about":[9],"how":[10,58,201],"models":[11,20,78,115,167,210],"generating":[12],"the":[13,31,42,52,59,87,95,108,114,117,126,139,155,206],"can":[15,100,110,204],"influence":[16],"other":[17],"large":[18],"language":[19],"(LLMs)":[21],"via":[22],"distilled":[23],"data.":[24],"To":[25],"start,":[26],"our":[27],"work":[28],"exhaustively":[29],"characterizes":[30],"impact":[32],"passive":[34],"inheritance":[35,184,203],"model":[37],"properties":[38,118],"by":[39,124],"systematically":[40],"studying":[41],"consequences":[43],"integration.":[47],"We":[48,75,181,199],"provide":[49],"one":[50],"most":[53],"comprehensive":[54],"studies":[55],"to-date":[56],"source":[60],"shapes":[64],"models'":[65],"internal":[66],"biases,":[67],"calibration":[68],"and":[69,73],"generations'":[70],"textual":[71],"attributes":[72,84],"preferences.":[74],"find":[76],"that":[77],"are":[79],"surprisingly":[80],"sensitive":[81],"towards":[82,116,165,211],"certain":[83],"even":[85],"when":[86],"prompts":[90],"appear":[91],"\"neutral\".":[92],"which":[93],"invites":[94],"question":[96,109,178],"whether":[97],"this":[98,177],"sensitivity":[99],"be":[101],"exploited":[102],"for":[103],"good.":[104],"Our":[105],"findings":[106],"invite":[107],"we":[111,119],"explicitly":[112],"steer":[113,205],"want":[120],"at":[121],"test":[122],"time":[123],"exploiting":[125],"generation":[128,207],"process?":[129],"This":[130],"would":[131],"have":[132],"historically":[133],"been":[134],"considered":[135],"infeasible":[136],"due":[137],"to":[138,169,188,195],"cost":[140],"collecting":[142],"with":[144],"a":[145,163,171,186,196],"specific":[146],"characteristic":[147],"or":[148,219],"objective":[149],"in":[150,154],"mind.":[151],"However,":[152],"improvement":[153],"quality":[156],"data,":[159],"as":[160,162,185],"well":[161],"shift":[164],"general-purpose":[166],"designed":[168],"follow":[170],"diverse":[172],"way":[173],"instructions,":[175],"means":[176],"is":[179],"timely.":[180],"propose":[182],"active":[183,202],"term":[187],"describe":[189],"intentionally":[190],"constraining":[191],"according":[194],"non-differentiable":[197,213],"objective.":[198],"demonstrate":[200],"profiles":[208],"desirable":[212],"attributes,":[214],"e.g.":[215],"high":[216],"lexical":[217],"diversity":[218],"low":[220],"toxicity.":[221]},"abstract_inverted_index_v3":null,"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4400342781","counts_by_year":[],"updated_date":"2025-04-09T12:43:57.083071","created_date":"2024-07-06"}