{"id":"https://openalex.org/W4403810039","doi":"https://doi.org/10.48550/arxiv.2409.19019","title":"RAGProbe: An Automated Approach for Evaluating RAG Applications","display_name":"RAGProbe: An Automated Approach for Evaluating RAG Applications","publication_year":2024,"publication_date":"2024-09-24","ids":{"openalex":"https://openalex.org/W4403810039","doi":"https://doi.org/10.48550/arxiv.2409.19019"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2409.19019","pdf_url":"http://arxiv.org/pdf/2409.19019","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"http://arxiv.org/pdf/2409.19019","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5083636648","display_name":"Shangeetha Sivasothy","orcid":"https://orcid.org/0000-0001-9204-4614"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sivasothy, Shangeetha","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5012088137","display_name":"Scott Barnett","orcid":"https://orcid.org/0000-0002-3187-4937"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Barnett, Scott","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5080975739","display_name":"Stefanus Kurniawan","orcid":"https://orcid.org/0009-0001-4469-1056"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kurniawan, Stefanus","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5088904375","display_name":"Zafaryab Rasool","orcid":"https://orcid.org/0000-0002-3603-3125"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Rasool, Zafaryab","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5030486012","display_name":"Rajesh Vasa","orcid":"https://orcid.org/0000-0003-4805-1467"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Vasa, Rajesh","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.0,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":0,"max":84},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10821","display_name":"Cardiovascular Function and Risk Factors","score":0.3914,"subfield":{"id":"https://openalex.org/subfields/2705","display_name":"Cardiology and Cardiovascular Medicine"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},"topics":[{"id":"https://openalex.org/T10821","display_name":"Cardiovascular Function and Risk Factors","score":0.3914,"subfield":{"id":"https://openalex.org/subfields/2705","display_name":"Cardiology and Cardiovascular Medicine"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}}],"keywords":[],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5307923},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.34432697}],"mesh":[],"locations_count":1,"locations":[{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2409.19019","pdf_url":"http://arxiv.org/pdf/2409.19019","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2409.19019","pdf_url":"http://arxiv.org/pdf/2409.19019","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4396701345","https://openalex.org/W4396696052","https://openalex.org/W4391913857","https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W2376932109","https://openalex.org/W2358668433","https://openalex.org/W2001405890"],"abstract_inverted_index":{"Retrieval":[0],"Augmented":[1],"Generation":[2],"(RAG)":[3],"is":[4,19],"increasingly":[5],"being":[6],"used":[7],"when":[8,141,149],"building":[9],"Generative":[10],"AI":[11],"applications.":[12],"Evaluating":[13],"these":[14,169],"applications":[15],"and":[16,26,45,67,153,181,183],"RAG":[17,32,103,123,129,223],"pipelines":[18,33,130],"mostly":[20],"done":[21],"manually,":[22],"via":[23],"a":[24,78,90,111,158,162],"trial":[25],"error":[27],"process.":[28],"Automating":[29],"evaluation":[30,54],"of":[31,84,92,102,222],"requires":[34],"overcoming":[35],"challenges":[36],"such":[37],"as":[38,56,58],"context":[39],"misunderstanding,":[40],"wrong":[41],"format,":[42],"incorrect":[43],"specificity,":[44],"missing":[46],"content.":[47],"Prior":[48],"works":[49],"therefore":[50],"focused":[51,74],"on":[52,75,207],"improving":[53],"metrics":[55],"well":[57],"enhancing":[59],"components":[60],"within":[61],"the":[62,137,196,202,220],"pipeline":[63,104],"using":[64,131],"available":[65],"question":[66],"answer":[68],"datasets.":[69,133,191],"However,":[70],"they":[71],"have":[72],"not":[73],"1)":[76],"providing":[77],"schema":[79],"for":[80,94,113,147,155,164,217,234],"capturing":[81],"different":[82],"types":[83],"question-answer":[85,96,117],"pairs":[86,97,118],"or":[87],"2)":[88],"creating":[89],"set":[91],"templates":[93],"generating":[95,114],"that":[98],"can":[99,226],"support":[100],"automation":[101],"evaluation.":[105],"In":[106],"this":[107],"paper,":[108],"we":[109],"present":[110],"technique":[112],"variations":[115],"in":[116,122,177,189],"to":[119,166],"trigger":[120],"failures":[121],"pipelines.":[124],"We":[125],"validate":[126],"5":[127],"open-source":[128],"3":[132],"Our":[134,192,211],"approach":[135,194,216],"revealed":[136],"highest":[138],"failure":[139,173,185,203],"rates":[140,186],"prompts":[142],"combine":[143],"multiple":[144,151],"questions:":[145],"91%":[146],"questions":[148,156],"spanning":[150],"documents":[152],"78%":[154],"from":[157],"single":[159],"document;":[160],"indicating":[161],"need":[163],"developers":[165],"prioritise":[167],"handling":[168],"combined":[170],"questions.":[171],"60%":[172],"rate":[174,204],"was":[175],"observed":[176,188],"academic":[178],"domain":[179],"dataset":[180],"53%":[182],"62%":[184],"were":[187],"open-domain":[190],"automated":[193,215],"outperforms":[195],"existing":[197,230],"state-of-the-art":[198],"methods,":[199],"by":[200,205],"increasing":[201],"51%":[206],"average":[208],"per":[209],"dataset.":[210],"work":[212],"presents":[213],"an":[214],"continuously":[218],"monitoring":[219],"health":[221],"pipelines,":[224,232],"which":[225],"be":[227],"integrated":[228],"into":[229],"CI/CD":[231],"allowing":[233],"improved":[235],"quality.":[236]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4403810039","counts_by_year":[],"updated_date":"2024-12-10T09:13:12.472201","created_date":"2024-10-28"}