{"id":"https://openalex.org/W4399927849","doi":"https://doi.org/10.21105/joss.06684","title":"cuallee: A Python package for data quality checks across multiple DataFrame APIs","display_name":"cuallee: A Python package for data quality checks across multiple DataFrame APIs","publication_year":2024,"publication_date":"2024-06-23","ids":{"openalex":"https://openalex.org/W4399927849","doi":"https://doi.org/10.21105/joss.06684"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.21105/joss.06684","pdf_url":"https://joss.theoj.org/papers/10.21105/joss.06684.pdf","source":{"id":"https://openalex.org/S4210214273","display_name":"The Journal of Open Source Software","issn_l":"2475-9066","issn":["2475-9066"],"is_oa":true,"is_in_doaj":true,"is_indexed_in_scopus":false,"is_core":true,"host_organization":"https://openalex.org/P4310315853","host_organization_name":"Open Journals","host_organization_lineage":["https://openalex.org/P4310315853"],"host_organization_lineage_names":["Open Journals"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true},"type":"article","type_crossref":"journal-article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://joss.theoj.org/papers/10.21105/joss.06684.pdf","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5085086546","display_name":"Herminio Vazquez","orcid":"https://orcid.org/0000-0003-1937-8006"},"institutions":[],"countries":["MX"],"is_corresponding":false,"raw_author_name":"Herminio Vazquez","raw_affiliation_strings":["Independent Researcher, Mexico"],"affiliations":[{"raw_affiliation_string":"Independent Researcher, Mexico","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5010856740","display_name":"Virginie Grosboillot","orcid":"https://orcid.org/0000-0002-8249-7182"},"institutions":[{"id":"https://openalex.org/I5124864","display_name":"\u00c9cole Polytechnique F\u00e9d\u00e9rale de Lausanne","ror":"https://ror.org/02s376052","country_code":"CH","type":"funder","lineage":["https://openalex.org/I2799323385","https://openalex.org/I5124864"]}],"countries":["CH"],"is_corresponding":false,"raw_author_name":"Virginie Grosboillot","raw_affiliation_strings":["Swiss Federal Institute of Technology (ETH)"],"affiliations":[{"raw_affiliation_string":"Swiss Federal Institute of Technology (ETH)","institution_ids":["https://openalex.org/I5124864"]}]}],"institution_assertions":[],"countries_distinct_count":2,"institutions_distinct_count":1,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":{"value":0,"currency":"USD","value_usd":0},"apc_paid":null,"fwci":1.521,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.806312,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":78,"max":89},"biblio":{"volume":"9","issue":"98","first_page":"6684","last_page":"6684"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9984,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9984,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T14280","display_name":"Big Data Technologies and Applications","score":0.988,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11986","display_name":"Scientific Computing and Data Management","score":0.9843,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/python","display_name":"Python","score":0.85442126},{"id":"https://openalex.org/keywords/r-package","display_name":"R package","score":0.41208208}],"concepts":[{"id":"https://openalex.org/C519991488","wikidata":"https://www.wikidata.org/wiki/Q28865","display_name":"Python (programming language)","level":2,"score":0.85442126},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7187797},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.56004846},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.4545197},{"id":"https://openalex.org/C2984074130","wikidata":"https://www.wikidata.org/wiki/Q73539779","display_name":"R package","level":2,"score":0.41208208},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.40280843}],"mesh":[],"locations_count":1,"locations":[{"is_oa":true,"landing_page_url":"https://doi.org/10.21105/joss.06684","pdf_url":"https://joss.theoj.org/papers/10.21105/joss.06684.pdf","source":{"id":"https://openalex.org/S4210214273","display_name":"The Journal of Open Source Software","issn_l":"2475-9066","issn":["2475-9066"],"is_oa":true,"is_in_doaj":true,"is_indexed_in_scopus":false,"is_core":true,"host_organization":"https://openalex.org/P4310315853","host_organization_name":"Open Journals","host_organization_lineage":["https://openalex.org/P4310315853"],"host_organization_lineage_names":["Open Journals"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.21105/joss.06684","pdf_url":"https://joss.theoj.org/papers/10.21105/joss.06684.pdf","source":{"id":"https://openalex.org/S4210214273","display_name":"The Journal of Open Source Software","issn_l":"2475-9066","issn":["2475-9066"],"is_oa":true,"is_in_doaj":true,"is_indexed_in_scopus":false,"is_core":true,"host_organization":"https://openalex.org/P4310315853","host_organization_name":"Open Journals","host_organization_lineage":["https://openalex.org/P4310315853"],"host_organization_lineage_names":["Open Journals"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true},"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":8,"referenced_works":["https://openalex.org/W2038412523","https://openalex.org/W2341844252","https://openalex.org/W2889249015","https://openalex.org/W4285007070","https://openalex.org/W4292493019","https://openalex.org/W4380442492","https://openalex.org/W4380447145","https://openalex.org/W4385568252"],"related_works":["https://openalex.org/W4297799326","https://openalex.org/W4287027380","https://openalex.org/W3193760048","https://openalex.org/W3187193180","https://openalex.org/W3116064965","https://openalex.org/W2341492732","https://openalex.org/W2207495067","https://openalex.org/W1906486629","https://openalex.org/W1699080303","https://openalex.org/W106542691"],"abstract_inverted_index":{"In":[0],"today's":[1],"world,":[2],"where":[3,14],"vast":[4],"amounts":[5],"of":[6,30,102],"data":[7,15,32,46,60],"are":[8],"generated":[9],"and":[10,13,20,37,43,70,89],"collected":[11],"daily,":[12],"heavily":[16],"influence":[17],"business,":[18],"political,":[19],"societal":[21],"decisions,":[22],"it":[23],"is":[24,62],"crucial":[25],"to":[26,64,96],"evaluate":[27,99],"the":[28,31,45,77,100],"quality":[29,101],"used":[33],"for":[34,58,73],"analysis,":[35],"decision-making,":[36],"reporting.This":[38],"involves":[39],"understanding":[40],"how":[41],"reliable":[42],"trustworthy":[44],"are.To":[47],"address":[48],"this":[49],"need,":[50],"we":[51],"have":[52],"created":[53],"cuallee,":[54],"a":[55],"Python":[56],"package":[57],"assessing":[59],"quality.cuallee":[61],"designed":[63],"be":[65],"dataframe-agnostic,":[66],"offering":[67],"an":[68],"intuitive":[69],"user-friendly":[71],"API":[72],"describing":[74],"checks":[75,95],"across":[76],"most":[78],"popular":[79],"dataframe":[80],"implementations":[81],"such":[82],"as":[83],"PySpark,":[84],"Pandas,":[85],"Snowpark,":[86],"Polars,":[87],"DuckDB,":[88],"BigQuery.Currently,":[90],"cuallee":[91],"offers":[92],"over":[93],"50":[94],"help":[97],"users":[98],"their":[103],"data.":[104]},"abstract_inverted_index_v3":null,"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4399927849","counts_by_year":[{"year":2024,"cited_by_count":1}],"updated_date":"2025-04-04T12:15:13.640633","created_date":"2024-06-24"}