{"id":"https://openalex.org/W4205193389","doi":"https://doi.org/10.1109/bigdata52589.2021.9671603","title":"Exploratory Data Analysis with Database-backed Dataframes: A Case Study on Airbnb Data","display_name":"Exploratory Data Analysis with Database-backed Dataframes: A Case Study on Airbnb Data","publication_year":2021,"publication_date":"2021-12-15","ids":{"openalex":"https://openalex.org/W4205193389","doi":"https://doi.org/10.1109/bigdata52589.2021.9671603"},"language":"en","primary_location":{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata52589.2021.9671603","pdf_url":null,"source":{"id":"https://openalex.org/S4363607718","display_name":"2021 IEEE International Conference on Big Data (Big Data)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},"type":"article","type_crossref":"proceedings-article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5074096341","display_name":"Phanwadee Sinthong","orcid":"https://orcid.org/0009-0006-4423-3860"},"institutions":[{"id":"https://openalex.org/I204250578","display_name":"University of California, Irvine","ror":"https://ror.org/04gyf1771","country_code":"US","type":"funder","lineage":["https://openalex.org/I204250578"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Phanwadee Sinthong","raw_affiliation_strings":["Dept. of Computer Science, University of California, Irvine"],"affiliations":[{"raw_affiliation_string":"Dept. of Computer Science, University of California, Irvine","institution_ids":["https://openalex.org/I204250578"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5011512061","display_name":"Michael J. Carey","orcid":"https://orcid.org/0000-0003-1561-4059"},"institutions":[{"id":"https://openalex.org/I204250578","display_name":"University of California, Irvine","ror":"https://ror.org/04gyf1771","country_code":"US","type":"funder","lineage":["https://openalex.org/I204250578"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Michael J. Carey","raw_affiliation_strings":["Dept. of Computer Science, University of California, Irvine"],"affiliations":[{"raw_affiliation_string":"Dept. of Computer Science, University of California, Irvine","institution_ids":["https://openalex.org/I204250578"]}]}],"institution_assertions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.133,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.24164,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":56,"max":66},"biblio":{"volume":null,"issue":null,"first_page":"3119","last_page":"3129"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T11106","display_name":"Data Management and Algorithms","score":0.9993,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11106","display_name":"Data Management and Algorithms","score":0.9993,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10317","display_name":"Advanced Database Systems and Queries","score":0.9988,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9953,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/interface","display_name":"Interface (matter)","score":0.5247624},{"id":"https://openalex.org/keywords/exploratory-data-analysis","display_name":"Exploratory data analysis","score":0.4640929},{"id":"https://openalex.org/keywords/distributed-database","display_name":"Distributed database","score":0.4136039}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.836779},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.69630384},{"id":"https://openalex.org/C75684735","wikidata":"https://www.wikidata.org/wiki/Q858810","display_name":"Big data","level":2,"score":0.61056024},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.53835446},{"id":"https://openalex.org/C79158427","wikidata":"https://www.wikidata.org/wiki/Q485396","display_name":"Analytics","level":2,"score":0.5335999},{"id":"https://openalex.org/C113843644","wikidata":"https://www.wikidata.org/wiki/Q901882","display_name":"Interface (matter)","level":4,"score":0.5247624},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.4697027},{"id":"https://openalex.org/C120894424","wikidata":"https://www.wikidata.org/wiki/Q1322871","display_name":"Exploratory data analysis","level":2,"score":0.4640929},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.41485718},{"id":"https://openalex.org/C70061542","wikidata":"https://www.wikidata.org/wiki/Q989016","display_name":"Distributed database","level":2,"score":0.4136039},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.29664892},{"id":"https://openalex.org/C157915830","wikidata":"https://www.wikidata.org/wiki/Q2928001","display_name":"Bubble","level":2,"score":0.0},{"id":"https://openalex.org/C129307140","wikidata":"https://www.wikidata.org/wiki/Q6795880","display_name":"Maximum bubble pressure method","level":3,"score":0.0},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata52589.2021.9671603","pdf_url":null,"source":{"id":"https://openalex.org/S4363607718","display_name":"2021 IEEE International Conference on Big Data (Big Data)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false}],"best_oa_location":null,"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":13,"referenced_works":["https://openalex.org/W1600215619","https://openalex.org/W2050516909","https://openalex.org/W2101234009","https://openalex.org/W2265297126","https://openalex.org/W2342249984","https://openalex.org/W2949708822","https://openalex.org/W2980180246","https://openalex.org/W3008534594","https://openalex.org/W3082494217","https://openalex.org/W3135575949","https://openalex.org/W3196910658","https://openalex.org/W4287642524","https://openalex.org/W4295830359"],"related_works":["https://openalex.org/W4390608645","https://openalex.org/W4387885925","https://openalex.org/W4247566972","https://openalex.org/W4233347783","https://openalex.org/W4206777497","https://openalex.org/W3090563135","https://openalex.org/W2960264696","https://openalex.org/W2497432351","https://openalex.org/W1990063575","https://openalex.org/W1977025641"],"abstract_inverted_index":{"Choosing":[0],"between":[1],"various":[2],"scalable":[3,79],"dataframe":[4,65,87],"libraries":[5],"can":[6],"be":[7],"an":[8,68],"overwhelming":[9],"task":[10],"for":[11,148],"data":[12,52,71,80,103,111,115],"scientists":[13,104],"but":[14],"it":[15],"is":[16,77],"critical":[17],"because":[18],"each":[19,33,149],"framework":[20,34],"deploys":[21],"a":[22,36,57,63,78,85,92,113,142],"different":[23],"optimization":[24],"technique":[25],"that":[26,83],"could":[27],"affect":[28],"the":[29,47,133,151,154],"overall":[30],"performance.":[31],"Comparing":[32],"on":[35,67,89],"set":[37],"of":[38,50,60,91,94,99,110,138,150,153],"analytical":[39],"tasks":[40],"in":[41],"isolation":[42],"might":[43],"not":[44],"fully":[45],"represent":[46],"unique":[48],"characteristics":[49],"big":[51],"analyses.":[53],"This":[54],"paper":[55],"describes":[56],"case":[58,128],"study":[59,129],"applying":[61],"PolyFrame,":[62],"database-backed":[64,139],"library,":[66],"end-to-end":[69],"exploratory":[70],"analysis":[72,116],"involving":[73],"Airbnb":[74],"data.":[75],"PolyFrame":[76],"analytics":[81],"library":[82],"provides":[84],"Pandas-like":[86],"interface":[88,101],"top":[90],"variety":[93],"database":[95,121],"systems.":[96],"The":[97],"familiarity":[98],"its":[100],"enables":[102],"to":[105],"interact":[106],"with":[107,145],"large":[108],"collections":[109],"through":[112],"scale-independent":[114],"experience":[117],"without":[118],"needing":[119],"significant":[120],"or":[122],"distributed":[123],"systems":[124],"knowledge.":[125],"Throughout":[126],"this":[127],"we":[130],"also":[131],"highlight":[132],"scalability":[134],"benefits":[135],"and":[136],"limitations":[137],"dataframes":[140,147],"via":[141],"performance":[143],"comparison":[144],"Pandas":[146],"stages":[152],"analysis.":[155]},"abstract_inverted_index_v3":null,"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4205193389","counts_by_year":[{"year":2024,"cited_by_count":1}],"updated_date":"2025-04-18T08:33:02.823310","created_date":"2022-01-25"}