{"id":"https://openalex.org/W2126743076","doi":"https://doi.org/10.1109/cloud.2012.118","title":"Evaluating Hadoop for Data-Intensive Scientific Operations","display_name":"Evaluating Hadoop for Data-Intensive Scientific Operations","publication_year":2012,"publication_date":"2012-06-01","ids":{"openalex":"https://openalex.org/W2126743076","doi":"https://doi.org/10.1109/cloud.2012.118","mag":"2126743076"},"language":"en","primary_location":{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/cloud.2012.118","pdf_url":null,"source":null,"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},"type":"article","type_crossref":"proceedings-article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5001486479","display_name":"Zacharia Fadika","orcid":null},"institutions":[{"id":"https://openalex.org/I123946342","display_name":"Binghamton University","ror":"https://ror.org/008rmbt77","country_code":"US","type":"education","lineage":["https://openalex.org/I123946342"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Zacharia Fadika","raw_affiliation_strings":["SUNY Binghamton, Binghamton, NY, USA"],"affiliations":[{"raw_affiliation_string":"SUNY Binghamton, Binghamton, NY, USA","institution_ids":["https://openalex.org/I123946342"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5112335923","display_name":"Madhusudhan Govindaraju","orcid":null},"institutions":[{"id":"https://openalex.org/I123946342","display_name":"Binghamton University","ror":"https://ror.org/008rmbt77","country_code":"US","type":"education","lineage":["https://openalex.org/I123946342"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Madhusudhan Govindaraju","raw_affiliation_strings":["SUNY Binghamton, Binghamton, NY"],"affiliations":[{"raw_affiliation_string":"SUNY Binghamton, Binghamton, NY","institution_ids":["https://openalex.org/I123946342"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5087205619","display_name":"R. S. Canon","orcid":null},"institutions":[{"id":"https://openalex.org/I148283060","display_name":"Lawrence Berkeley National Laboratory","ror":"https://ror.org/02jbv0t02","country_code":"US","type":"facility","lineage":["https://openalex.org/I1330989302","https://openalex.org/I148283060","https://openalex.org/I39565521"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Richard Canon","raw_affiliation_strings":["Lawrence Berekely National Lab Berkeley, CA","Lawrence Berkeley National Laboratory, Berkeley, CA, USA"],"affiliations":[{"raw_affiliation_string":"Lawrence Berkeley National Laboratory, Berkeley, CA, USA","institution_ids":["https://openalex.org/I148283060"]},{"raw_affiliation_string":"Lawrence Berekely National Lab Berkeley, CA","institution_ids":["https://openalex.org/I148283060"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5038208900","display_name":"Lavanya Ramakrishnan","orcid":"https://orcid.org/0000-0003-1761-4132"},"institutions":[{"id":"https://openalex.org/I148283060","display_name":"Lawrence Berkeley National Laboratory","ror":"https://ror.org/02jbv0t02","country_code":"US","type":"facility","lineage":["https://openalex.org/I1330989302","https://openalex.org/I148283060","https://openalex.org/I39565521"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Lavanya Ramakrishnan","raw_affiliation_strings":["Lawrence Berekely National Lab Berkeley, CA","Lawrence Berkeley National Laboratory, Berkeley, CA, USA"],"affiliations":[{"raw_affiliation_string":"Lawrence Berekely National Lab Berkeley, CA","institution_ids":["https://openalex.org/I148283060"]},{"raw_affiliation_string":"Lawrence Berkeley National Laboratory, Berkeley, CA, USA","institution_ids":["https://openalex.org/I148283060"]}]}],"institution_assertions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":4.422,"has_fulltext":false,"cited_by_count":34,"citation_normalized_percentile":{"value":0.889228,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":93,"max":94},"biblio":{"volume":"5976","issue":null,"first_page":"67","last_page":"74"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.9999,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.9999,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9945,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10273","display_name":"IoT and Edge/Fog Computing","score":0.992,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/merge","display_name":"Merge (version control)","score":0.4685795}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8644204},{"id":"https://openalex.org/C75684735","wikidata":"https://www.wikidata.org/wiki/Q858810","display_name":"Big data","level":2,"score":0.6323635},{"id":"https://openalex.org/C76831024","wikidata":"https://www.wikidata.org/wiki/Q5227096","display_name":"Data-intensive computing","level":4,"score":0.6313242},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.56134},{"id":"https://openalex.org/C34165917","wikidata":"https://www.wikidata.org/wiki/Q188267","display_name":"Programming paradigm","level":2,"score":0.47573495},{"id":"https://openalex.org/C2778787235","wikidata":"https://www.wikidata.org/wiki/Q49007","display_name":"Yarn","level":2,"score":0.47155046},{"id":"https://openalex.org/C197129107","wikidata":"https://www.wikidata.org/wiki/Q1921621","display_name":"Merge (version control)","level":2,"score":0.4685795},{"id":"https://openalex.org/C63540848","wikidata":"https://www.wikidata.org/wiki/Q3140932","display_name":"Fault tolerance","level":2,"score":0.4664886},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.4564015},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.44170147},{"id":"https://openalex.org/C206729178","wikidata":"https://www.wikidata.org/wiki/Q2271896","display_name":"Scheduling (production processes)","level":2,"score":0.43883264},{"id":"https://openalex.org/C83283714","wikidata":"https://www.wikidata.org/wiki/Q121117","display_name":"Supercomputer","level":2,"score":0.42113328},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.37434077},{"id":"https://openalex.org/C70429105","wikidata":"https://www.wikidata.org/wiki/Q249999","display_name":"Grid computing","level":3,"score":0.31660676},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.28945607},{"id":"https://openalex.org/C187691185","wikidata":"https://www.wikidata.org/wiki/Q2020720","display_name":"Grid","level":2,"score":0.23909178},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0},{"id":"https://openalex.org/C192562407","wikidata":"https://www.wikidata.org/wiki/Q228736","display_name":"Materials science","level":0,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C159985019","wikidata":"https://www.wikidata.org/wiki/Q181790","display_name":"Composite material","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/cloud.2012.118","pdf_url":null,"source":null,"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false}],"best_oa_location":null,"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":25,"referenced_works":["https://openalex.org/W126394981","https://openalex.org/W1547229954","https://openalex.org/W1575777113","https://openalex.org/W1834532152","https://openalex.org/W1990102402","https://openalex.org/W2010279913","https://openalex.org/W2018506220","https://openalex.org/W2022486753","https://openalex.org/W2026575055","https://openalex.org/W2044490410","https://openalex.org/W2048295208","https://openalex.org/W2080686285","https://openalex.org/W2094022523","https://openalex.org/W2102130607","https://openalex.org/W2106802957","https://openalex.org/W2119738171","https://openalex.org/W2121762798","https://openalex.org/W2133873843","https://openalex.org/W2150884640","https://openalex.org/W2153527235","https://openalex.org/W2155072926","https://openalex.org/W2158865579","https://openalex.org/W2159825340","https://openalex.org/W2173213060","https://openalex.org/W3141318050"],"related_works":["https://openalex.org/W4297792185","https://openalex.org/W3004288367","https://openalex.org/W2766143738","https://openalex.org/W2353464440","https://openalex.org/W2188069249","https://openalex.org/W2158030964","https://openalex.org/W2143819726","https://openalex.org/W2127197365","https://openalex.org/W2088745459","https://openalex.org/W1585052355"],"abstract_inverted_index":{"Emerging":[0],"sensor":[1],"networks,":[2],"more":[3],"capable":[4],"instruments,":[5],"and":[6,26,69,82,92,118,135,143,174],"ever":[7],"increasing":[8],"simulation":[9],"scales":[10],"are":[11],"generating":[12],"data":[13,53,90,111,154],"at":[14],"a":[15,49,102],"rate":[16],"that":[17,109],"exceeds":[18],"our":[19],"ability":[20],"to":[21,33,55,124,137,165],"effectively":[22],"manage,":[23],"curate,":[24],"analyze,":[25],"share":[27],"it.":[28],"Data-intensive":[29],"computing":[30],"is":[31,101,122],"expected":[32],"revolutionize":[34],"the":[35,45,71,106,157,167,170],"next-generation":[36],"software":[37],"stack.":[38],"Hadoop,":[39],"an":[40],"open":[41],"source":[42],"implementation":[43],"of":[44,61,159,169],"MapReduce":[46,81,117],"model":[47,72],"provides":[48],"way":[50],"for":[51,77,88,97,128,152],"large":[52,62],"volumes":[54],"be":[56],"seamlessly":[57],"processed":[58],"through":[59],"use":[60],"commodity":[63],"computers.":[64],"The":[65],"inherent":[66],"parallelization,":[67],"synchronization":[68],"fault-tolerance":[70],"offers,":[73],"makes":[74],"it":[75,121],"ideal":[76],"highly-parallel":[78],"data-intensive":[79,129],"applications.":[80,99],"Hadoop":[83,126,151],"have":[84],"traditionally":[85],"been":[86,95],"used":[87,96],"web":[89],"processing":[91],"only":[93],"recently":[94],"scientific":[98,110,130],"There":[100],"limited":[103],"understanding":[104],"on":[105,177],"performance":[107,144],"characteristics":[108],"intensive":[112],"applications":[113],"can":[114],"obtain":[115],"from":[116],"Hadoop.":[119],"Thus,":[120],"important":[123],"evaluate":[125,150],"specifically":[127],"operations":[131,155],"--":[132],"filter,":[133],"merge":[134],"reorder--":[136],"understand":[138,166],"its":[139],"various":[140],"design":[141],"considerations":[142],"trade-offs.":[145],"In":[146],"this":[147],"paper,":[148],"we":[149],"these":[153],"in":[156],"context":[158],"High":[160],"Performance":[161],"Computing":[162],"(HPC)":[163],"environments":[164],"impact":[168],"file":[171],"system,":[172],"network":[173],"programming":[175],"modes":[176],"performance.":[178]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W2126743076","counts_by_year":[{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":2},{"year":2018,"cited_by_count":4},{"year":2017,"cited_by_count":1},{"year":2016,"cited_by_count":2},{"year":2015,"cited_by_count":9},{"year":2014,"cited_by_count":7},{"year":2013,"cited_by_count":6},{"year":2012,"cited_by_count":1}],"updated_date":"2025-01-09T07:21:10.657961","created_date":"2016-06-24"}