{"id":"https://openalex.org/W3197661809","doi":"https://doi.org/10.1093/bioinformatics/btab636","title":"mspack: efficient lossless and lossy mass spectrometry data compression","display_name":"mspack: efficient lossless and lossy mass spectrometry data compression","publication_year":2021,"publication_date":"2021-09-03","ids":{"openalex":"https://openalex.org/W3197661809","doi":"https://doi.org/10.1093/bioinformatics/btab636","mag":"3197661809","pmid":"https://pubmed.ncbi.nlm.nih.gov/34478503"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.1093/bioinformatics/btab636","pdf_url":"https://academic.oup.com/bioinformatics/article-pdf/37/21/3923/41091708/btab636.pdf","source":{"id":"https://openalex.org/S52395412","display_name":"Bioinformatics","issn_l":"1367-4803","issn":["1367-4803","1367-4811"],"is_oa":true,"is_in_doaj":true,"is_indexed_in_scopus":true,"is_core":true,"host_organization":"https://openalex.org/P4310311648","host_organization_name":"Oxford University Press","host_organization_lineage":["https://openalex.org/P4310311647","https://openalex.org/P4310311648"],"host_organization_lineage_names":["University of Oxford","Oxford University Press"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true},"type":"article","type_crossref":"journal-article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":true,"oa_status":"bronze","oa_url":"https://academic.oup.com/bioinformatics/article-pdf/37/21/3923/41091708/btab636.pdf","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5031125548","display_name":"Felix Hanau","orcid":"https://orcid.org/0000-0003-1850-2669"},"institutions":[{"id":"https://openalex.org/I157725225","display_name":"University of Illinois Urbana-Champaign","ror":"https://ror.org/047426m28","country_code":"US","type":"funder","lineage":["https://openalex.org/I157725225"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Felix Hanau","raw_affiliation_strings":["Department of Computer Science, University of Illinois at Urbana-Champaign , Urbana, IL 61801, USA"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, University of Illinois at Urbana-Champaign , Urbana, IL 61801, USA","institution_ids":["https://openalex.org/I157725225"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5058994478","display_name":"Hannes R\u00f6st","orcid":"https://orcid.org/0000-0003-0990-7488"},"institutions":[{"id":"https://openalex.org/I185261750","display_name":"University of Toronto","ror":"https://ror.org/03dbr7087","country_code":"CA","type":"funder","lineage":["https://openalex.org/I185261750"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Hannes R\u00f6st","raw_affiliation_strings":["Department of Molecular Genetics, Donnelly Center, University of Toronto , Toronto, ON M5S 3E1, Canada"],"affiliations":[{"raw_affiliation_string":"Department of Molecular Genetics, Donnelly Center, University of Toronto , Toronto, ON M5S 3E1, Canada","institution_ids":["https://openalex.org/I185261750"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5045294377","display_name":"Idoia Ochoa","orcid":"https://orcid.org/0000-0003-1864-7868"},"institutions":[{"id":"https://openalex.org/I157725225","display_name":"University of Illinois Urbana-Champaign","ror":"https://ror.org/047426m28","country_code":"US","type":"funder","lineage":["https://openalex.org/I157725225"]},{"id":"https://openalex.org/I88155538","display_name":"Universidad de Navarra","ror":"https://ror.org/02rxc7m23","country_code":"ES","type":"funder","lineage":["https://openalex.org/I88155538"]}],"countries":["ES","US"],"is_corresponding":true,"raw_author_name":"Idoia Ochoa","raw_affiliation_strings":["Department of Electrical Engineering, University of Navarra, Tecnun , Donostia 20018, Spain","Department of Electrical and Computer Engineering, University of Illinois at Urbana-Champaign , Urbana, IL 61801, USA"],"affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, University of Illinois at Urbana-Champaign , Urbana, IL 61801, USA","institution_ids":["https://openalex.org/I157725225"]},{"raw_affiliation_string":"Department of Electrical Engineering, University of Navarra, Tecnun , Donostia 20018, Spain","institution_ids":["https://openalex.org/I88155538"]}]}],"institution_assertions":[],"countries_distinct_count":3,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5045294377"],"corresponding_institution_ids":["https://openalex.org/I157725225","https://openalex.org/I88155538"],"apc_list":{"value":3618,"currency":"USD","value_usd":3618},"apc_paid":null,"fwci":0.311,"has_fulltext":true,"fulltext_origin":"pdf","cited_by_count":4,"citation_normalized_percentile":{"value":0.405089,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":75,"max":78},"biblio":{"volume":"37","issue":"21","first_page":"3923","last_page":"3925"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10836","display_name":"Metabolomics and Mass Spectrometry Studies","score":0.9978,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T10836","display_name":"Metabolomics and Mass Spectrometry Studies","score":0.9978,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10683","display_name":"Mass Spectrometry Techniques and Applications","score":0.9975,"subfield":{"id":"https://openalex.org/subfields/1607","display_name":"Spectroscopy"},"field":{"id":"https://openalex.org/fields/16","display_name":"Chemistry"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10519","display_name":"Advanced Proteomics Techniques and Applications","score":0.9967,"subfield":{"id":"https://openalex.org/subfields/1607","display_name":"Spectroscopy"},"field":{"id":"https://openalex.org/fields/16","display_name":"Chemistry"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/lossy-compression","display_name":"Lossy compression","score":0.9015728}],"concepts":[{"id":"https://openalex.org/C165021410","wikidata":"https://www.wikidata.org/wiki/Q55564","display_name":"Lossy compression","level":2,"score":0.9015728},{"id":"https://openalex.org/C81081738","wikidata":"https://www.wikidata.org/wiki/Q55542","display_name":"Lossless compression","level":3,"score":0.85271275},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.608962},{"id":"https://openalex.org/C162356407","wikidata":"https://www.wikidata.org/wiki/Q180809","display_name":"Mass spectrometry","level":2,"score":0.56451255},{"id":"https://openalex.org/C180016635","wikidata":"https://www.wikidata.org/wiki/Q2712821","display_name":"Compression (physics)","level":2,"score":0.54747},{"id":"https://openalex.org/C78548338","wikidata":"https://www.wikidata.org/wiki/Q2493","display_name":"Data compression","level":2,"score":0.45193186},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.33091637},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.20033208},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.14190465},{"id":"https://openalex.org/C192562407","wikidata":"https://www.wikidata.org/wiki/Q228736","display_name":"Materials science","level":0,"score":0.11355218},{"id":"https://openalex.org/C43617362","wikidata":"https://www.wikidata.org/wiki/Q170050","display_name":"Chromatography","level":1,"score":0.09740177},{"id":"https://openalex.org/C159985019","wikidata":"https://www.wikidata.org/wiki/Q181790","display_name":"Composite material","level":1,"score":0.0}],"mesh":[{"descriptor_ui":"D044962","descriptor_name":"Data Compression","qualifier_ui":"","qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":"","qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D044962","descriptor_name":"Data Compression","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D059014","descriptor_name":"High-Throughput Nucleotide Sequencing","qualifier_ui":"","qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D059014","descriptor_name":"High-Throughput Nucleotide Sequencing","qualifier_ui":"Q000379","qualifier_name":"methods","is_major_topic":false},{"descriptor_ui":"D013058","descriptor_name":"Mass Spectrometry","qualifier_ui":"","qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D012984","descriptor_name":"Software","qualifier_ui":"","qualifier_name":null,"is_major_topic":false}],"locations_count":2,"locations":[{"is_oa":true,"landing_page_url":"https://doi.org/10.1093/bioinformatics/btab636","pdf_url":"https://academic.oup.com/bioinformatics/article-pdf/37/21/3923/41091708/btab636.pdf","source":{"id":"https://openalex.org/S52395412","display_name":"Bioinformatics","issn_l":"1367-4803","issn":["1367-4803","1367-4811"],"is_oa":true,"is_in_doaj":true,"is_indexed_in_scopus":true,"is_core":true,"host_organization":"https://openalex.org/P4310311648","host_organization_name":"Oxford University Press","host_organization_lineage":["https://openalex.org/P4310311647","https://openalex.org/P4310311648"],"host_organization_lineage_names":["University of Oxford","Oxford University Press"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true},{"is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/34478503","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":["National Institutes of Health"],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.1093/bioinformatics/btab636","pdf_url":"https://academic.oup.com/bioinformatics/article-pdf/37/21/3923/41091708/btab636.pdf","source":{"id":"https://openalex.org/S52395412","display_name":"Bioinformatics","issn_l":"1367-4803","issn":["1367-4803","1367-4811"],"is_oa":true,"is_in_doaj":true,"is_indexed_in_scopus":true,"is_core":true,"host_organization":"https://openalex.org/P4310311648","host_organization_name":"Oxford University Press","host_organization_lineage":["https://openalex.org/P4310311647","https://openalex.org/P4310311648"],"host_organization_lineage_names":["University of Oxford","Oxford University Press"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true},"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":6,"referenced_works":["https://openalex.org/W2023010129","https://openalex.org/W2081465448","https://openalex.org/W2089872141","https://openalex.org/W2146103026","https://openalex.org/W2147010729","https://openalex.org/W2956005825"],"related_works":["https://openalex.org/W755971114","https://openalex.org/W4247601675","https://openalex.org/W4210455546","https://openalex.org/W3180760233","https://openalex.org/W3035703949","https://openalex.org/W2547124190","https://openalex.org/W2385628723","https://openalex.org/W2118338613","https://openalex.org/W1982468865","https://openalex.org/W1970394887"],"abstract_inverted_index":{"Abstract":[0],"Motivation":[1],"Mass":[2],"spectrometry":[3],"(MS)":[4],"data,":[5],"used":[6,138,142],"for":[7,30,73,156,164,192],"proteomics":[8],"and":[9,39,52,81,86,94,105,159,182,200,204,211],"metabolomics":[10],"analyses,":[11],"have":[12,33],"seen":[13],"considerable":[14],"growth":[15],"in":[16,63,209],"the":[17,23,56,92,95,115,144,170,189,193,217],"last":[18],"years.":[19],"Aiming":[20],"at":[21,214,226],"reducing":[22],"associated":[24],"storage":[25],"costs,":[26],"dedicated":[27],"compression":[28,71,126,146,158],"algorithms":[29,43],"MS":[31,74,139],"data":[32,75,223],"been":[34],"proposed,":[35],"such":[36],"as":[37,89,91,167],"MassComp":[38],"MSNumpress.":[40],"However,":[41],"these":[42],"focus":[44],"on":[45,132,150],"either":[46],"lossless":[47,85,103,157],"or":[48,120],"lossy":[49,87,107,165,183,190],"compression,":[50,88,166],"respectively,":[51],"do":[53],"not":[54],"exploit":[55],"additional":[57,79],"redundancy":[58,80],"existing":[59],"across":[60],"scans":[61],"contained":[62],"a":[64,70,110,124],"single":[65],"file.":[66],"We":[67,129],"introduce":[68],"mspack,":[69],"algorithm":[72],"that":[76,82],"exploits":[77],"this":[78],"supports":[83],"both":[84],"well":[90],"mzML":[93],"legacy":[96],"mzXML":[97],"formats.":[98],"mspack":[99,131,148,174,184,206],"applies":[100],"several":[101,133],"preprocessing":[102],"transforms":[104,108],"optional":[106],"with":[109,143,169],"configurable":[111],"error,":[112,195],"followed":[113],"by":[114,136],"general":[116],"purpose":[117],"compressors":[118],"gzip":[119],"bsc":[121,145],"to":[122],"achieve":[123],"higher":[125],"ratio.":[127],"Results":[128],"tested":[130],"datasets":[134],"generated":[135],"commonly":[137],"instruments.":[140],"When":[141],"backend,":[147],"achieves":[149,175],"average":[151],"76%":[152],"smaller":[153,161],"file":[154,162,178],"sizes":[155,163,179],"94%":[160],"compared":[168],"original":[171],"files.":[172],"Lossless":[173],"10\u201360%":[176],"lower":[177],"than":[180,188],"MassComp,":[181],"compresses":[185],"36\u201360%":[186],"better":[187],"MSNumpress,":[191],"same":[194],"while":[196],"exhibiting":[197],"comparable":[198],"accuracy":[199],"running":[201],"time.":[202],"Availability":[203],"implementation":[205],"is":[207],"implemented":[208],"C++":[210],"freely":[212],"available":[213,225],"https://github.com/fhanau/mspack":[215],"under":[216],"Apache":[218],"license.":[219],"Supplementary":[220,222],"information":[221],"are":[224],"Bioinformatics":[227],"online.":[228]},"abstract_inverted_index_v3":null,"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W3197661809","counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":3}],"updated_date":"2025-04-20T09:40:50.583029","created_date":"2021-09-13"}