{"id":"https://openalex.org/W2963068615","doi":"https://doi.org/10.1109/tit.2019.2898412","title":"Determining the Number of Samples Required to Estimate Entropy in Natural Sequences","display_name":"Determining the Number of Samples Required to Estimate Entropy in Natural Sequences","publication_year":2019,"publication_date":"2019-02-09","ids":{"openalex":"https://openalex.org/W2963068615","doi":"https://doi.org/10.1109/tit.2019.2898412","mag":"2963068615"},"language":"en","primary_location":{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/tit.2019.2898412","pdf_url":null,"source":{"id":"https://openalex.org/S4502562","display_name":"IEEE Transactions on Information Theory","issn_l":"0018-9448","issn":["0018-9448","1557-9654"],"is_oa":false,"is_in_doaj":false,"is_indexed_in_scopus":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},"type":"article","type_crossref":"journal-article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"http://arxiv.org/pdf/1805.08929","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5086895345","display_name":"Andrew D. Back","orcid":"https://orcid.org/0000-0001-5474-1910"},"institutions":[{"id":"https://openalex.org/I165143802","display_name":"The University of Queensland","ror":"https://ror.org/00rqy9422","country_code":"AU","type":"funder","lineage":["https://openalex.org/I165143802"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Andrew D. Back","raw_affiliation_strings":["School of ITEE, The University of Queensland, Brisbane, QLD, Australia"],"affiliations":[{"raw_affiliation_string":"School of ITEE, The University of Queensland, Brisbane, QLD, Australia","institution_ids":["https://openalex.org/I165143802"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5023853044","display_name":"Daniel Angus","orcid":"https://orcid.org/0000-0002-1412-5096"},"institutions":[{"id":"https://openalex.org/I165143802","display_name":"The University of Queensland","ror":"https://ror.org/00rqy9422","country_code":"AU","type":"funder","lineage":["https://openalex.org/I165143802"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Daniel Angus","raw_affiliation_strings":["School of ITEE, The University of Queensland, Brisbane, QLD, Australia"],"affiliations":[{"raw_affiliation_string":"School of ITEE, The University of Queensland, Brisbane, QLD, Australia","institution_ids":["https://openalex.org/I165143802"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5013458161","display_name":"Janet Wiles","orcid":"https://orcid.org/0000-0002-4051-4116"},"institutions":[{"id":"https://openalex.org/I165143802","display_name":"The University of Queensland","ror":"https://ror.org/00rqy9422","country_code":"AU","type":"funder","lineage":["https://openalex.org/I165143802"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Janet Wiles","raw_affiliation_strings":["School of ITEE, The University of Queensland, Brisbane, QLD, Australia"],"affiliations":[{"raw_affiliation_string":"School of ITEE, The University of Queensland, Brisbane, QLD, Australia","institution_ids":["https://openalex.org/I165143802"]}]}],"institution_assertions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.844,"has_fulltext":true,"fulltext_origin":"ngrams","cited_by_count":7,"citation_normalized_percentile":{"value":0.833785,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":82,"max":83},"biblio":{"volume":"65","issue":"7","first_page":"4345","last_page":"4352"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10320","display_name":"Neural Networks and Applications","score":0.9499,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10320","display_name":"Neural Networks and Applications","score":0.9499,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12261","display_name":"Statistical Mechanics and Entropy","score":0.9323,"subfield":{"id":"https://openalex.org/subfields/3109","display_name":"Statistical and Nonlinear Physics"},"field":{"id":"https://openalex.org/fields/31","display_name":"Physics and Astronomy"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12946","display_name":"Fractal and DNA sequence analysis","score":0.9177,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/zipfs-law","display_name":"Zipf's law","score":0.77161217},{"id":"https://openalex.org/keywords/information-theory","display_name":"Information Theory","score":0.48541674},{"id":"https://openalex.org/keywords/min-entropy","display_name":"Min entropy","score":0.48519185}],"concepts":[{"id":"https://openalex.org/C125932096","wikidata":"https://www.wikidata.org/wiki/Q205472","display_name":"Zipf's law","level":2,"score":0.77161217},{"id":"https://openalex.org/C106301342","wikidata":"https://www.wikidata.org/wiki/Q4117933","display_name":"Entropy (arrow of time)","level":2,"score":0.6662108},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.656649},{"id":"https://openalex.org/C60507348","wikidata":"https://www.wikidata.org/wiki/Q6795892","display_name":"Maximum entropy probability distribution","level":3,"score":0.52976376},{"id":"https://openalex.org/C142611142","wikidata":"https://www.wikidata.org/wiki/Q1433083","display_name":"R\u00e9nyi entropy","level":3,"score":0.49342003},{"id":"https://openalex.org/C52622258","wikidata":"https://www.wikidata.org/wiki/Q131222","display_name":"Information theory","level":2,"score":0.48541674},{"id":"https://openalex.org/C196083917","wikidata":"https://www.wikidata.org/wiki/Q17103874","display_name":"Min entropy","level":5,"score":0.48519185},{"id":"https://openalex.org/C9679016","wikidata":"https://www.wikidata.org/wiki/Q1417473","display_name":"Principle of maximum entropy","level":2,"score":0.4815424},{"id":"https://openalex.org/C177409962","wikidata":"https://www.wikidata.org/wiki/Q2411312","display_name":"Shannon's source coding theorem","level":5,"score":0.47665817},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.43709105},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3484769},{"id":"https://openalex.org/C125252325","wikidata":"https://www.wikidata.org/wiki/Q1345213","display_name":"Entropy rate","level":4,"score":0.3365031},{"id":"https://openalex.org/C49775889","wikidata":"https://www.wikidata.org/wiki/Q6795896","display_name":"Maximum entropy thermodynamics","level":4,"score":0.27310938},{"id":"https://openalex.org/C42047476","wikidata":"https://www.wikidata.org/wiki/Q6269544","display_name":"Joint quantum entropy","level":3,"score":0.22597894},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/tit.2019.2898412","pdf_url":null,"source":{"id":"https://openalex.org/S4502562","display_name":"IEEE Transactions on Information Theory","issn_l":"0018-9448","issn":["0018-9448","1557-9654"],"is_oa":false,"is_in_doaj":false,"is_indexed_in_scopus":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/1805.08929","pdf_url":"http://arxiv.org/pdf/1805.08929","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/1805.08929","pdf_url":"http://arxiv.org/pdf/1805.08929","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":37,"referenced_works":["https://openalex.org/W1507719567","https://openalex.org/W1967712865","https://openalex.org/W1980499355","https://openalex.org/W1986908574","https://openalex.org/W1988645907","https://openalex.org/W1994463867","https://openalex.org/W1995806138","https://openalex.org/W1995875735","https://openalex.org/W2018891628","https://openalex.org/W2019588402","https://openalex.org/W2023112529","https://openalex.org/W2023761963","https://openalex.org/W2025533349","https://openalex.org/W2027321393","https://openalex.org/W2027690951","https://openalex.org/W2048176942","https://openalex.org/W2050715692","https://openalex.org/W2053782355","https://openalex.org/W2061119788","https://openalex.org/W2068506707","https://openalex.org/W2078206416","https://openalex.org/W2079145130","https://openalex.org/W2081279294","https://openalex.org/W2082382271","https://openalex.org/W2088088096","https://openalex.org/W2099747733","https://openalex.org/W2101718061","https://openalex.org/W2108026887","https://openalex.org/W2129802470","https://openalex.org/W2160943512","https://openalex.org/W2170521549","https://openalex.org/W2183162925","https://openalex.org/W2265318195","https://openalex.org/W2572539223","https://openalex.org/W2993383518","https://openalex.org/W3101791345","https://openalex.org/W3105788277"],"related_works":["https://openalex.org/W2949606031","https://openalex.org/W2898188192","https://openalex.org/W2884476384","https://openalex.org/W2398403988","https://openalex.org/W2330170324","https://openalex.org/W2234812299","https://openalex.org/W2159862004","https://openalex.org/W2139507220","https://openalex.org/W2071171073","https://openalex.org/W2045693826"],"abstract_inverted_index":{"Calculating":[0],"the":[1,21,59,83,110,123],"Shannon":[2,90],"entropy":[3,23,91,134],"for":[4,81,92,122],"symbolic":[5,97],"sequences":[6,51],"has":[7],"been":[8],"widely":[9],"considered":[10],"in":[11,46],"many":[12,63],"fields.":[13],"For":[14],"descriptive":[15],"statistical":[16],"problems":[17],"such":[18],"as":[19,34,37],"estimating":[20,82],"N-gram":[22],"of":[24,61,85,95,126,133,142],"English":[25],"language":[26],"text,":[27],"a":[28,79,93,105,136],"common":[29],"approach":[30],"is":[31,102],"to":[32,39,58,67,88,129],"use":[33],"much":[35],"data":[36],"possible":[38],"obtain":[40,130],"progressively":[41],"more":[42],"accurate":[43],"estimates.":[44],"However,":[45],"some":[47],"instances,":[48],"only":[49],"short":[50],"may":[52],"be":[53],"available.":[54],"This":[55],"gives":[56],"rise":[57],"question":[60],"how":[62],"samples":[64,86,127],"are":[65],"needed":[66],"compute":[68,89],"entropy.":[69],"In":[70],"this":[71,75],"paper,":[72],"we":[73,114],"examine":[74],"problem":[76],"and":[77,109,113,140],"propose":[78,115],"method":[80],"number":[84,125],"required":[87,128],"set":[94],"ranked":[96],"\"natural\"":[98],"events.":[99],"The":[100],"result":[101],"developed":[103],"using":[104],"modified":[106],"Zipf-Mandelbrot":[107],"law":[108],"Dvoretzky-Kiefer-Wolfowitz":[111],"inequality,":[112],"an":[116,120,131],"approximation":[117],"which":[118],"yields":[119],"estimate":[121,132],"minimum":[124],"with":[135],"given":[137],"confidence":[138],"level":[139],"degree":[141],"accuracy.":[143]},"abstract_inverted_index_v3":null,"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W2963068615","counts_by_year":[{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":2},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":1}],"updated_date":"2025-03-23T13:19:38.384466","created_date":"2019-07-30"}