{"id":"https://openalex.org/W2083187798","doi":"https://doi.org/10.1109/bibm.2014.6999306","title":"Adopting the MapReduce framework to pre-train 1-D and 2-D protein structure predictors with large protein datasets","display_name":"Adopting the MapReduce framework to pre-train 1-D and 2-D protein structure predictors with large protein datasets","publication_year":2014,"publication_date":"2014-11-01","ids":{"openalex":"https://openalex.org/W2083187798","doi":"https://doi.org/10.1109/bibm.2014.6999306","mag":"2083187798"},"language":"en","primary_location":{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/bibm.2014.6999306","pdf_url":null,"source":{"id":"https://openalex.org/S4363607735","display_name":"2021 IEEE International Conference on Bioinformatics and Biomedicine (BIBM)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},"type":"article","type_crossref":"proceedings-article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5033383597","display_name":"Jesse Eickholt","orcid":"https://orcid.org/0000-0002-1764-1838"},"institutions":[{"id":"https://openalex.org/I1629065","display_name":"Central Michigan University","ror":"https://ror.org/02xawj266","country_code":"US","type":"funder","lineage":["https://openalex.org/I1629065"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jesse Eickholt","raw_affiliation_strings":["Department of Computer Science Central Michigan University Mount Pleasant MI USA."],"affiliations":[{"raw_affiliation_string":"Department of Computer Science Central Michigan University Mount Pleasant MI USA.","institution_ids":["https://openalex.org/I1629065"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5102909198","display_name":"Suman Karki","orcid":null},"institutions":[{"id":"https://openalex.org/I1629065","display_name":"Central Michigan University","ror":"https://ror.org/02xawj266","country_code":"US","type":"funder","lineage":["https://openalex.org/I1629065"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Suman Karki","raw_affiliation_strings":["Department of Computer Science Central Michigan University Mount Pleasant MI USA."],"affiliations":[{"raw_affiliation_string":"Department of Computer Science Central Michigan University Mount Pleasant MI USA.","institution_ids":["https://openalex.org/I1629065"]}]}],"institution_assertions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"fulltext_origin":"ngrams","cited_by_count":0,"citation_normalized_percentile":{"value":0.0,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":0,"max":66},"biblio":{"volume":null,"issue":null,"first_page":"23","last_page":"29"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T12254","display_name":"Machine Learning in Bioinformatics","score":0.9972,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T12254","display_name":"Machine Learning in Bioinformatics","score":0.9972,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10044","display_name":"Protein Structure and Dynamics","score":0.9968,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10015","display_name":"Genomics and Phylogenetic Studies","score":0.9835,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.7850878},{"id":"https://openalex.org/keywords/sequence","display_name":"Sequence (biology)","score":0.54875356}],"concepts":[{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.7850878},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7741288},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.7640557},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5853986},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.56652796},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.54875356},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.49626237},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.47049093},{"id":"https://openalex.org/C2780801425","wikidata":"https://www.wikidata.org/wiki/Q5164392","display_name":"Construct (python library)","level":2,"score":0.4578184},{"id":"https://openalex.org/C18051474","wikidata":"https://www.wikidata.org/wiki/Q899656","display_name":"Protein structure prediction","level":3,"score":0.41409665},{"id":"https://openalex.org/C47701112","wikidata":"https://www.wikidata.org/wiki/Q735188","display_name":"Protein structure","level":2,"score":0.30041498},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C46141821","wikidata":"https://www.wikidata.org/wiki/Q209402","display_name":"Nuclear magnetic resonance","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"is_oa":false,"landing_page_url":"https://doi.org/10.1109/bibm.2014.6999306","pdf_url":null,"source":{"id":"https://openalex.org/S4363607735","display_name":"2021 IEEE International Conference on Bioinformatics and Biomedicine (BIBM)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false}],"best_oa_location":null,"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":33,"referenced_works":["https://openalex.org/W1813659000","https://openalex.org/W1821638548","https://openalex.org/W1828921065","https://openalex.org/W1993882792","https://openalex.org/W2031767704","https://openalex.org/W2033636932","https://openalex.org/W2035018017","https://openalex.org/W2055644676","https://openalex.org/W2058715873","https://openalex.org/W2060107523","https://openalex.org/W2060422862","https://openalex.org/W2062227835","https://openalex.org/W2062920004","https://openalex.org/W2075849238","https://openalex.org/W2076048958","https://openalex.org/W2076978869","https://openalex.org/W2100495367","https://openalex.org/W2108101947","https://openalex.org/W2116064496","https://openalex.org/W2126715624","https://openalex.org/W2136922672","https://openalex.org/W2138755951","https://openalex.org/W2141125852","https://openalex.org/W2156125289","https://openalex.org/W2156465034","https://openalex.org/W2157355837","https://openalex.org/W2158714788","https://openalex.org/W2160784118","https://openalex.org/W2173213060","https://openalex.org/W2955074472","https://openalex.org/W2995564009","https://openalex.org/W3214074647","https://openalex.org/W44815768"],"related_works":["https://openalex.org/W3215498386","https://openalex.org/W3171039768","https://openalex.org/W2946599741","https://openalex.org/W2593264178","https://openalex.org/W2368468053","https://openalex.org/W2294851134","https://openalex.org/W2136856901","https://openalex.org/W2058542300","https://openalex.org/W2043066834","https://openalex.org/W1564749278"],"abstract_inverted_index":{"Sequence":[0],"based":[1],"machine":[2,30],"learning":[3,31],"approaches":[4],"for":[5,81],"1-D":[6,82,131],"and":[7,76,83,132],"2-D":[8,84,133],"protein":[9,57,70,85,127,134,146],"structure":[10,58,86,135],"prediction":[11,87],"tasks":[12],"have":[13,151],"long":[14],"been":[15],"limited":[16],"by":[17],"relatively":[18],"small":[19],"datasets,":[20],"namely":[21],"proteins":[22],"with":[23,154],"experimentally":[24],"determined":[25],"structure.":[26],"Recent":[27],"advances":[28],"in":[29,53],"provide":[32],"a":[33,41,48,63,68,144],"means":[34],"of":[35,56,93,125],"using":[36,102],"unlabeled":[37],"data":[38,75,129],"and,":[39],"as":[40,114],"result,":[42],"this":[43],"opens":[44],"up":[45],"access":[46],"to":[47,66,109,121,130,142],"much":[49],"larger":[50],"sequence":[51,71,128,147],"space":[52],"the":[54,91,95,103,119],"context":[55],"prediction.":[59,136],"Here":[60],"we":[61,98,150],"present":[62],"3-stage":[64],"pipeline":[65,101,141],"construct":[67],"representative":[69],"dataset,":[72,97],"generate":[73],"training":[74],"pre-train":[77],"deep":[78],"network":[79],"models":[80],"tasks.":[88],"To":[89],"handle":[90],"complexities":[92],"managing":[94],"large":[96,123],"implemented":[99],"our":[100,140],"MapReduce":[104],"framework.":[105],"This":[106],"allowed":[107],"us":[108],"leverage":[110],"existing":[111],"tools":[112],"such":[113],"Hadoop.":[115],"The":[116],"result":[117],"is":[118],"ability":[120],"apply":[122],"amounts":[124],"novel,":[126],"We":[137],"also":[138],"used":[139],"curate":[143],"non-redundant":[145],"dataset":[148],"that":[149],"made":[152],"available":[153],"accompanying":[155],"data.":[156]},"abstract_inverted_index_v3":null,"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W2083187798","counts_by_year":[],"updated_date":"2025-01-25T14:09:28.020937","created_date":"2016-06-24"}