{"id":"https://openalex.org/W4285044924","doi":"https://doi.org/10.48550/arxiv.2207.04043","title":"The Harvard USPTO Patent Dataset: A Large-Scale, Well-Structured, and Multi-Purpose Corpus of Patent Applications","display_name":"The Harvard USPTO Patent Dataset: A Large-Scale, Well-Structured, and Multi-Purpose Corpus of Patent Applications","publication_year":2022,"publication_date":"2022-01-01","ids":{"openalex":"https://openalex.org/W4285044924","doi":"https://doi.org/10.48550/arxiv.2207.04043"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2207.04043","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/abs/2207.04043","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5009980324","display_name":"Mirac S\u00fczg\u00fcn","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Suzgun, Mirac","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5079016134","display_name":"Luke Melas-Kyriazi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Melas-Kyriazi, Luke","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5081371955","display_name":"Suproteem K. Sarkar","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sarkar, Suproteem K.","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5074532835","display_name":"Scott Duke Kominers","orcid":"https://orcid.org/0000-0002-7608-6619"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kominers, Scott Duke","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5053102850","display_name":"Stuart M. Shieber","orcid":"https://orcid.org/0000-0002-7733-8195"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shieber, Stuart M.","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":7,"citation_normalized_percentile":{"value":0.954774,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":87,"max":89},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10856","display_name":"Intellectual Property and Patents","score":0.996,"subfield":{"id":"https://openalex.org/subfields/1405","display_name":"Management of Technology and Innovation"},"field":{"id":"https://openalex.org/fields/14","display_name":"Business, Management and Accounting"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T10856","display_name":"Intellectual Property and Patents","score":0.996,"subfield":{"id":"https://openalex.org/subfields/1405","display_name":"Management of Technology and Innovation"},"field":{"id":"https://openalex.org/fields/14","display_name":"Business, Management and Accounting"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T12722","display_name":"Innovation Policy and R&D","score":0.9149,"subfield":{"id":"https://openalex.org/subfields/2002","display_name":"Economics and Econometrics"},"field":{"id":"https://openalex.org/fields/20","display_name":"Economics, Econometrics and Finance"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.61246926},{"id":"https://openalex.org/keywords/patent-visualisation","display_name":"Patent visualisation","score":0.44177696}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.73078847},{"id":"https://openalex.org/C93518851","wikidata":"https://www.wikidata.org/wiki/Q180160","display_name":"Metadata","level":2,"score":0.64175594},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.61246926},{"id":"https://openalex.org/C2779027411","wikidata":"https://www.wikidata.org/wiki/Q167270","display_name":"Trademark","level":2,"score":0.5675429},{"id":"https://openalex.org/C2778738651","wikidata":"https://www.wikidata.org/wiki/Q16546687","display_name":"Novelty","level":2,"score":0.5654895},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5397573},{"id":"https://openalex.org/C2780697943","wikidata":"https://www.wikidata.org/wiki/Q7144533","display_name":"Patentability","level":4,"score":0.52784157},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.4816674},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.47531444},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.45568705},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.44834182},{"id":"https://openalex.org/C114419676","wikidata":"https://www.wikidata.org/wiki/Q6505871","display_name":"Patent visualisation","level":2,"score":0.44177696},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.38213402},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.2707315},{"id":"https://openalex.org/C34974158","wikidata":"https://www.wikidata.org/wiki/Q131257","display_name":"Intellectual property","level":2,"score":0.24335611},{"id":"https://openalex.org/C2984145337","wikidata":"https://www.wikidata.org/wiki/Q253623","display_name":"Patent law","level":3,"score":0.1290695},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C27206212","wikidata":"https://www.wikidata.org/wiki/Q34178","display_name":"Theology","level":1,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2207.04043","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2207.04043","pdf_url":"http://arxiv.org/pdf/2207.04043","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":false,"landing_page_url":"https://api.datacite.org/dois/10.48550/arxiv.2207.04043","pdf_url":null,"source":{"id":"https://openalex.org/S4393179698","display_name":"DataCite API","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4210145204","host_organization_name":"DataCite","host_organization_lineage":["https://openalex.org/I4210145204"],"host_organization_lineage_names":["DataCite"],"type":"metadata"},"license":null,"license_id":null,"version":null}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2207.04043","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[{"score":0.58,"display_name":"Reduced inequalities","id":"https://metadata.un.org/sdg/10"}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W602020505","https://openalex.org/W4386662124","https://openalex.org/W4320715465","https://openalex.org/W2785385616","https://openalex.org/W2767647695","https://openalex.org/W2594320691","https://openalex.org/W2577991833","https://openalex.org/W2464227586","https://openalex.org/W2186189008","https://openalex.org/W2051163595"],"abstract_inverted_index":{"Innovation":[0],"is":[1,17,103,151],"a":[2,47,71,197,210],"major":[3],"driver":[4],"of":[5,15,32,50,77,124,130,141,157,164,175,187,203,219,238,258],"economic":[6],"and":[7,10,24,30,57,74,87,93,264],"social":[8],"development,":[9],"information":[11],"about":[12],"many":[13],"kinds":[14],"innovation":[16],"embedded":[18],"in":[19,35,117,154,193,229],"semi-structured":[20],"data":[21,37],"from":[22],"patents":[23],"patent":[25,36,79,100,115,125,165,220,259],"applications.":[26],"Although":[27],"the":[28,65,83,121,127,139,147,162,179,201,214,225,230],"impact":[29],"novelty":[31],"innovations":[33],"expressed":[34],"are":[38],"difficult":[39],"to":[40,82,105,135,183,213,234],"measure":[41],"through":[42],"traditional":[43],"means,":[44],"ML":[45],"offers":[46],"promising":[48],"set":[49],"techniques":[51],"for":[52,146,241,252],"evaluating":[53],"novelty,":[54],"summarizing":[55],"contributions,":[56],"embedding":[58],"semantics.":[59],"In":[60],"this":[61,242],"paper,":[62],"we":[63,208,245],"introduce":[64,209],"Harvard":[66],"USPTO":[67],"Patent":[68,86],"Dataset":[69],"(HUPD),":[70],"large-scale,":[72],"well-structured,":[73],"multi-purpose":[75],"corpus":[76],"English-language":[78],"applications":[80],"filed":[81],"United":[84],"States":[85],"Trademark":[88],"Office":[89],"(USPTO)":[90],"between":[91],"2004":[92],"2018.":[94],"With":[95],"more":[96],"than":[97,109],"4.5":[98],"million":[99],"documents,":[101],"HUPD":[102,119,205,248],"two":[104],"three":[106,253],"times":[107],"larger":[108],"comparable":[110],"corpora.":[111],"Unlike":[112],"previously":[113],"proposed":[114],"datasets":[116],"NLP,":[118],"contains":[120],"inventor-submitted":[122],"versions":[123,129],"applications--not":[126],"final":[128],"granted":[131],"patents--thereby":[132],"allowing":[133],"us":[134,233],"study":[136,199],"patentability":[137],"at":[138],"time":[140],"filing":[142],"using":[143],"NLP":[144,188,215],"methods":[145],"first":[148],"time.":[149],"It":[150],"also":[152],"novel":[153],"its":[155,176],"inclusion":[156],"rich":[158],"structured":[159,194,226],"metadata":[160,171,227],"alongside":[161],"text":[163,177],"filings:":[166],"By":[167],"providing":[168],"each":[169],"application's":[170],"along":[172],"with":[173],"all":[174],"fields,":[178],"dataset":[180,231],"enables":[181,232],"researchers":[182],"perform":[184],"new":[185,211],"sets":[186],"tasks":[189],"that":[190],"leverage":[191],"variation":[192],"covariates.":[195],"As":[196],"case":[198],"on":[200],"types":[202],"research":[204],"makes":[206],"possible,":[207],"task":[212],"community--namely,":[216],"binary":[217],"classification":[218,257],"decisions.":[221],"We":[222],"additionally":[223],"show":[224],"provided":[228],"conduct":[235],"explicit":[236],"studies":[237],"concept":[239],"shifts":[240],"task.":[243],"Finally,":[244],"demonstrate":[246],"how":[247],"can":[249],"be":[250],"used":[251],"additional":[254],"tasks:":[255],"multi-class":[256],"subject":[260],"areas,":[261],"language":[262],"modeling,":[263],"summarization.":[265]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4285044924","counts_by_year":[{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":5}],"updated_date":"2024-12-15T18:08:55.245358","created_date":"2022-07-13"}