{"id":"https://openalex.org/W1970810070","doi":"https://doi.org/10.1145/1963192.1963258","title":"Learning to tokenize web domains","display_name":"Learning to tokenize web domains","publication_year":2011,"publication_date":"2011-03-28","ids":{"openalex":"https://openalex.org/W1970810070","doi":"https://doi.org/10.1145/1963192.1963258","mag":"1970810070"},"language":"en","primary_location":{"is_oa":false,"landing_page_url":"https://doi.org/10.1145/1963192.1963258","pdf_url":null,"source":null,"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false},"type":"article","type_crossref":"proceedings-article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5017449169","display_name":"Sriram Srinivasan","orcid":"https://orcid.org/0000-0003-0085-309X"},"institutions":[],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Sriram Srinivasan","raw_affiliation_strings":["Yahoo! Software Development India, Bangalore, India"],"affiliations":[{"raw_affiliation_string":"Yahoo! Software Development India, Bangalore, India","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5108196384","display_name":"Sourangshu Bhattachaya","orcid":null},"institutions":[],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Sourangshu Bhattachaya","raw_affiliation_strings":["Yahoo! Labs India, Bangalore, India"],"affiliations":[{"raw_affiliation_string":"Yahoo! Labs India, Bangalore, India","institution_ids":[]}]}],"institution_assertions":[],"countries_distinct_count":1,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.206,"has_fulltext":true,"fulltext_origin":"ngrams","cited_by_count":2,"citation_normalized_percentile":{"value":0.374699,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":72,"max":76},"biblio":{"volume":null,"issue":null,"first_page":"129","last_page":"130"},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9996,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9996,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9994,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9966,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/monetization","display_name":"Monetization","score":0.80162287},{"id":"https://openalex.org/keywords/margin","display_name":"Margin (machine learning)","score":0.4541478}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.83584803},{"id":"https://openalex.org/C2780602052","wikidata":"https://www.wikidata.org/wiki/Q289845","display_name":"Monetization","level":2,"score":0.80162287},{"id":"https://openalex.org/C21959979","wikidata":"https://www.wikidata.org/wiki/Q36774","display_name":"Web page","level":2,"score":0.611473},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.6105419},{"id":"https://openalex.org/C110875604","wikidata":"https://www.wikidata.org/wiki/Q75","display_name":"The Internet","level":2,"score":0.60889775},{"id":"https://openalex.org/C90673727","wikidata":"https://www.wikidata.org/wiki/Q901718","display_name":"Product (mathematics)","level":2,"score":0.60823256},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.58793086},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.565299},{"id":"https://openalex.org/C195487862","wikidata":"https://www.wikidata.org/wiki/Q850210","display_name":"Revenue","level":2,"score":0.5202528},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.50972337},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.49065605},{"id":"https://openalex.org/C774472","wikidata":"https://www.wikidata.org/wiki/Q6760393","display_name":"Margin (machine learning)","level":2,"score":0.4541478},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.22727785},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.077320725},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C144133560","wikidata":"https://www.wikidata.org/wiki/Q4830453","display_name":"Business","level":0,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C121955636","wikidata":"https://www.wikidata.org/wiki/Q4116214","display_name":"Accounting","level":1,"score":0.0},{"id":"https://openalex.org/C78458016","wikidata":"https://www.wikidata.org/wiki/Q840400","display_name":"Evolutionary biology","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C139719470","wikidata":"https://www.wikidata.org/wiki/Q39680","display_name":"Macroeconomics","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"is_oa":false,"landing_page_url":"https://doi.org/10.1145/1963192.1963258","pdf_url":null,"source":null,"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/8","display_name":"Decent work and economic growth","score":0.4}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":4,"referenced_works":["https://openalex.org/W147273232","https://openalex.org/W1583837637","https://openalex.org/W2036516910","https://openalex.org/W2147880316"],"related_works":["https://openalex.org/W72090","https://openalex.org/W4323565542","https://openalex.org/W4312342191","https://openalex.org/W4297808986","https://openalex.org/W4296873905","https://openalex.org/W4240504770","https://openalex.org/W2990215692","https://openalex.org/W2956015248","https://openalex.org/W2520281795","https://openalex.org/W20878794"],"abstract_inverted_index":{"Domain":[0,66],"Match":[1,67],"is":[2,32,56],"an":[3,57],"Internet":[4],"monetization":[5],"product":[6,14,41,59],"offered":[7],"by":[8,70],"web":[9],"companies":[10,51],"like":[11,52],"Yahoo!":[12,53],"The":[13],"offers":[15],"display":[16],"of":[17,45,62,114,125,144],"ads":[18,82],"and":[19,75,79,83,116,123,154],"search":[20,84],"results,":[21],"when":[22],"a":[23,26,29,95,108,112,148,155],"user":[24],"requests":[25],"webpage":[27],"from":[28],"domain":[30],"which":[31,100],"non-existent":[33],"or":[34],"does":[35],"not":[36],"have":[37],"any":[38],"content.":[39],"This":[40],"earns":[42],"significant":[43],"amount":[44],"advertising":[46],"revenue":[47],"for":[48,129],"major":[49],"internet":[50],"Hence":[54],"it":[55],"important":[58],"receiving":[60],"millions":[61],"queries":[63],"per":[64],"day.":[65],"(DM)":[68],"works":[69],"tokenizing":[71],"the":[72,88,146],"input":[73],"domains":[74,115],"sub-folders":[76],"into":[77],"keywords":[78],"then":[80],"displaying":[81],"results":[85,160],"queried":[86],"on":[87],"keywords.":[89],"In":[90],"this":[91],"poster,":[92],"we":[93],"describe":[94],"machine":[96],"learning":[97],"based":[98,152],"solution,":[99],"automatically":[101],"learns":[102],"to":[103],"tokenize":[104],"new":[105],"domains,":[106],"given":[107],"training":[109,145,153],"dataset":[110],"containing":[111],"set":[113],"their":[117],"tokenizations.":[118],"We":[119,140],"use":[120],"positional":[121],"frequency":[122],"parts":[124],"speech":[126],"as":[127],"features":[128],"scoring":[130,138],"tokens.":[131],"Tokens":[132],"are":[133,161],"scored":[134],"combined":[135],"using":[136],"various":[137],"models.":[139],"compare":[141],"two":[142],"ways":[143],"models:":[147],"simple":[149],"gain":[150],"function":[151],"large":[156],"margin":[157],"training.":[158],"Experimental":[159],"encouraging.":[162]},"abstract_inverted_index_v3":null,"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W1970810070","counts_by_year":[{"year":2018,"cited_by_count":1},{"year":2012,"cited_by_count":1}],"updated_date":"2025-02-17T00:45:10.488688","created_date":"2016-06-24"}