{"id":"https://openalex.org/W4386185446","doi":"https://doi.org/10.48550/arxiv.2308.12817","title":"MixNet: Toward Accurate Detection of Challenging Scene Text in the Wild","display_name":"MixNet: Toward Accurate Detection of Challenging Scene Text in the Wild","publication_year":2023,"publication_date":"2023-01-01","ids":{"openalex":"https://openalex.org/W4386185446","doi":"https://doi.org/10.48550/arxiv.2308.12817"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2308.12817","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/abs/2308.12817","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101310689","display_name":"Yuxiang Zeng","orcid":"https://orcid.org/0000-0003-0126-3083"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zeng, Yu-Xiang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5007240491","display_name":"Jun-Wei Hsieh","orcid":"https://orcid.org/0000-0003-4729-0730"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hsieh, Jun-Wei","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100353721","display_name":"Xin Li","orcid":"https://orcid.org/0000-0001-6888-7064"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Xin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5012638803","display_name":"Ming-Ching Chang","orcid":"https://orcid.org/0000-0001-9325-5341"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chang, Ming-Ching","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.919075,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":82,"max":85},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9986,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9986,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9815,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12707","display_name":"Vehicle License Plate Recognition","score":0.9804,"subfield":{"id":"https://openalex.org/subfields/2214","display_name":"Media Technology"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/shuffling","display_name":"Shuffling","score":0.56129724},{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.5482696},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.42544064}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7492795},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6506708},{"id":"https://openalex.org/C167927819","wikidata":"https://www.wikidata.org/wiki/Q1930567","display_name":"Shuffling","level":2,"score":0.56129724},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.5482696},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.5029518},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.48562828},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.46601465},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.42544064},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.3410139},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2308.12817","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2308.12817","pdf_url":"http://arxiv.org/pdf/2308.12817","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":false,"landing_page_url":"https://api.datacite.org/dois/10.48550/arxiv.2308.12817","pdf_url":null,"source":{"id":"https://openalex.org/S4393179698","display_name":"DataCite API","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I4210145204","host_organization_name":"DataCite","host_organization_lineage":["https://openalex.org/I4210145204"],"host_organization_lineage_names":["DataCite"],"type":"metadata"},"license":null,"license_id":null,"version":null}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2308.12817","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[{"display_name":"Sustainable cities and communities","id":"https://metadata.un.org/sdg/11","score":0.73}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4390419005","https://openalex.org/W4388857216","https://openalex.org/W4377131110","https://openalex.org/W4294559962","https://openalex.org/W4213239787","https://openalex.org/W2949444602","https://openalex.org/W2114337652","https://openalex.org/W2046590706","https://openalex.org/W2045745654","https://openalex.org/W1587991081"],"abstract_inverted_index":{"Detecting":[0],"small":[1,43,168],"scene":[2,88,169,189],"text":[3,44,128,154,190],"instances":[4],"in":[5,98,164],"the":[6,12,33,51,63,71,75,82,87,102,150],"wild":[7],"is":[8],"particularly":[9],"challenging,":[10],"where":[11],"influence":[13],"of":[14,35,40,50,86,104,153],"irregular":[15],"positions":[16],"and":[17,37,54,73,116,134,156],"nonideal":[18],"lighting":[19,55],"often":[20],"leads":[21],"to":[22,68,80,100,113,142,149],"detection":[23,129,191],"errors.":[24],"We":[25,90],"present":[26],"MixNet,":[27,178],"a":[28,93,139],"hybrid":[29],"architecture":[30],"that":[31,158,177],"combines":[32],"strengths":[34],"CNNs":[36],"Transformers,":[38],"capable":[39],"accurately":[41],"detecting":[42],"from":[45],"challenging":[46,165],"natural":[47],"scenes,":[48],"regardless":[49],"orientations,":[52],"styles,":[53],"conditions.":[56],"MixNet":[57],"incorporates":[58],"two":[59],"key":[60],"modules:":[61],"(1)":[62],"Feature":[64],"Shuffle":[65],"Network":[66],"(FSNet)":[67],"serve":[69],"as":[70],"backbone":[72,120],"(2)":[74],"Central":[76],"Transformer":[77],"Block":[78],"(CTBlock)":[79],"exploit":[81],"1D":[83],"manifold":[84],"constraint":[85],"text.":[89],"first":[91],"introduce":[92],"novel":[94],"feature":[95],"shuffling":[96],"strategy":[97],"FSNet":[99,119,181],"facilitate":[101],"exchange":[103],"features":[105,111,147],"across":[106],"multiple":[107,188],"scales,":[108],"generating":[109],"high-resolution":[110],"superior":[112],"popular":[114],"ResNet":[115],"HRNet.":[117],"The":[118],"has":[121],"achieved":[122],"significant":[123],"improvements":[124],"over":[125],"many":[126],"existing":[127],"methods,":[130],"including":[131],"PAN,":[132],"DB,":[133],"FAST.":[135],"Then":[136],"we":[137],"design":[138],"complementary":[140],"CTBlock":[141],"leverage":[143],"center":[144],"line":[145],"based":[146],"similar":[148],"medial":[151],"axis":[152],"regions":[155],"show":[157,176],"it":[159],"can":[160],"outperform":[161],"contour-based":[162],"approaches":[163],"cases":[166],"when":[167],"texts":[170],"appear":[171],"closely.":[172],"Extensive":[173],"experimental":[174],"results":[175,186],"which":[179],"mixes":[180],"with":[182],"CTBlock,":[183],"achieves":[184],"state-of-the-art":[185],"on":[187],"datasets.":[192]},"abstract_inverted_index_v3":null,"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4386185446","counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":2}],"updated_date":"2025-04-24T04:48:14.938132","created_date":"2023-08-26"}