{"id":"https://openalex.org/W4297644039","doi":"https://doi.org/10.48550/arxiv.2209.02397","title":"A Scene-Text Synthesis Engine Achieved Through Learning from Decomposed Real-World Data","display_name":"A Scene-Text Synthesis Engine Achieved Through Learning from Decomposed Real-World Data","publication_year":2022,"publication_date":"2022-01-01","ids":{"openalex":"https://openalex.org/W4297644039","doi":"https://doi.org/10.48550/arxiv.2209.02397"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2209.02397","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"submittedVersion","is_accepted":false,"is_published":false},"type":"preprint","type_crossref":"posted-content","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/abs/2209.02397","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5017897073","display_name":"Zhengmi Tang","orcid":"https://orcid.org/0000-0003-2011-8105"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tang, Zhengmi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5009192524","display_name":"Tomo Miyazaki","orcid":"https://orcid.org/0000-0001-5205-0542"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Miyazaki, Tomo","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5020830042","display_name":"Shinichiro Omachi","orcid":"https://orcid.org/0000-0001-7706-9995"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Omachi, Shinichiro","raw_affiliation_strings":[],"affiliations":[]}],"institution_assertions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.0,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":0,"max":60},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9959,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9959,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.9629,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/text-detection","display_name":"Text Detection","score":0.526503},{"id":"https://openalex.org/keywords/bittorrent-tracker","display_name":"BitTorrent tracker","score":0.4118857}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7838203},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5946901},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.5267317},{"id":"https://openalex.org/C2983589003","wikidata":"https://www.wikidata.org/wiki/Q167555","display_name":"Text detection","level":3,"score":0.526503},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.4873622},{"id":"https://openalex.org/C2776321320","wikidata":"https://www.wikidata.org/wiki/Q857525","display_name":"Annotation","level":2,"score":0.46529543},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.4354886},{"id":"https://openalex.org/C57501372","wikidata":"https://www.wikidata.org/wiki/Q2021268","display_name":"BitTorrent tracker","level":3,"score":0.4118857},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.40771425},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.36399058},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.34831256},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.32818368},{"id":"https://openalex.org/C56461940","wikidata":"https://www.wikidata.org/wiki/Q970687","display_name":"Eye tracking","level":2,"score":0.16332501},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2209.02397","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":true,"landing_page_url":"http://arxiv.org/abs/2209.02397","pdf_url":"http://arxiv.org/pdf/2209.02397","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false},{"is_oa":false,"landing_page_url":"https://api.datacite.org/dois/10.48550/arxiv.2209.02397","pdf_url":null,"source":{"id":"https://openalex.org/S4393179698","display_name":"DataCite API","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I4210145204","host_organization_name":"DataCite","host_organization_lineage":["https://openalex.org/I4210145204"],"host_organization_lineage_names":["DataCite"],"type":"metadata"},"license":null,"license_id":null,"version":null}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://arxiv.org/abs/2209.02397","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":["Cornell University"],"type":"repository"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"submittedVersion","is_accepted":false,"is_published":false},"sustainable_development_goals":[],"grants":[],"datasets":[],"versions":[],"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4380994516","https://openalex.org/W2625833328","https://openalex.org/W2392921965","https://openalex.org/W2377979023","https://openalex.org/W2361861616","https://openalex.org/W2358755282","https://openalex.org/W2263699433","https://openalex.org/W2251519152","https://openalex.org/W2218034408","https://openalex.org/W1533177136"],"abstract_inverted_index":{"Scene-text":[0],"image":[1],"synthesis":[2],"techniques":[3],"that":[4,139],"aim":[5],"to":[6,24,27,66,79,176,190,204],"naturally":[7],"compose":[8],"text":[9,40,101,122,142,149,161,174,197,231],"instances":[10],"on":[11,42,98],"background":[12,179],"scene":[13,100,196,230],"images":[14,41,61],"are":[15],"very":[16],"appealing":[17],"for":[18,160,195,229],"training":[19,72],"deep":[20],"neural":[21],"networks":[22,184],"due":[23],"their":[25],"ability":[26],"provide":[28],"accurate":[29],"and":[30,44,95,124,147,170,188,216],"comprehensive":[31],"annotation":[32],"information.":[33],"Prior":[34],"studies":[35,56],"have":[36,57,76],"explored":[37,78],"generating":[38,59],"synthetic":[39,193],"two-dimensional":[43],"three-dimensional":[45],"surfaces":[46],"using":[47],"rules":[48],"derived":[49],"from":[50,81,110],"real-world":[51,83,107],"observations.":[52],"Some":[53],"of":[54,69,117,172,208],"these":[55],"proposed":[58,210,222],"scene-text":[60],"through":[62],"learning;":[63],"however,":[64],"owing":[65],"the":[67,128,157,168,173,178,192,206,209,217,221],"absence":[68],"a":[70,106,133,141,148],"suitable":[71,158],"dataset,":[73,130],"unsupervised":[74],"frameworks":[75],"been":[77],"learn":[80],"existing":[82,214],"data,":[84],"which":[85,164],"might":[86],"not":[87],"yield":[88],"reliable":[89],"performance.":[90],"To":[91],"ease":[92],"this":[93],"dilemma":[94],"facilitate":[96],"research":[97],"learning-based":[99],"synthesis,":[102],"we":[103,131],"introduce":[104],"DecompST,":[105],"dataset":[108,194],"prepared":[109],"some":[111],"public":[112],"benchmarks,":[113],"containing":[114],"three":[115],"types":[116],"annotations:":[118],"quadrilateral-level":[119],"BBoxes,":[120],"stroke-level":[121],"masks,":[123],"text-erased":[125],"images.":[126],"Leveraging":[127],"DecompST":[129],"propose":[132],"Learning-Based":[134],"Text":[135],"Synthesis":[136],"engine":[137],"(LBTS)":[138],"includes":[140],"location":[143],"proposal":[144],"network":[145,152],"(TLPNet)":[146],"appearance":[150],"adaptation":[151],"(TAANet).":[153],"TLPNet":[154],"first":[155],"predicts":[156],"regions":[159],"embedding,":[162],"after":[163],"TAANet":[165],"adaptively":[166],"adjusts":[167],"geometry":[169],"color":[171],"instance":[175],"match":[177],"context.":[180],"After":[181],"training,":[182],"those":[183],"can":[185,224],"be":[186],"integrated":[187],"utilized":[189],"generate":[191,225],"analysis":[198],"tasks.":[199],"Comprehensive":[200],"experiments":[201],"were":[202],"conducted":[203],"validate":[205],"effectiveness":[207],"LBTS":[211,223],"along":[212],"with":[213],"methods,":[215],"experimental":[218],"results":[219],"indicate":[220],"better":[226],"pretraining":[227],"data":[228],"detectors.":[232]},"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W4297644039","counts_by_year":[],"updated_date":"2025-01-18T20:02:58.519163","created_date":"2022-09-30"}