{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,15]],"date-time":"2024-09-15T16:37:42Z","timestamp":1726418262931},"reference-count":37,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2023,3,1]],"date-time":"2023-03-01T00:00:00Z","timestamp":1677628800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2023,3,1]],"date-time":"2023-03-01T00:00:00Z","timestamp":1677628800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2023,3,1]],"date-time":"2023-03-01T00:00:00Z","timestamp":1677628800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2023,3,1]],"date-time":"2023-03-01T00:00:00Z","timestamp":1677628800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2023,3,1]],"date-time":"2023-03-01T00:00:00Z","timestamp":1677628800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,3,1]],"date-time":"2023-03-01T00:00:00Z","timestamp":1677628800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Applied Soft Computing"],"published-print":{"date-parts":[[2023,3]]},"DOI":"10.1016\/j.asoc.2023.110030","type":"journal-article","created":{"date-parts":[[2023,1,14]],"date-time":"2023-01-14T02:11:18Z","timestamp":1673662278000},"page":"110030","update-policy":"http:\/\/dx.doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":3,"special_numbering":"C","title":["An efficient regular expression inference approach for relevant image extraction"],"prefix":"10.1016","volume":"135","author":[{"ORCID":"http:\/\/orcid.org\/0000-0002-4253-8920","authenticated-orcid":false,"given":"Hayri Volkan","family":"Agun","sequence":"first","affiliation":[]},{"ORCID":"http:\/\/orcid.org\/0000-0003-4351-2244","authenticated-orcid":false,"given":"Erdin\u00e7","family":"Uzun","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"issue":"1","key":"10.1016\/j.asoc.2023.110030_b1","doi-asserted-by":"crossref","first-page":"22","DOI":"10.1109\/MMUL.2004.1261103","article-title":"Categorizing Images in Web Documents","volume":"11","author":"Hu","year":"2004","journal-title":"IEEE Multimedia"},{"issue":"2","key":"10.1016\/j.asoc.2023.110030_b2","doi-asserted-by":"crossref","first-page":"1638","DOI":"10.1016\/j.asoc.2010.05.003","article-title":"Intelligent classification of web pages using contextual and visual features","volume":"11","author":"Ahmadi","year":"2011","journal-title":"Appl. Soft Comput."},{"key":"10.1016\/j.asoc.2023.110030_b3","doi-asserted-by":"crossref","first-page":"1234","DOI":"10.1016\/j.ins.2019.10.045","article-title":"Determining the most representative image on a Web page","volume":"512","author":"Vyas","year":"2020","journal-title":"Inform. Sci."},{"issue":"6","key":"10.1016\/j.asoc.2023.110030_b4","first-page":"3389","article-title":"A regular expression generator based on CSS selectors for efficient extraction from HTML pages","volume":"28","author":"Uzun","year":"2020","journal-title":"Turk. J. Electr. Eng. Comput. Sci."},{"issue":"5","key":"10.1016\/j.asoc.2023.110030_b5","doi-asserted-by":"crossref","first-page":"850","DOI":"10.1136\/amiajnl-2013-002411","article-title":"Learning regular expressions for clinical text classification","volume":"21","author":"Bui","year":"2014","journal-title":"J. Am. Med. Inform. Assoc."},{"key":"10.1016\/j.asoc.2023.110030_b6","first-page":"16","article-title":"Learning text patterns using separate-and-conquer genetic programming","author":"Bartoli","year":"2015"},{"key":"10.1016\/j.asoc.2023.110030_b7","series-title":"Proceedings of the ACM Symposium on Applied Computing, vol. 04-08-Apri","first-page":"97","article-title":"Active learning approaches for learning regular expressions with genetic programming","author":"Bartoli","year":"2016"},{"key":"10.1016\/j.asoc.2023.110030_b8","first-page":"455","article-title":"Improving recall of regular expressions for information extraction","volume":"vol. 7651","author":"Murthy","year":"2012"},{"issue":"6","key":"10.1016\/j.asoc.2023.110030_b9","first-page":"27","article-title":"Boilerplate removal and content extraction from dynamic web pages","volume":"4","author":"Ei San","year":"2014","journal-title":"Int. J. Comput. Sci. Eng. Appl."},{"key":"10.1016\/j.asoc.2023.110030_b10","first-page":"167","article-title":"Web2Text: Deep structured boilerplate removal","volume":"vol. 10772","author":"Vogels","year":"2018"},{"key":"10.1016\/j.asoc.2023.110030_b11","doi-asserted-by":"crossref","first-page":"132","DOI":"10.1016\/j.ins.2015.12.025","article-title":"Language independent web news extraction system based on text detection framework","volume":"342","author":"Wu","year":"2016","journal-title":"Inform. Sci."},{"key":"10.1016\/j.asoc.2023.110030_b12","first-page":"3","article-title":"Web page structured content detection using supervised machine learning","author":"Velloso","year":"2019"},{"key":"10.1016\/j.asoc.2023.110030_b13","doi-asserted-by":"crossref","first-page":"296","DOI":"10.1016\/j.eswa.2017.02.045","article-title":"Using linguistic features to automatically extract web page title","volume":"79","author":"Gali","year":"2017","journal-title":"Expert Syst. Appl."},{"key":"10.1016\/j.asoc.2023.110030_b14","first-page":"406","article-title":"Extracting content structure for web pages based on visual representation","author":"Cai","year":"2003"},{"issue":"2","key":"10.1016\/j.asoc.2023.110030_b15","doi-asserted-by":"crossref","first-page":"179","DOI":"10.1007\/s11280-004-4873-3","article-title":"Automating content extraction of HTML documents","volume":"8","author":"Gupta","year":"2005","journal-title":"World Wide Web"},{"issue":"10","key":"10.1016\/j.asoc.2023.110030_b16","doi-asserted-by":"crossref","first-page":"1181","DOI":"10.1002\/spe.2195","article-title":"An effective and efficient web content extractor for optimizing the crawling process","volume":"44","author":"Uzun","year":"2014","journal-title":"Softw. - Pract. Exp."},{"key":"10.1016\/j.asoc.2023.110030_b17","doi-asserted-by":"crossref","first-page":"68","DOI":"10.1016\/j.future.2018.06.028","article-title":"Machine learning based heterogeneous web advertisements detection using a diverse feature set","volume":"89","author":"Shaqoor Nengroo","year":"2018","journal-title":"Future Gener. Comput. Syst."},{"key":"10.1016\/j.asoc.2023.110030_b18","series-title":"IEEE International Symposium on Industrial Electronics, vol. 2016-Novem","first-page":"1099","article-title":"HTML web content extraction using paragraph tags","author":"Carey","year":"2016"},{"issue":"2","key":"10.1016\/j.asoc.2023.110030_b19","doi-asserted-by":"crossref","first-page":"271","DOI":"10.1007\/s10844-014-0323-6","article-title":"Image understanding and the web: a state-of-the-art review","volume":"43","author":"Fauzi","year":"2014","journal-title":"J. Intell. Inf. Syst."},{"key":"10.1016\/j.asoc.2023.110030_b20","series-title":"Proceedings of the ACM SIGKDD International Conference on Knowledge Discovery and Data Mining","first-page":"1345","article-title":"Can we learn a template-independent wrapper for news article extraction from a single training site?","author":"Wang","year":"2009"},{"issue":"2","key":"10.1016\/j.asoc.2023.110030_b21","doi-asserted-by":"crossref","first-page":"283","DOI":"10.1145\/276305.276330","article-title":"NoDoSE - A tool for semi-automatically extracting structured and semistructured data from text documents","volume":"27","author":"Adelberg","year":"1998","journal-title":"SIGMOD Rec."},{"key":"10.1016\/j.asoc.2023.110030_b22","series-title":"Proceedings - International Conference on Data Engineering","first-page":"611","article-title":"XWRAP: An XML-enabled wrapper construction system for Web information sources","author":"Liu","year":"2000"},{"key":"10.1016\/j.asoc.2023.110030_b23","series-title":"Thirteenth International World Wide Web Conference Proceedings, WWW2004","first-page":"502","article-title":"Automatic web news extraction using tree edit distance","author":"De Reis","year":"2004"},{"key":"10.1016\/j.asoc.2023.110030_b24","series-title":"2013 IEEE\/ACIS 12th International Conference on Computer and Information Science, ICIS 2013 - Proceedings","first-page":"65","article-title":"Layout-tree-based approach for identifying visually similar blocks in a web page","author":"Zeng","year":"2013"},{"issue":"4","key":"10.1016\/j.asoc.2023.110030_b25","doi-asserted-by":"crossref","first-page":"521","DOI":"10.1016\/j.ic.2008.12.008","article-title":"Algorithms for learning regular expressions from positive data","volume":"207","author":"Fernau","year":"2009","journal-title":"Inform. and Comput."},{"issue":"12","key":"10.1016\/j.asoc.2023.110030_b26","doi-asserted-by":"crossref","first-page":"72","DOI":"10.1109\/MC.2014.344","article-title":"Automatic synthesis of regular expressions from examples","volume":"47","author":"Bartoli","year":"2014","journal-title":"Computer"},{"key":"10.1016\/j.asoc.2023.110030_b27","first-page":"3687","article-title":"Learning to identify concise regular expressions that describe email campaigns","volume":"16","author":"Prasse","year":"2015","journal-title":"J. Mach. Learn. Res."},{"key":"10.1016\/j.asoc.2023.110030_b28","doi-asserted-by":"crossref","first-page":"29270","DOI":"10.1109\/ACCESS.2020.2972205","article-title":"CREGEX: A biomedical text classifier based on automatically generated regular expressions","volume":"8","author":"Flores","year":"2020","journal-title":"IEEE Access"},{"year":"2019","series-title":"Sketch-driven regular expression generation from natural language and examples","author":"Ye","key":"10.1016\/j.asoc.2023.110030_b29"},{"key":"10.1016\/j.asoc.2023.110030_b30","series-title":"International Conference on Tools and Algorithms for the Construction and Analysis of Systems","first-page":"152","article-title":"FOREST: An interactive multi-tree synthesizer for regular expressions","author":"Ferreira","year":"2021"},{"key":"10.1016\/j.asoc.2023.110030_b31","series-title":"EMNLP 2008 - 2008 Conference on Empirical Methods in Natural Language Processing, Proceedings of the Conference: A Meeting of SIGDAT, a Special Interest Group of the ACL","first-page":"21","article-title":"Regular expression learning for information extraction","author":"Li","year":"2008"},{"key":"10.1016\/j.asoc.2023.110030_b32","series-title":"Workshops At the Thirty-Second AAAI Conference on Artificial Intelligence\/Statistical Modeling of Natural Software Corpora, vol. 13","first-page":"791","article-title":"Generating regular expressions from natural language specifications: Are we there yet?","author":"Zhong","year":"2018"},{"issue":"7","key":"10.1016\/j.asoc.2023.110030_b33","doi-asserted-by":"crossref","first-page":"1063","DOI":"10.1109\/TPAMI.2005.143","article-title":"Learning deterministic finite automata with a smart state labeling evolutionary algorithm","volume":"27","author":"Lucas","year":"2005","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"10.1016\/j.asoc.2023.110030_b34","series-title":"2020 IEEE Congress on Evolutionary Computation, CEC 2020 - Conference Proceedings","article-title":"Data-driven regular expressions evolution for medical text classification using genetic programming","author":"Liu","year":"2020"},{"issue":"1\u20133","key":"10.1016\/j.asoc.2023.110030_b35","doi-asserted-by":"crossref","first-page":"100","DOI":"10.1016\/S0019-9958(85)80046-2","article-title":"Algorithms for approximate string matching","volume":"64","author":"Ukkonen","year":"1985","journal-title":"Inf. Control"},{"issue":"1","key":"10.1016\/j.asoc.2023.110030_b36","doi-asserted-by":"crossref","first-page":"67","DOI":"10.1007\/s10032-002-0082-8","article-title":"Fast string correction with levenshtein automata","volume":"5","author":"Schulz","year":"2002","journal-title":"Int. J. Document Anal. Recognit."},{"issue":"1","key":"10.1016\/j.asoc.2023.110030_b37","doi-asserted-by":"crossref","first-page":"10","DOI":"10.1145\/1656274.1656278","article-title":"The WEKA data mining software: an update","volume":"11","author":"Hall","year":"2009","journal-title":"ACM SIGKDD Explor. Newsl."}],"container-title":["Applied Soft Computing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S1568494623000480?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S1568494623000480?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2023,2,16]],"date-time":"2023-02-16T14:11:34Z","timestamp":1676556694000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S1568494623000480"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,3]]},"references-count":37,"alternative-id":["S1568494623000480"],"URL":"https:\/\/doi.org\/10.1016\/j.asoc.2023.110030","relation":{},"ISSN":["1568-4946"],"issn-type":[{"type":"print","value":"1568-4946"}],"subject":[],"published":{"date-parts":[[2023,3]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"An efficient regular expression inference approach for relevant image extraction","name":"articletitle","label":"Article Title"},{"value":"Applied Soft Computing","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.asoc.2023.110030","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2023 Elsevier B.V. All rights reserved.","name":"copyright","label":"Copyright"}],"article-number":"110030"}}