{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,10]],"date-time":"2024-09-10T07:18:25Z","timestamp":1725952705538},"reference-count":43,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2022,5,1]],"date-time":"2022-05-01T00:00:00Z","timestamp":1651363200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2022,5,4]],"date-time":"2022-05-04T00:00:00Z","timestamp":1651622400000},"content-version":"vor","delay-in-days":3,"URL":"http:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Information Sciences"],"published-print":{"date-parts":[[2022,5]]},"DOI":"10.1016\/j.ins.2022.03.004","type":"journal-article","created":{"date-parts":[[2022,3,4]],"date-time":"2022-03-04T19:43:49Z","timestamp":1646423029000},"page":"364-377","update-policy":"http:\/\/dx.doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":16,"special_numbering":"C","title":["Solution of the linear quadratic regulator problem of black box linear systems using reinforcement learning"],"prefix":"10.1016","volume":"595","author":[{"given":"Adolfo","family":"Perrusqu\u00eda","sequence":"first","affiliation":[]}],"member":"78","reference":[{"key":"10.1016\/j.ins.2022.03.004_b0005","article-title":"Self-coupling black box model of a dynamic system based on ann and its application","author":"Chen","year":"2020","journal-title":"Math. Probl. Eng."},{"key":"10.1016\/j.ins.2022.03.004_b0010","doi-asserted-by":"crossref","unstructured":"L. Ljung, Black-box models from input-output measurements, in: Imtc 2001. proceedings of the 18th ieee instrumentation and measurement technology conference. rediscovering measurement in the age of informatics (cat. no. 01ch 37188), Vol. 1, IEEE, 2001, pp. 138\u2013146. doi:10.1109\/IMTC.2001.928802.","DOI":"10.1109\/IMTC.2001.928802"},{"key":"10.1016\/j.ins.2022.03.004_b0015","first-page":"304","article-title":"Fuzzy modeling from black-box data with deep learning techniques","author":"De la Rosa","year":"2017","journal-title":"International Symposium on Neural Networks, Springer"},{"issue":"2","key":"10.1016\/j.ins.2022.03.004_b0020","doi-asserted-by":"crossref","first-page":"301","DOI":"10.1109\/70.760351","article-title":"Neural network output feedback control of robot manipulators","volume":"15","author":"Kim","year":"1999","journal-title":"IEEE Trans. Robot. Autom."},{"issue":"5","key":"10.1016\/j.ins.2022.03.004_b0025","doi-asserted-by":"crossref","first-page":"1061","DOI":"10.1007\/s12369-019-00579-y","article-title":"Simplified stable admittance control using end-effector orientations","volume":"12","author":"Yu","year":"2020","journal-title":"Int. J. Soc. Robot."},{"issue":"4","key":"10.1016\/j.ins.2022.03.004_b0030","doi-asserted-by":"crossref","first-page":"933","DOI":"10.1177\/0954407020978319","article-title":"Path planning and robust fuzzy output-feedback control for unmanned ground vehicles with obstacle avoidance","volume":"235","author":"Chen","year":"2021","journal-title":"Proc. Inst. Mech. Eng., Part D: J. Autom. 
Eng."},{"issue":"1","key":"10.1016\/j.ins.2022.03.004_b0035","doi-asserted-by":"crossref","first-page":"27","DOI":"10.1007\/s11104-011-0963-z","article-title":"The curse of the black box","volume":"350","author":"Cortois","year":"2012","journal-title":"Plant Soil"},{"issue":"2","key":"10.1016\/j.ins.2022.03.004_b0040","doi-asserted-by":"crossref","first-page":"271","DOI":"10.1007\/s10846-019-01058-2","article-title":"Human-in-the-loop control using euler angles","volume":"97","author":"Perrusqu\u00eda","year":"2020","journal-title":"J. Intell. Robot. Syst."},{"key":"10.1016\/j.ins.2022.03.004_b0045","doi-asserted-by":"crossref","first-page":"65676","DOI":"10.1109\/ACCESS.2021.3073364","article-title":"Constant speed control of slider-crank mechanisms: A joint-task space hybrid control approach","volume":"9","author":"Flores-Campos","year":"2021","journal-title":"IEEE Access"},{"issue":"1","key":"10.1016\/j.ins.2022.03.004_b0050","doi-asserted-by":"crossref","first-page":"491","DOI":"10.3233\/JIFS-161822","article-title":"Continuous-time path planning for multi-agents with fuzzy reinforcement learning","volume":"33","author":"Luviano","year":"2017","journal-title":"J. Intell. Fuzzy Syst."},{"key":"10.1016\/j.ins.2022.03.004_b0055","doi-asserted-by":"crossref","unstructured":"A. Perrusqu\u00eda, J.A. Flores-Campos, W. Yu, Optimal sliding mode control for cutting tasks of quick-return mechanisms, ISA transactions doi:10.1016\/j.isatra.2021.04.033.","DOI":"10.1016\/j.isatra.2021.04.033"},{"key":"10.1016\/j.ins.2022.03.004_b0060","volume":"vol. 3","author":"Chen","year":"1999"},{"issue":"1","key":"10.1016\/j.ins.2022.03.004_b0065","doi-asserted-by":"crossref","first-page":"1705","DOI":"10.1016\/j.ifacol.2017.08.496","article-title":"Levant\u2019s arbitrary order differentiator with varying gain","volume":"50","author":"Moreno","year":"2017","journal-title":"IFAC-PapersOnLine"},{"key":"10.1016\/j.ins.2022.03.004_b0070","doi-asserted-by":"crossref","unstructured":"C. Edwards, S.K. Spurgeon, C.P. Tan, N. Patel, Sliding-mode observers, in: Mathematical methods for robust and nonlinear control, Springer, 2007, pp. 221\u2013242.","DOI":"10.1007\/978-1-84800-025-4_8"},{"key":"10.1016\/j.ins.2022.03.004_b0075","series-title":"Reinforcement Learning: An Introduction, Cambridge","author":"Sutton","year":"1998"},{"key":"10.1016\/j.ins.2022.03.004_b0080","series-title":"Optimal control","author":"Lewis","year":"2012"},{"issue":"6","key":"10.1016\/j.ins.2022.03.004_b0085","doi-asserted-by":"crossref","first-page":"76","DOI":"10.1109\/MCS.2012.2214134","article-title":"Reinforcement learning and feedback control: Using natural decision methods to design optimal adaptive controllers","volume":"32","author":"Lewis","year":"2012","journal-title":"IEEE Control Syst. Mag."},{"issue":"12","key":"10.1016\/j.ins.2022.03.004_b0090","doi-asserted-by":"crossref","first-page":"4441","DOI":"10.1109\/TCYB.2018.2868715","article-title":"Data-based optimal control of multiagent systems: A reinforcement learning design approach","volume":"49","author":"Zhang","year":"2018","journal-title":"IEEE Trans. 
Cybern."},{"key":"10.1016\/j.ins.2022.03.004_b0095","doi-asserted-by":"crossref","first-page":"150","DOI":"10.1016\/j.neunet.2015.08.007","article-title":"Reinforcement learning solution for HJB equation arising in constrained optimal control problem","volume":"71","author":"Luo","year":"2015","journal-title":"Neural Networks"},{"issue":"6","key":"10.1016\/j.ins.2022.03.004_b0100","doi-asserted-by":"crossref","first-page":"2042","DOI":"10.1109\/TNNLS.2017.2773458","article-title":"Optimal and autonomous control using reinforcement learning: A survey","volume":"29","author":"Kiumarsi","year":"2018","journal-title":"IEEE Trans. Neural Networks Learn. Syst."},{"key":"10.1016\/j.ins.2022.03.004_b0105","doi-asserted-by":"crossref","first-page":"193","DOI":"10.1016\/j.automatica.2013.09.043","article-title":"Integral reinforcement learning and experience replay for adaptive optimal control of partially-unknown constrained-input continuous-time systems","volume":"50","author":"Modares","year":"2014","journal-title":"Automatica"},{"key":"10.1016\/j.ins.2022.03.004_b0110","doi-asserted-by":"crossref","first-page":"145","DOI":"10.1016\/j.neucom.2021.01.096","article-title":"Identification and optimal control of nonlinear systems using recurrent neural networks and reinforcement learning: An overview","volume":"438","author":"Perrusqu\u00eda","year":"2021","journal-title":"Neurocomputing"},{"issue":"10","key":"10.1016\/j.ins.2022.03.004_b0115","doi-asserted-by":"crossref","first-page":"2699","DOI":"10.1016\/j.automatica.2012.06.096","article-title":"Computational adaptive optimal control for continuous-time linear systems with completely unknown dynamics","volume":"48","author":"Jiang","year":"2012","journal-title":"Automatica"},{"key":"10.1016\/j.ins.2022.03.004_b0120","doi-asserted-by":"crossref","first-page":"1320","DOI":"10.1016\/j.automatica.2010.05.002","article-title":"Model-free H\u221e control design for unknown linear discrete-time systems via Q-learning with LMI","volume":"46","author":"Kim","year":"2010","journal-title":"Automatica"},{"issue":"5","key":"10.1016\/j.ins.2022.03.004_b0125","doi-asserted-by":"crossref","first-page":"878","DOI":"10.1016\/j.automatica.2010.02.018","article-title":"Online actor\u2013critic algorithm to solve the continuous-time infinite horizon optimal control problem","volume":"46","author":"Vamvoudakis","year":"2010","journal-title":"Automatica"},{"issue":"7","key":"10.1016\/j.ins.2022.03.004_b0130","doi-asserted-by":"crossref","first-page":"2920","DOI":"10.1002\/rnc.4911","article-title":"Robust control under worst-case uncertainty for unknown nonlinear systems using modified reinforcement learning","volume":"30","author":"Perrusqu\u00eda","year":"2020","journal-title":"Int. J. 
Robust Nonlinear Control"},{"issue":"4","key":"10.1016\/j.ins.2022.03.004_b0135","doi-asserted-by":"crossref","first-page":"1167","DOI":"10.1016\/j.automatica.2014.02.015","article-title":"Reinforcement Q-learning for optimal tracking control of linear discrete-time systems with unknown dynamics","volume":"50","author":"Kiumarsi","year":"2014","journal-title":"Automatica"},{"key":"10.1016\/j.ins.2022.03.004_b0140","series-title":"2019 6th International Conference on Control, Decision and Information Technologies (CoDIT)","first-page":"91","article-title":"Large space dimension reinforcement learning for robot position\/force discrete control","author":"Perrusqu\u00eda","year":"2019"},{"issue":"11","key":"10.1016\/j.ins.2022.03.004_b0145","doi-asserted-by":"crossref","first-page":"3051","DOI":"10.1109\/TAC.2014.2317301","article-title":"Linear quadratic tracking control of partially-unknown continuous-time systems using reinforcement learning","volume":"59","author":"Modares","year":"2014","journal-title":"IEEE Trans. Autom. Control"},{"key":"10.1016\/j.ins.2022.03.004_b0150","doi-asserted-by":"crossref","first-page":"358","DOI":"10.1016\/j.ins.2021.03.043","article-title":"Nonlinear control using human behavior learning","volume":"569","author":"Perrusqu\u00eda","year":"2021","journal-title":"Inf. Sci."},{"issue":"2","key":"10.1016\/j.ins.2022.03.004_b0155","doi-asserted-by":"crossref","first-page":"267","DOI":"10.1108\/IR-10-2018-0209","article-title":"Position\/force control of robot manipulators using reinforcement learning","volume":"46","author":"Perrusqu\u00eda","year":"2019","journal-title":"Ind. Robot"},{"key":"10.1016\/j.ins.2022.03.004_b0160","series-title":"Neural Network control of robot manipulators and nonlinear systems","author":"Lewis","year":"1999"},{"key":"10.1016\/j.ins.2022.03.004_b0165","doi-asserted-by":"crossref","unstructured":"A. Perrusqu\u00eda, W. Yu, Discrete-time H2 neural control using reinforcement learning, IEEE Trans. Neural Networks Learn. Syst.https:\/\/doi.org\/10.1109\/TNNLS.2020.3026010.","DOI":"10.1109\/TNNLS.2020.3026010"},{"issue":"2","key":"10.1016\/j.ins.2022.03.004_b0170","doi-asserted-by":"crossref","first-page":"477","DOI":"10.1016\/j.automatica.2008.08.017","article-title":"Adaptive optimal control for continuous-time linear systems based on policy iteration","volume":"45","author":"Vrabie","year":"2009","journal-title":"Automatica"},{"issue":"1","key":"10.1016\/j.ins.2022.03.004_b0175","doi-asserted-by":"crossref","first-page":"231","DOI":"10.1007\/s13042-020-01167-7","article-title":"Multi-agent reinforcement learning for redundant robot control in task-space","volume":"12","author":"Perrusquia","year":"2021","journal-title":"Int. J. Mach. Learn. Cybern."},{"key":"10.1016\/j.ins.2022.03.004_b0180","doi-asserted-by":"crossref","first-page":"14","DOI":"10.1016\/j.sysconle.2016.12.003","article-title":"Q-learning for continuous-time linear systems: A model-free infinite horizon optimal control approach","volume":"100","author":"Vamvoudakis","year":"2017","journal-title":"Syst. Control Lett."},{"issue":"1","key":"10.1016\/j.ins.2022.03.004_b0185","doi-asserted-by":"crossref","first-page":"264","DOI":"10.1109\/TCST.2014.2322778","article-title":"Adaptive suboptimal output-feedback control for linear systems using integral reinforcement learning","volume":"23","author":"Zhu","year":"2015","journal-title":"IEEE Trans. Control Syst. 
Technol."},{"issue":"5","key":"10.1016\/j.ins.2022.03.004_b0190","doi-asserted-by":"crossref","first-page":"1523","DOI":"10.1109\/TNNLS.2018.2870075","article-title":"Output feedback Q-learning control for the discrete-time linear quadratic regulator problem","volume":"30","author":"Rizvi","year":"2019","journal-title":"IEEE Trans. Neural Networks Learn. Syst."},{"key":"10.1016\/j.ins.2022.03.004_b0195","doi-asserted-by":"crossref","DOI":"10.1016\/j.automatica.2021.109576","article-title":"Q-learning algorithm in solving consensusability problem of discrete-time multi-agent systems","volume":"128","author":"Feng","year":"2021","journal-title":"Automatica"},{"key":"10.1016\/j.ins.2022.03.004_b0200","series-title":"Nonlinear systems","author":"Khalil","year":"2002"},{"issue":"4","key":"10.1016\/j.ins.2022.03.004_b0205","doi-asserted-by":"crossref","first-page":"770","DOI":"10.1080\/00207721.2020.1839142","article-title":"Continuous-time reinforcement learning for robust control under worst-case uncertainty","volume":"52","author":"Perrusqu\u00eda","year":"2021","journal-title":"Int. J. Syst. Sci."},{"issue":"2","key":"10.1016\/j.ins.2022.03.004_b0210","doi-asserted-by":"crossref","first-page":"165","DOI":"10.1109\/TCYB.2014.2322116","article-title":"Continuous-time Q-learning for infinite-horizon discounted cost linear quadratic regulator problems","volume":"45","author":"Palanisamy","year":"2015","journal-title":"IEEE Trans. Cybern."},{"key":"10.1016\/j.ins.2022.03.004_b0215","doi-asserted-by":"crossref","unstructured":"A. Perrusqu\u00eda, W. Yu, Neural H2 control using continuous-time reinforcement learning, IEEE Trans. Cybern.https:\/\/doi.org\/10.1109\/TCYB.2020.3028988.","DOI":"10.1109\/TCYB.2020.3028988"}],"container-title":["Information Sciences"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0020025522002031?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0020025522002031?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2023,1,28]],"date-time":"2023-01-28T07:32:09Z","timestamp":1674891129000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0020025522002031"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,5]]},"references-count":43,"alternative-id":["S0020025522002031"],"URL":"https:\/\/doi.org\/10.1016\/j.ins.2022.03.004","relation":{},"ISSN":["0020-0255"],"issn-type":[{"value":"0020-0255","type":"print"}],"subject":[],"published":{"date-parts":[[2022,5]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"Solution of the linear quadratic regulator problem of black box linear systems using reinforcement learning","name":"articletitle","label":"Article Title"},{"value":"Information Sciences","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.ins.2022.03.004","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2022 The Author. Published by Elsevier Inc.","name":"copyright","label":"Copyright"}]}}