{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T01:57:52Z","timestamp":1740103072674,"version":"3.37.3"},"publisher-location":"New York, NY, USA","reference-count":116,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,7,10]]},"DOI":"10.1145\/3663529.3663849","type":"proceedings-article","created":{"date-parts":[[2024,7,10]],"date-time":"2024-07-10T19:43:13Z","timestamp":1720640593000},"page":"294-305","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Rethinking Software Engineering in the Era of Foundation Models: A Curated Catalogue of Challenges in the Development of Trustworthy FMware"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-7749-5513","authenticated-orcid":false,"given":"Ahmed E.","family":"Hassan","sequence":"first","affiliation":[{"name":"Queen's University, Kingston, Canada"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4034-6650","authenticated-orcid":false,"given":"Dayi","family":"Lin","sequence":"additional","affiliation":[{"name":"Centre for Software Excellence, Huawei Canada, Kingston, Canada"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1812-5365","authenticated-orcid":false,"given":"Gopi Krishnan","family":"Rajbahadur","sequence":"additional","affiliation":[{"name":"Centre for Software Excellence, Huawei Canada, Kingston, Canada"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5880-5114","authenticated-orcid":false,"given":"Keheliya","family":"Gallaba","sequence":"additional","affiliation":[{"name":"Centre for Software Excellence, Huawei Canada, Kingston, Canada"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5494-685X","authenticated-orcid":false,"given":"Filipe Roseiro","family":"Cogo","sequence":"additional","affiliation":[{"name":"Centre for Software Excellence, Huawei Canada, Kingston, Canada"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9103-5820","authenticated-orcid":false,"given":"Boyuan","family":"Chen","sequence":"additional","affiliation":[{"name":"Centre for Software Excellence, Huawei Canada, Kingston, Canada"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3921-1724","authenticated-orcid":false,"given":"Haoxiang","family":"Zhang","sequence":"additional","affiliation":[{"name":"Centre for Software Excellence, Huawei Canada, Kingston, Canada"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8649-6163","authenticated-orcid":false,"given":"Kishanthan","family":"Thangarajah","sequence":"additional","affiliation":[{"name":"Centre for Software Excellence, Huawei Canada, Kingston, Canada"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5419-9284","authenticated-orcid":false,"given":"Gustavo","family":"Oliva","sequence":"additional","affiliation":[{"name":"Centre for Software Excellence, Huawei Canada, Kingston, Canada"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7133-2219","authenticated-orcid":false,"given":"Jiahuei (Justina)","family":"Lin","sequence":"additional","affiliation":[{"name":"Centre for Software Excellence, Huawei Canada, Kingston, Canada"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1523-7628","authenticated-orcid":false,"given":"Wali Mohammad","family":"Abdullah","sequence":"additional","affiliation":[{"name":"Centre for Software Excellence, Huawei Canada, Kingston, Canada"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3063-3197","authenticated-orcid":false,"given":"Zhen Ming (Jack)","family":"Jiang","sequence":"additional","affiliation":[{"name":"York University, Toronto, Canada"}]}],"member":"320","published-online":{"date-parts":[[2024,7,10]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"[n. d.]. AutoGPT Documentation. https:\/\/docs.agpt.co\/ Accessed 01-17-2024"},{"key":"e_1_3_2_1_2_1","unstructured":"[n. d.]. BabyAGI. https:\/\/github.com\/yoheinakajima\/babyagi Accessed 17-01-2024"},{"key":"e_1_3_2_1_3_1","unstructured":"[n. d.]. CrewAI: Framework for orchestrating role-playing autonomous AI agents. https:\/\/github.com\/joaomdmoura\/crewai Accessed 17-01-2024"},{"key":"e_1_3_2_1_4_1","unstructured":"[n. d.]. MiniAGI - a minimal general-purpose autonomous agent based on GPT-3.5 \/ GPT-4.. https:\/\/github.com\/muellerberndt\/mini-agi Accessed 17-01-2024"},{"key":"e_1_3_2_1_5_1","unstructured":"A. Alford. 2024. OpenAI Releases New Embedding Models and Improved GPT-4 Turbo. https:\/\/www.infoq.com\/news\/2024\/02\/openai-model-updates\/"},{"volume-title":"Proceedings of the 41st International Conference on Software Engineering (ICSE-SEIP \u201919)","author":"Amershi S.","key":"e_1_3_2_1_6_1","unstructured":"S. Amershi, A. Begel, C. Bird, R. DeLine, H. Gall, E. Kamar, N. Nagappan, B. Nushi, and T. Zimmermann. 2019. Software engineering for machine learning: a case study. In Proceedings of the 41st International Conference on Software Engineering (ICSE-SEIP \u201919). IEEE Press, 291\u2013300."},{"key":"e_1_3_2_1_7_1","unstructured":"D. Amodei C. Olah J. Steinhardt P. Christiano and J. Schulman. 2016. Concrete Problems in AI Safety. arxiv:1606.06565."},{"key":"e_1_3_2_1_8_1","unstructured":"Artificial Intelligence Standards Committee (C\/AISC). [n. d.]. P3394: Standard for Large Language Model Agent Interface. https:\/\/standards.ieee.org\/ieee\/3394\/11377 Accessed 02-10-2024"},{"volume-title":"Proc. of Int. Conf. on Management of Data (SIGMOD). 362\u2013375","author":"Bach S.H","key":"e_1_3_2_1_9_1","unstructured":"S.H Bach, D. Rodriguez, Y. Liu, C. Luo, and H. Shao. 2019. Snorkel drybell: A case study in deploying weak supervision at industrial scale. In Proc. of Int. Conf. on Management of Data (SIGMOD). 362\u2013375."},{"key":"e_1_3_2_1_10_1","volume-title":"Constitutional AI: Harmlessness from AI Feedback. arxiv:2212.08073.","author":"Bai Y.","year":"2022","unstructured":"Y. Bai, S. Kadavath, S. Kundu, A. Askell, and J. Kernion. 2022. Constitutional AI: Harmlessness from AI Feedback. arxiv:2212.08073."},{"key":"e_1_3_2_1_11_1","unstructured":"M. Benjamin P. Gagnon N. Rostamzadeh C. Pal Y. Bengio and A. Shee. 2019. Towards standardization of data licenses: The montreal data license. arXiv preprint arXiv:1903.12262."},{"key":"e_1_3_2_1_12_1","unstructured":"S. Biegel R. El-Khatib L. O. V. B. Oliveira M. Baak and N. Aben. 2021. Active weasul: improving weak supervision with active learning. arXiv preprint arXiv:2104.14847."},{"key":"e_1_3_2_1_13_1","unstructured":"R. Bommasani D.A Hudson E. Adeli and Altman. 2021. On the opportunities and risks of foundation models. arXiv preprint arXiv:2108.07258."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.jss.2020.110542"},{"key":"e_1_3_2_1_15_1","unstructured":"D. Brajovic N. Renner V.P. Goebels P. Wagner and B. Fresz. 2023. Model Reporting for Certifiable AI: A Proposal from Merging EU Regulation into AI Development. arXiv preprint arXiv:2307.11525."},{"key":"e_1_3_2_1_16_1","unstructured":"T. Brown B. Mann N. Ryder M. Subbiah and J.D. Kaplan. 2020. Language models are few-shot learners. Advances in neural information processing systems 33 (2020) 1877\u20131901."},{"key":"e_1_3_2_1_17_1","unstructured":"M. Casey. 2023. LLMS high priority for Enterprise Data Science but concerns remain. https:\/\/snorkel.ai\/poll-data-llm-high-priority-enterprise-data-science-concerns-remain Accessed 01-29-2024"},{"volume-title":"Proc. of ACM Joint Meeting on ESEC\/FSE. ACM, 429\u2013440","author":"Chakraborty J.","key":"e_1_3_2_1_18_1","unstructured":"J. Chakraborty, S. Majumder, and T. Menzies. 2021. Bias in machine learning software: why? how? what to do? In Proc. of ACM Joint Meeting on ESEC\/FSE. ACM, 429\u2013440."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"crossref","unstructured":"K.K Chang M. Cramer S. Soni and D. Bamman. 2023. Speak memory: An archaeology of books known to chatgpt\/gpt-4. arXiv preprint arXiv:2305.00118.","DOI":"10.18653\/v1\/2023.emnlp-main.453"},{"key":"e_1_3_2_1_20_1","unstructured":"J. Chen N. Yoshida and H. Takada. 2023. An investigation of licensing of datasets for machine learning based on the GQM model. arXiv preprint arXiv:2303.13735."},{"key":"e_1_3_2_1_21_1","unstructured":"L. Chen M. Zaharia and J. Zou. 2023. How is ChatGPT\u2019s behavior changing over time? arxiv:2307.09009."},{"key":"e_1_3_2_1_22_1","volume-title":"Prompt Sapper: A LLM-Empowered Production Tool for Building AI Chains. ACM Trans. on Softw. Eng. and Methodology, Dec., issn:1557-7392","author":"Cheng Y.","year":"2023","unstructured":"Y. Cheng, J. Chen, Q. Huang, Z. Xing, and X. Xu. 2023. Prompt Sapper: A LLM-Empowered Production Tool for Building AI Chains. ACM Trans. on Softw. Eng. and Methodology, Dec., issn:1557-7392"},{"key":"e_1_3_2_1_23_1","unstructured":"T. Claburn. 2023. GitHub Microsoft OpenAI fail to wriggle out of Copilot copyright lawsuit. https:\/\/www.theregister.com\/2023\/05\/12\/github_microsoft_openai_copilot\/ Accessed 01-17-2024"},{"volume-title":"Proceedings of the 2022 ACM Conference on Fairness, Accountability, and Transparency. 778\u2013788","author":"Contractor D.","key":"e_1_3_2_1_24_1","unstructured":"D. Contractor, D. McDuff, J.K. Haines, J. Lee, and C. Hines. 2022. Behavioral use licensing for responsible AI. In Proceedings of the 2022 ACM Conference on Fairness, Accountability, and Transparency. 778\u2013788."},{"key":"e_1_3_2_1_25_1","unstructured":"B. Deiseroth M. Deb S. Weinbach M. Brack and P. Schramowski. 2023. AtMan: Understanding Transformer Predictions Through Memory Efficient Attention Manipulation. arxiv:2301.08110."},{"volume-title":"Proc. of Conf. on EMNLP. ACL, 3369\u20133391","author":"Deng M.","key":"e_1_3_2_1_26_1","unstructured":"M. Deng, J. Wang, C.-P. Hsieh, Y. Wang, and H. Guo. 2022. RLPrompt: Optimizing Discrete Text Prompts with Reinforcement Learning. In Proc. of Conf. on EMNLP. ACL, 3369\u20133391."},{"key":"e_1_3_2_1_27_1","unstructured":"Dynatrace. 2023. What is Observability? Not just logs metrics and traces.. https:\/\/www.dynatrace.com\/news\/blog\/what-is-observability-2 Accessed: 01-31-2024"},{"key":"e_1_3_2_1_28_1","volume-title":"The EU AI Act: a summary of its significance and scope. AI (the EU AI Act), 1","author":"Edwards L.","year":"2021","unstructured":"L. Edwards. 2021. The EU AI Act: a summary of its significance and scope. AI (the EU AI Act), 1 (2021)."},{"key":"e_1_3_2_1_29_1","volume-title":"Promptbreeder: Self-Referential Self-Improvement Via Prompt Evolution. arxiv:2309.16797.","author":"Fernando C.","year":"2023","unstructured":"C. Fernando, D. Banarse, H. Michalewski, S. Osindero, and T. Rockt\u00e4schel. 2023. Promptbreeder: Self-Referential Self-Improvement Via Prompt Evolution. arxiv:2309.16797."},{"key":"e_1_3_2_1_30_1","unstructured":"AI Engineer Foundation. 2023. Agent Protocol. https:\/\/agentprotocol.ai Accessed: 01-31-2024"},{"key":"e_1_3_2_1_31_1","unstructured":"Linux Foundation. [n. d.]. SPDX Announces 3.0 Release Candidate with New Use Cases. https:\/\/www.linuxfoundation.org\/press\/spdx-sbom-3-release-candidate Accessed 02-07-2024"},{"volume-title":"Patterns of Enterprise Application Architecture","author":"Fowler Martin","key":"e_1_3_2_1_32_1","unstructured":"Martin Fowler. 2002. Patterns of Enterprise Application Architecture. Addison-Wesley Longman Publishing Co., Inc., USA. isbn:0321127420"},{"volume-title":"Proc. of Annual Meeting of the ACL-IJCNLP. (ACL), 3816\u20133830","author":"Gao T.","key":"e_1_3_2_1_33_1","unstructured":"T. Gao, A. Fisch, and D. Chen. 2021. Making Pre-trained Language Models Better Few-shot Learners. In Proc. of Annual Meeting of the ACL-IJCNLP. (ACL), 3816\u20133830."},{"key":"e_1_3_2_1_34_1","volume-title":"Prompt Cache: Modular Attention Reuse for Low-Latency Inference. arXiv preprint arXiv:2311.04934.","author":"Gim I.","year":"2023","unstructured":"I. Gim, G. Chen, S.S. Lee, N. Sarda, and A. Khandelwal. 2023. Prompt Cache: Modular Attention Reuse for Low-Latency Inference. arXiv preprint arXiv:2311.04934."},{"key":"e_1_3_2_1_35_1","unstructured":"Github. [n. d.]. GitHub Next | Copilot Workspace. https:\/\/githubnext.com\/projects\/copilot-workspace\/ Accessed 02-06-2024"},{"key":"e_1_3_2_1_36_1","unstructured":"Google. [n. d.]. Permissions on Android. https:\/\/developer.android.com\/guide\/topics\/permissions\/overview Accessed 02-06-2024"},{"key":"e_1_3_2_1_37_1","volume-title":"And Segment Forecasts, 2024","author":"Research Grand View","year":"2023","unstructured":"Grand View Research. 2023. Large Language Model Market Size, Share & Trends Analysis Report By Application (Customer Service, Content Generation), By Deployment, By Industry Vertical, By Region, And Segment Forecasts, 2024 - 2030. https:\/\/www.grandviewresearch.com\/industry-analysis\/large-language-model-llm-market-report"},{"key":"e_1_3_2_1_38_1","unstructured":"R. Grosse J. Bae C. Anil N. Elhage and A. Tamkin. 2023. Studying large language model generalization with influence functions. arXiv preprint arXiv:2308.03296."},{"key":"e_1_3_2_1_39_1","unstructured":"S. Gunasekar Y. Zhang J. Aneja C.C.T. Mendes and A. Del Giorno. 2023. Textbooks Are All You Need. arXiv preprint arXiv:2306.11644."},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"crossref","unstructured":"\u0141. G\u00f3rski and S. Ramakrishna. 2023. Challenges in Adapting LLMs for Transparency: Complying with Art. 14 EU AI Act.","DOI":"10.3233\/FAIA230974"},{"key":"e_1_3_2_1_41_1","unstructured":"A.E. Hassan B. Adams F. Khomh N. Nagappan and T. Zimmermann. 2023. FM+SE Vision 2030. https:\/\/fmse.io\/ Accessed 01-17-2024"},{"key":"e_1_3_2_1_42_1","unstructured":"S. Hong M. Zhuge J. Chen X. Zheng and Y. Cheng. 2023. MetaGPT: Meta Programming for A Multi-Agent Collaborative Framework. arxiv:2308.00352."},{"key":"e_1_3_2_1_43_1","volume-title":"Instruction Induction: From Few Examples to Natural Language Task Descriptions. arxiv:2205.10782.","author":"Honovich O.","year":"2022","unstructured":"O. Honovich, U. Shaham, S.R. Bowman, and O. Levy. 2022. Instruction Induction: From Few Examples to Natural Language Task Descriptions. arxiv:2205.10782."},{"key":"e_1_3_2_1_44_1","unstructured":"C.-J. Hsieh S. Si F.X. Yu and I.S. Dhillon. 2023. Automatic Engineering of Long Prompts. arxiv:2311.10117."},{"key":"e_1_3_2_1_45_1","unstructured":"LangChain Inc. [n. d.]. LangSmith. https:\/\/www.langchain.com\/langsmith Accessed 01-18-2024"},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"crossref","unstructured":"Z. Ji N. Lee R. Frieske T. Yu and D. Su. [n. d.]. Survey of Hallucination in Natural Language Generation. ACM Comput. Surv. 55 12 ([n. d.]) Article 248 issn:0360-0300","DOI":"10.1145\/3571730"},{"key":"e_1_3_2_1_47_1","unstructured":"A.Q Jiang A. Sablayrolles A. Mensch C. Bamford and D.S. Chaplot. 2023. Mistral 7B. arXiv preprint arXiv:2310.06825."},{"key":"e_1_3_2_1_48_1","volume-title":"Llmlingua: Compressing prompts for accelerated inference of large language models. arXiv preprint arXiv:2310.05736.","author":"Jiang H.","year":"2023","unstructured":"H. Jiang, Q. Wu, C.Y. Lin, Y. Yang, and L. Qiu. 2023. Llmlingua: Compressing prompts for accelerated inference of large language models. arXiv preprint arXiv:2310.05736."},{"volume-title":"Proc. of Int. Conf. on Machine Learning. Article 642","author":"Kandpal N.","key":"e_1_3_2_1_49_1","unstructured":"N. Kandpal, B. Lester, M. Muqeeth, A. Mascarenhas, and M. Evans. 2023. Git-Theta: a git extension for collaborative development of machine learning models. In Proc. of Int. Conf. on Machine Learning. Article 642, 12 pages."},{"key":"e_1_3_2_1_50_1","unstructured":"O. Khattab A. Singhvi P. Maheshwari Z. Zhang and K. Santhanam. 2023. DSPy: Compiling Declarative Language Model Calls into Self-Improving Pipelines. arXiv preprint arXiv:2310.03714."},{"key":"e_1_3_2_1_51_1","unstructured":"F. Khomh H. Li M. Lamothe M.A. Hamdaqa and J. Cheng. 2023. Software Engineering for Machine Learning Applications (SEMLA) 2023. https:\/\/semla.polymtl.ca\/ Accessed 01-17-2024"},{"key":"e_1_3_2_1_52_1","unstructured":"S. Krishnan M.J. Franklin K. Goldberg and E. Wu. 2017. BoostClean: Automated Error Detection and Repair for Machine Learning. arxiv:1711.01299."},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"crossref","unstructured":"S. Krishnan J. Wang E. Wu M.J. Franklin and K. Goldberg. 2016. ActiveClean: Interactive Data Cleaning While Learning Convex Loss Models. arxiv:1601.03797.","DOI":"10.1145\/2882903.2899409"},{"key":"e_1_3_2_1_54_1","unstructured":"LangChain Inc.. 2024. LangSmith Documentation. https:\/\/docs.smith.langchain.com\/ Accessed: 01-31-2024"},{"key":"e_1_3_2_1_55_1","unstructured":"LangChain Inc.. 2024. Off-the-shelf LangChain Evaluators. https:\/\/docs.smith.langchain.com\/evaluation\/evaluator-implementations Accessed: 01-31-2024"},{"key":"e_1_3_2_1_56_1","volume-title":"LANGCHAIN BLOG: OpenAI\u2019s Bet on a Cognitive Architecture. https:\/\/blog.langchain.dev\/openais-bet-on-a-cognitive-architecture\/ Accessed: 2024-02-08","author":"Team Langchain","year":"2023","unstructured":"Langchain Team. 2023. LANGCHAIN BLOG: OpenAI\u2019s Bet on a Cognitive Architecture. https:\/\/blog.langchain.dev\/openais-bet-on-a-cognitive-architecture\/ Accessed: 2024-02-08"},{"key":"e_1_3_2_1_57_1","unstructured":"Y. LeCun. 2022. What is the future of AI. https:\/\/www.facebook.com\/watch\/live\/?ref=watch_permalink&v=2219848494820560 Accessed: 2024-02-08"},{"key":"e_1_3_2_1_58_1","unstructured":"P. Lewis E. Perez A. Piktus F. Petroni and V. Karpukhin. 2020. Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks. In Advances in Neural Information Processing Systems H. Larochelle M. Ranzato R. Hadsell M.F. Balcan and H. Lin (Eds.). 33 Curran Associates Inc. 9459\u20139474."},{"volume-title":"Proc. of Int. Conf. on Softw. Eng. (ICSE). 2215\u20132227","author":"Li Y.","key":"e_1_3_2_1_59_1","unstructured":"Y. Li, L. Meng, L. Chen, L. Yu, and D. Wu. 2022. Training Data Debugging for the Fairness of Machine Learning Software. In Proc. of Int. Conf. on Softw. Eng. (ICSE). 2215\u20132227."},{"key":"e_1_3_2_1_60_1","doi-asserted-by":"crossref","unstructured":"Y. Liang C. Wu T. Song W. Wu and Y. Xia. 2023. TaskMatrix.AI: Completing Tasks by Connecting Foundation Models with Millions of APIs. arxiv:2303.16434.","DOI":"10.34133\/icomputing.0063"},{"key":"e_1_3_2_1_61_1","doi-asserted-by":"crossref","unstructured":"N.F. Liu K. Lin J. Hewitt A. Paranjape and M. Bevilacqua. 2023. Lost in the middle: How language models use long contexts. arXiv preprint arXiv:2307.03172.","DOI":"10.1162\/tacl_a_00638"},{"key":"e_1_3_2_1_62_1","doi-asserted-by":"crossref","unstructured":"P. Liu W. Yuan J. Fu Z. Jiang H. Hayashi and G. Neubig. 2023. Pre-train Prompt and Predict: A Systematic Survey of Prompting Methods in Natural Language Processing. ACM Comput. Surv. 55 9 (2023) Article 195.","DOI":"10.1145\/3560815"},{"key":"e_1_3_2_1_63_1","doi-asserted-by":"crossref","unstructured":"Y. Liu. 2023. The importance of human-labeled data in the era of LLMs. arXiv preprint arXiv:2306.14910.","DOI":"10.24963\/ijcai.2023\/802"},{"key":"e_1_3_2_1_64_1","volume-title":"Gpteval: Nlg evaluation using gpt-4 with better human alignment. arXiv preprint arXiv:2303.16634.","author":"Liu Y.","year":"2023","unstructured":"Y. Liu, D. Iter, Yi. Xu, S. Wang, R. Xu, and C. Zhu. 2023. Gpteval: Nlg evaluation using gpt-4 with better human alignment. arXiv preprint arXiv:2303.16634."},{"key":"e_1_3_2_1_65_1","unstructured":"Llamaindex. [n. d.]. LlamaIndex (formerly GPT Index) is a data framework for your LLM applications. https:\/\/github.com\/run-llama\/llama_index Accessed 02-07-2024"},{"key":"e_1_3_2_1_66_1","doi-asserted-by":"crossref","unstructured":"S. Longpre R. Mahari A. Chen N. Obeng-Marnu and D. Sileo. 2023. The Data Provenance Initiative: A Large Scale Audit of Dataset Licensing & Attribution in AI. arXiv preprint arXiv:2310.16787.","DOI":"10.1038\/s42256-024-00878-8"},{"key":"e_1_3_2_1_67_1","first-page":"3230","article-title":"Low-resource interactive active labeling for fine-tuning language models. In Findings of the Assoc","volume":"2022","author":"Maekawa S.","year":"2022","unstructured":"S. Maekawa, D. Zhang, H. Kim, S. Rahman, and E. Hruschka. 2022. Low-resource interactive active labeling for fine-tuning language models. In Findings of the Assoc. for Computational Linguistics: EMNLP 2022. 3230\u20133242.","journal-title":"for Computational Linguistics: EMNLP"},{"volume-title":"Proc. of Int. Conf. Softw. Eng: New Ideas and Emerging Results (ICSE-NIER).","author":"Martie L.","key":"e_1_3_2_1_68_1","unstructured":"L. Martie, J. Rosenberg, V. Demers, G. Zhang, and O. Bhardwaj. 2023. Rapid Development of Compositional AI. In Proc. of Int. Conf. Softw. Eng: New Ideas and Emerging Results (ICSE-NIER)."},{"key":"e_1_3_2_1_69_1","unstructured":"Microsoft. [n. d.]. microsoft\/semantic-kernel: Integrate cutting-edge LLM technology quickly and easily into your apps. https:\/\/github.com\/microsoft\/semantic-kernel Accessed 01-30-2024"},{"key":"e_1_3_2_1_70_1","unstructured":"Microsoft. 2023. Prompt flow documentation. https:\/\/microsoft.github.io\/promptflow Accessed 02-06-2024"},{"key":"e_1_3_2_1_71_1","unstructured":"M. Mozes X. He B. Kleinberg and L.D Griffin. 2023. Use of llms for illicit purposes: Threats prevention measures and vulnerabilities. arXiv preprint arXiv:2308.12833."},{"key":"e_1_3_2_1_72_1","unstructured":"V. Murali C. Maddila I. Ahmad M. Bolin and D. Cheng. 2023. CodeCompose: A Large-Scale Industrial Deployment of AI-assisted Code Authoring. arXiv preprint arXiv:2305.12050."},{"key":"e_1_3_2_1_73_1","doi-asserted-by":"crossref","unstructured":"A. Nguyen-Duc B. Cabrero-Daniel A. Przybylek C. Arora and D. Khanna. 2023. Generative Artificial Intelligence for Software Engineering\u2013A Research Agenda. arXiv preprint arXiv:2310.18648.","DOI":"10.2139\/ssrn.4622517"},{"key":"e_1_3_2_1_74_1","volume-title":"Improving the Nation\u2019s Cybersecurity: NIST\u2019s Responsibilities Under the","author":"NIST.","year":"2021","unstructured":"NIST. 2021. Improving the Nation\u2019s Cybersecurity: NIST\u2019s Responsibilities Under the May 2021 Executive Order. https:\/\/www.nist.gov\/itl\/executive-order-improving-nations-cybersecurity\/software-supply-chain-security-guidance-0 Accessed 02-01-2024"},{"key":"e_1_3_2_1_75_1","doi-asserted-by":"crossref","unstructured":"C. Novelli F. Casolari A. Rotolo M. Taddeo and L. Floridi. 2023. Taking AI risks seriously: a new assessment model for the AI Act. AI & SOCIETY.","DOI":"10.1007\/s00146-023-01723-z"},{"key":"e_1_3_2_1_76_1","first-page":"27730","article-title":"Training language models to follow instructions with human feedback","volume":"35","author":"Ouyang L.","year":"2022","unstructured":"L. Ouyang, J. Wu, X. Jiang, D. Almeida, and C. Wainwright. 2022. Training language models to follow instructions with human feedback. Advances in Neural Information Processing Systems, 35 (2022), 27730\u201327744.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_77_1","doi-asserted-by":"crossref","unstructured":"S. Ouyang J.M. Zhang M. Harman and M. Wang. 2023. LLM is Like a Box of Chocolates: the Non-determinism of ChatGPT in Code Generation. arXiv preprint arXiv:2308.02828.","DOI":"10.1145\/3697010"},{"key":"e_1_3_2_1_78_1","unstructured":"V. Pamula. 2023. An Introduction to LLMOps: Operationalizing and Managing Large Language Models using Azure ML. https:\/\/techcommunity.microsoft.com\/t5\/ai-machine-learning-blog\/an-introduction-to-llmops-operationalizing-and-managing-large\/ba-p\/3910996 Accessed: 01-31-2024"},{"key":"e_1_3_2_1_79_1","unstructured":"C. Parnin G. Soares R. Pandita S. Gulwani and J. Rich. 2023. Building Your Own Product Copilot: Challenges Opportunities and Needs. arxiv:2312.14231."},{"volume-title":"Proc. of Int. Conf. on Softw. Eng.: Software Engineering in Practice. 253\u2013262","author":"Parry O.","key":"e_1_3_2_1_80_1","unstructured":"O. Parry, G.M. Kapfhammer, M. Hilton, and P. McMinn. 2022. Surveying the developer experience of flaky tests. In Proc. of Int. Conf. on Softw. Eng.: Software Engineering in Practice. 253\u2013262."},{"key":"e_1_3_2_1_81_1","volume-title":"Interaction Design","author":"Preece Jenny","unstructured":"Jenny Preece, Yvonne Rogers, and Helen Sharp. 2002. Interaction Design (1st ed.). John Wiley & Sons, Inc., USA. isbn:0471492787","edition":"1"},{"key":"e_1_3_2_1_82_1","unstructured":"Pallets Project. [n. d.]. Jinja. https:\/\/palletsprojects.com\/p\/jinja\/ Accessed 02-06-2024"},{"key":"e_1_3_2_1_83_1","unstructured":"Y. Qin S. Liang Y. Ye K. Zhu and L. Yan. 2023. ToolLLM: Facilitating Large Language Models to Master 16000+ Real-world APIs. arxiv:2307.16789."},{"key":"e_1_3_2_1_84_1","unstructured":"G.K. Rajbahadur E. Tuck L. Zi D. Lin and B. Chen. 2021. Can I use this publicly available dataset to build commercial AI software?\u2013A Case Study on Publicly Available Image Datasets. arXiv preprint arXiv:2111.02374."},{"volume-title":"Proc. of Int. Conf. on Very Large Data Bases (VLDB Endowment).","author":"Ratner A.","key":"e_1_3_2_1_85_1","unstructured":"A. Ratner, S.H Bach, H. Ehrenberg, J. Fries, and S. Wu. 2017. Snorkel: Rapid training data creation with weak supervision. In Proc. of Int. Conf. on Very Large Data Bases (VLDB Endowment)."},{"key":"e_1_3_2_1_86_1","volume-title":"Handbook of Automated Reasoning","author":"Robinson J.","year":"1822","unstructured":"J. Robinson and Andrei Voronkov. 2001. Handbook of Automated Reasoning: Volume 1. MIT Press, Cambridge, MA, USA. isbn:0262182211"},{"key":"e_1_3_2_1_87_1","volume-title":"Toolformer: Language Models Can Teach Themselves to Use Tools. arxiv:2302.04761.","author":"Schick T.","year":"2023","unstructured":"T. Schick, J. Dwivedi-Yu, R. Dess\u00ec, R. Raileanu, and M. Lomeli. 2023. Toolformer: Language Models Can Teach Themselves to Use Tools. arxiv:2302.04761."},{"key":"e_1_3_2_1_88_1","doi-asserted-by":"crossref","unstructured":"S. Schoch R. Mishra and Y. Ji. 2023. Data Selection for Fine-tuning Large Language Models Using Transferred Shapley Values. arXiv preprint arXiv:2306.10165.","DOI":"10.18653\/v1\/2023.acl-srw.37"},{"key":"e_1_3_2_1_89_1","unstructured":"W. Shi A. Ajith M. Xia Y. Huang and D. Liu. 2023. Detecting pretraining data from large language models. arXiv preprint arXiv:2310.16789."},{"key":"e_1_3_2_1_90_1","unstructured":"I. Shumailov Z. Shumaylov Y. Zhao Y. Gal and N. Papernot. 2023. The curse of recursion: training on generated data makes models forget. Arxiv. Preprint posted online 27 (2023)."},{"key":"e_1_3_2_1_91_1","first-page":"1146","article-title":"Interactive and Visual Prompt Engineering for Ad-hoc Task Adaptation with Large Language Models","volume":"29","author":"Strobelt H.","year":"2023","unstructured":"H. Strobelt, A. Webson, V. Sanh, B. Hoover, and J. Beyer. 2023. Interactive and Visual Prompt Engineering for Ad-hoc Task Adaptation with Large Language Models. IEEE Trans. on Visualization and Computer Graphics, 29, 1 (2023), 1146\u20131156.","journal-title":"IEEE Trans. on Visualization and Computer Graphics"},{"key":"e_1_3_2_1_92_1","unstructured":"S. Tilga. [n. d.]. LLMs & humans: The perfect duo for data labeling. https:\/\/toloka.ai\/blog\/llms-and-humans-for-data-labeling\/ Accessed 01-29-2024"},{"volume-title":"NeurIPS 2022 Foundation Models for Decision Making Workshop.","author":"Valmeekam K.","key":"e_1_3_2_1_93_1","unstructured":"K. Valmeekam, A. Olmo, S. Sreedharan, and S. Kambhampati. 2022. Large Language Models Still Can\u2019t Plan (A Benchmark for LLMs on Planning and Reasoning about Change). In NeurIPS 2022 Foundation Models for Decision Making Workshop."},{"key":"e_1_3_2_1_94_1","unstructured":"L. Wang X. Zhang H. Su and J. Zhu. 2023. A Comprehensive Survey of Continual Learning: Theory Method and Application. arxiv:2302.00487."},{"key":"e_1_3_2_1_95_1","doi-asserted-by":"crossref","unstructured":"Y. Wang Y. Kordi S. Mishra A. Liu and N.A. Smith. 2023. Self-Instruct: Aligning Language Models with Self-Generated Instructions. arxiv:2212.10560.","DOI":"10.18653\/v1\/2023.acl-long.754"},{"key":"e_1_3_2_1_96_1","unstructured":"J. Wei X. Wang D. Schuurmans M. Bosma and E. Chi. 2022. Chain-of-thought prompting elicits reasoning in large language models. Advances in Neural Info Processing Systems."},{"volume-title":"Int. Conf. on Secure Softw. Integration and Reliability Improvement. 14\u201322","author":"Wong W.E.","key":"e_1_3_2_1_97_1","unstructured":"W.E. Wong, V. Debroy, A. Surampudi, H. Kim, and M.F. Siok. 2010. Recent Catastrophic Accidents: Investigating How Software was Responsible. In Int. Conf. on Secure Softw. Integration and Reliability Improvement. 14\u201322."},{"key":"e_1_3_2_1_98_1","unstructured":"FOSSology Workgroup. 2023. FOSSology. https:\/\/www.fossology.org\/ Accessed 02-07-2024"},{"key":"e_1_3_2_1_99_1","unstructured":"OpenDataology workgroup. 2022. OpenDataology. https:\/\/github.com\/OpenDataology\/ Accessed 02-07-2024"},{"key":"e_1_3_2_1_100_1","unstructured":"Q. Wu G. Bansal J. Zhang Y. Wu B. Li and E. Zhu. 2023. AutoGen: Enabling Next-Gen LLM Applications via Multi-Agent Conversation. arxiv:2308.08155."},{"volume-title":"Extended Abstracts of Conf. on Human Factors in Computing Systems. ACM.","author":"Wu T.","key":"e_1_3_2_1_101_1","unstructured":"T. Wu, E. Jiang, A. Donsbach, J. Gray, and A. Molina. 2022. PromptChainer: Chaining Large Language Model Prompts through Visual Programming. In Extended Abstracts of Conf. on Human Factors in Computing Systems. ACM."},{"volume-title":"Proc. of Conf. on Human Factors in Computing Systems (CHI). ACM.","author":"Wu T.","key":"e_1_3_2_1_102_1","unstructured":"T. Wu, M. Terry, and C.J. Cai. 2022. AI Chains: Transparent and Controllable Human-AI Interaction by Chaining Large Language Model Prompts. In Proc. of Conf. on Human Factors in Computing Systems (CHI). ACM."},{"key":"e_1_3_2_1_103_1","unstructured":"Z. Wu L. Qiu A. Ross E. Aky\u00fcrek and B. Chen. 2023. Reasoning or Reciting? Exploring the Capabilities and Limitations of Language Models Through Counterfactual Tasks. arxiv:2307.02477."},{"key":"e_1_3_2_1_104_1","unstructured":"xAI Team. 2023. PromptIDE. https:\/\/x.ai\/prompt-ide\/ Accessed 01-18-2024"},{"key":"e_1_3_2_1_105_1","doi-asserted-by":"publisher","DOI":"10.1145\/3510457.3513050"},{"volume-title":"Proc. of Conf. on EMNLP. ACL, 8162\u20138171","author":"Xu H.","key":"e_1_3_2_1_106_1","unstructured":"H. Xu, Y. Chen, Y. Du, N. Shao, and W. Yanggang. 2022. GPS: Genetic Prompt Search for Efficient Few-Shot Learning. In Proc. of Conf. on EMNLP. ACL, 8162\u20138171."},{"key":"e_1_3_2_1_107_1","unstructured":"Q. Xu F. Hong B. Li C. Hu and Z. Chen. 2023. On the Tool Manipulation Capability of Open-source Large Language Models. arxiv:2305.16504."},{"key":"e_1_3_2_1_108_1","volume-title":"React: Synergizing reasoning and acting in language models. arXiv preprint arXiv:2210.03629.","author":"Yao S.","year":"2022","unstructured":"S. Yao, J. Zhao, D. Yu, N. Du, I. Shafran, K. Narasimhan, and Y. Cao. 2022. React: Synergizing reasoning and acting in language models. arXiv preprint arXiv:2210.03629."},{"key":"e_1_3_2_1_109_1","unstructured":"Q. Ye M. Axmed R. Pryzant and F. Khani. 2023. Prompt Engineering a Prompt Engineer. arxiv:2311.05661."},{"volume-title":"Proc. of Conf. of the North American Chapter of the Assoc. for Computational Linguistics: Human Language Technologies. 1422\u20131436","author":"Yu Y.","key":"e_1_3_2_1_110_1","unstructured":"Y. Yu, L. Kong, J. Zhang, R. Zhang, and C. Zhang. 2022. AcTune: Uncertainty-based active self-training for active fine-tuning of pretrained language models. In Proc. of Conf. of the North American Chapter of the Assoc. for Computational Linguistics: Human Language Technologies. 1422\u20131436."},{"key":"e_1_3_2_1_111_1","unstructured":"S. Zhang L. Dong X. Li S. Zhang and X. Sun. 2023. Instruction tuning for large language models: A survey. arXiv preprint arXiv:2308.10792."},{"key":"e_1_3_2_1_112_1","unstructured":"L. Zheng L. Yin Z. Xie J. Huang and C. Sun. 2023. Efficiently Programming Large Language Models using SGLang. CoRR."},{"key":"e_1_3_2_1_113_1","volume-title":"Lima: Less is more for alignment. arXiv preprint arXiv:2305.11206.","author":"Zhou C.","year":"2023","unstructured":"C. Zhou, P. Liu, P. Xu, S. Iyer, and J. Sun. 2023. Lima: Less is more for alignment. arXiv preprint arXiv:2305.11206."},{"key":"e_1_3_2_1_114_1","volume-title":"Agents: An Open-source Framework for Autonomous Language Agents. arxiv:2309.07870.","author":"Zhou W.","year":"2023","unstructured":"W. Zhou, Y.E. Jiang, L. Li, J. Wu, and T. Wang. 2023. Agents: An Open-source Framework for Autonomous Language Agents. arxiv:2309.07870."},{"key":"e_1_3_2_1_115_1","unstructured":"Y. Zhou A.I. Muresanu Z. Han K. Paster and S. Pitis. 2023. Large Language Models Are Human-Level Prompt Engineers. arxiv:2211.01910."},{"key":"e_1_3_2_1_116_1","unstructured":"X. Zhu J. Li Y. Liu C. Ma and W. Wang. 2023. A survey on model compression for large language models. arXiv preprint arXiv:2308.07633."}],"event":{"name":"FSE '24: 32nd ACM International Conference on the Foundations of Software Engineering","sponsor":["SIGSOFT ACM Special Interest Group on Software Engineering"],"location":"Porto de Galinhas Brazil","acronym":"FSE '24"},"container-title":["Companion Proceedings of the 32nd ACM International Conference on the Foundations of Software Engineering"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3663529.3663849","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,23]],"date-time":"2024-11-23T22:22:10Z","timestamp":1732400530000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3663529.3663849"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,7,10]]},"references-count":116,"alternative-id":["10.1145\/3663529.3663849","10.1145\/3663529"],"URL":"https:\/\/doi.org\/10.1145\/3663529.3663849","relation":{},"subject":[],"published":{"date-parts":[[2024,7,10]]},"assertion":[{"value":"2024-07-10","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}