[{"key":"dc.contributor.author","value":"Chan, Abraham","language":null},{"key":"dc.date.accessioned","value":"2026-04-14T16:10:46Z","language":null},{"key":"dc.date.available","value":"2026-04-14T16:10:47Z","language":null},{"key":"dc.date.issued","value":"2026","language":"en"},{"key":"dc.identifier.uri","value":"http:\/\/hdl.handle.net\/2429\/94021","language":null},{"key":"dc.description.abstract","value":"Supervised machine learning (ML) relies on large datasets, which are prone to faults like mislabelling, deletion, or repetition.\r\nEven popular open datasets like ImageNet contain faults.\r\nOur experiments on four multi-class classification datasets show that when training data is corrupted, even highly accurate ML models misclassify test inputs.\r\n\r\nThis thesis investigates techniques against training data faults, and identifies ensembles - multiple independently trained ML models combined with simple majority voting - as most effective. \r\nWe propose two novel ensemble-based solutions, allowing ML applications to tolerate faulty training data, while minimizing practitioner effort.\r\n\r\n(1) We find that ensembles are the most resilient technique among existing techniques against mislabelled training data, even in safety-critical domains such as autonomous vehicles and healthcare, and require minimal practitioner effort.\r\n(2) We find that ensembles are the most generalizable fault tolerance technique, even in problems beyond multi-class classification, such as object detection. 
Ensembles deliver resilience within acceptable runtime overheads in safety-critical applications.\r\n(3) We find that ensembles have a higher resilience to faulty training data than individual models, especially when using ensembles with architecturally diverse constituent models.\r\n\r\nDespite their effectiveness, ensembles face adoption challenges in real-world safety-critical systems.\r\nFirst, there are many ways to construct diverse ensembles, resulting in an exponential factorial search space.\r\nHow can one systematically build resilient ensembles against faulty training data?\r\nSecond, ensembles can misclassify test inputs when incorrect models outvote correct ones.\r\nCan we reduce incorrect predictions by ensembles during deployment?\r\n\r\nThus, this thesis presents two solutions.\r\n(1) We present D-semble, a framework that returns the most resilient ensembles within a time budget. D-semble encodes ensemble search into an evolutionary search problem, while using diversity as a heuristic.\r\n(2) We present ReMlX, a framework to reduce ensemble misclassifications at inference. ReMlX leverages the feature space of ML models, extracted with explainable AI, to maximize diversity in ensembles. 
\r\n\r\nIn summary, this thesis advances the development of resilient ML systems against faulty training data.\r\nBy developing comprehensive solutions, this work enables ensembles to be deployed with minimal effort in real-world safety-critical systems.","language":"en"},{"key":"dc.language.iso","value":"eng","language":"en"},{"key":"dc.publisher","value":"University of British Columbia","language":"en"},{"key":"dc.rights","value":"Attribution-NonCommercial-NoDerivatives 4.0 International","language":"*"},{"key":"dc.rights.uri","value":"http:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0\/","language":"*"},{"key":"dc.title","value":"Building resilient ML applications using ensembles against faulty training data","language":"en"},{"key":"dc.type","value":"Text","language":"en"},{"key":"dc.degree.name","value":"Doctor of Philosophy - PhD","language":"en"},{"key":"dc.degree.discipline","value":"Electrical and Computer Engineering","language":"en"},{"key":"dc.degree.grantor","value":"University of British Columbia","language":"en"},{"key":"dc.contributor.supervisor","value":"Pattabiraman, Karthik","language":null},{"key":"dc.contributor.supervisor","value":"Gopalakrishnan, Sathish","language":null},{"key":"dc.date.graduation","value":"2026-05","language":"en"},{"key":"dc.type.text","value":"Thesis\/Dissertation","language":"en"},{"key":"dc.description.affiliation","value":"Applied Science, Faculty of","language":"en"},{"key":"dc.description.affiliation","value":"Electrical and Computer Engineering, Department of","language":"en"},{"key":"dc.degree.campus","value":"UBCV","language":"en"},{"key":"dc.description.scholarlevel","value":"Graduate","language":"en"}]