[{"key":"dc.contributor.author","value":"Evans, Andrew","language":null},{"key":"dc.date.accessioned","value":"2026-04-17T20:26:23Z","language":null},{"key":"dc.date.available","value":"2026-04-17T20:26:24Z","language":null},{"key":"dc.date.issued","value":"2026","language":"en"},{"key":"dc.identifier.uri","value":"http:\/\/hdl.handle.net\/2429\/94153","language":null},{"key":"dc.description.abstract","value":"The task of inferring human poses in 2D image data is known as 2D human pose estimation. Modern approaches to this problem use neural networks to generate keypoint predictions: estimates for the locations of keypoints, such as elbows and knees, associated with the human body. Top-down heatmap-based methods are one such approach, in which predictions are generated for each person in an image independently. This is often done by generating a set of heatmaps which can then be used to predict keypoint locations. Although these methods perform well in practice, they have several disadvantages: the heatmaps they generate are in general not probabilistic and cannot be used to attribute confidence to keypoint predictions, nor correlation between uncertain joint positions. These methods also cannot generally be used in end-to-end regression from input to predictions, as the argmax function used to determine keypoint locations from heatmaps is non-differentiable.\r\nIn this thesis, we investigate ways to overcome these drawbacks, with the aim of providing a top-down 2D human pose estimation method which provides predictions which are multi-modal, probabilistic, and differentiable. We accomplish this by creating a model which can generate multiple Gaussian predictions via sampling, leveraging the effectiveness of heatmap-based approaches by generating these Gaussian predictions from heatmaps. Using a sampling procedure ensures predictions are not limited to a small number of preset modes, as is the case with Gaussian mixture models. 
Implicit Maximum Likelihood Estimation is also used during training, providing a less costly training procedure than performing maximum likelihood estimation on Gaussian mixture distributions. We show that this model is able to generate pose predictions with comparable accuracy to existing methods, while providing rich statistics on the certainty of its predictions over full poses, unlike traditional methods, which can only produce predictions for each keypoint independently. It is also able to provide multi-modal predictions, capturing information about ambiguous image data that standard heatmap approaches cannot.","language":"en"},{"key":"dc.language.iso","value":"eng","language":"en"},{"key":"dc.publisher","value":"University of British Columbia","language":"en"},{"key":"dc.rights","value":"Attribution-NonCommercial-NoDerivatives 4.0 International","language":"*"},{"key":"dc.rights.uri","value":"http:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0\/","language":"*"},{"key":"dc.title","value":"2D pose regression using implicit Gaussian mixtures","language":"en"},{"key":"dc.type","value":"Text","language":"en"},{"key":"dc.degree.name","value":"Master of Science - MSc","language":"en"},{"key":"dc.degree.discipline","value":"Computer Science","language":"en"},{"key":"dc.degree.grantor","value":"University of British Columbia","language":"en"},{"key":"dc.contributor.supervisor","value":"Sigal, Leonid","language":null},{"key":"dc.contributor.supervisor","value":"Rhodin, Helge","language":null},{"key":"dc.date.graduation","value":"2026-05","language":"en"},{"key":"dc.type.text","value":"Thesis\/Dissertation","language":"en"},{"key":"dc.description.affiliation","value":"Science, Faculty of","language":"en"},{"key":"dc.description.affiliation","value":"Computer Science, Department of","language":"en"},{"key":"dc.degree.campus","value":"UBCV","language":"en"},{"key":"dc.description.scholarlevel","value":"Graduate","language":"en"}]