{"Affiliation":[{"label":"Affiliation","value":"Science, Faculty of","attrs":{"lang":"en","ns":"http:\/\/vivoweb.org\/ontology\/core#departmentOrSchool","classmap":"vivo:EducationalProcess","property":"vivo:departmentOrSchool"},"iri":"http:\/\/vivoweb.org\/ontology\/core#departmentOrSchool","explain":"VIVO-ISF Ontology V1.6 Property; The department or school name within institution; Not intended to be an institution name."},{"label":"Affiliation","value":"Computer Science, Department of","attrs":{"lang":"en","ns":"http:\/\/vivoweb.org\/ontology\/core#departmentOrSchool","classmap":"vivo:EducationalProcess","property":"vivo:departmentOrSchool"},"iri":"http:\/\/vivoweb.org\/ontology\/core#departmentOrSchool","explain":"VIVO-ISF Ontology V1.6 Property; The department or school name within institution; Not intended to be an institution name."}],"AggregatedSourceRepository":[{"label":"AggregatedSourceRepository","value":"DSpace","attrs":{"lang":"en","ns":"http:\/\/www.europeana.eu\/schemas\/edm\/dataProvider","classmap":"ore:Aggregation","property":"edm:dataProvider"},"iri":"http:\/\/www.europeana.eu\/schemas\/edm\/dataProvider","explain":"A Europeana Data Model Property; The name or identifier of the organization who contributes data indirectly to an aggregation service (e.g. 
Europeana)"}],"Campus":[{"label":"Campus","value":"UBCV","attrs":{"lang":"en","ns":"https:\/\/open.library.ubc.ca\/terms#degreeCampus","classmap":"oc:ThesisDescription","property":"oc:degreeCampus"},"iri":"https:\/\/open.library.ubc.ca\/terms#degreeCampus","explain":"UBC Open Collections Metadata Components; Local Field; Identifies the name of the campus from which the graduate completed their degree."}],"Creator":[{"label":"Creator","value":"Shan, Shidong","attrs":{"lang":"en","ns":"http:\/\/purl.org\/dc\/terms\/creator","classmap":"dpla:SourceResource","property":"dcterms:creator"},"iri":"http:\/\/purl.org\/dc\/terms\/creator","explain":"A Dublin Core Terms Property; An entity primarily responsible for making the resource.; Examples of a Contributor include a person, an organization, or a service."}],"DateAvailable":[{"label":"DateAvailable","value":"2009-03-06T19:04:20Z","attrs":{"lang":"en","ns":"http:\/\/purl.org\/dc\/terms\/issued","classmap":"edm:WebResource","property":"dcterms:issued"},"iri":"http:\/\/purl.org\/dc\/terms\/issued","explain":"A Dublin Core Terms Property; Date of formal issuance (e.g., publication) of the resource."}],"DateIssued":[{"label":"DateIssued","value":"2008","attrs":{"lang":"en","ns":"http:\/\/purl.org\/dc\/terms\/issued","classmap":"oc:SourceResource","property":"dcterms:issued"},"iri":"http:\/\/purl.org\/dc\/terms\/issued","explain":"A Dublin Core Terms Property; Date of formal issuance (e.g., publication) of the resource."}],"Degree":[{"label":"Degree","value":"Master of Science - MSc","attrs":{"lang":"en","ns":"http:\/\/vivoweb.org\/ontology\/core#relatedDegree","classmap":"vivo:ThesisDegree","property":"vivo:relatedDegree"},"iri":"http:\/\/vivoweb.org\/ontology\/core#relatedDegree","explain":"VIVO-ISF Ontology V1.6 Property; The thesis degree; Extended Property specified by UBC, as per https:\/\/wiki.duraspace.org\/display\/VIVO\/Ontology+Editor%27s+Guide"}],"DegreeGrantor":[{"label":"DegreeGrantor","value":"University of 
British Columbia","attrs":{"lang":"en","ns":"https:\/\/open.library.ubc.ca\/terms#degreeGrantor","classmap":"oc:ThesisDescription","property":"oc:degreeGrantor"},"iri":"https:\/\/open.library.ubc.ca\/terms#degreeGrantor","explain":"UBC Open Collections Metadata Components; Local Field; Indicates the institution where thesis was granted."}],"Description":[{"label":"Description","value":"The well known Levenberg-Marquardt method is used extensively for solving\nnonlinear least-squares problems. We describe an extension of the Levenberg-\nMarquardt method to problems with bound constraints on the variables. Each\niteration of our algorithm approximately solves a linear least-squares problem\nsubject to the original bound constraints. Our approach is especially suited to\nlarge-scale problems whose functions are expensive to compute; only matrix-vector\nproducts with the Jacobian are required. We present the results of numerical\nexperiments that illustrate the effectiveness of the approach. Moreover,\nwe describe its application to a practical curve fitting problem in fluorescence\noptical imaging.","attrs":{"lang":"en","ns":"http:\/\/purl.org\/dc\/terms\/description","classmap":"dpla:SourceResource","property":"dcterms:description"},"iri":"http:\/\/purl.org\/dc\/terms\/description","explain":"A Dublin Core Terms Property; An account of the resource.; Description may include but is not limited to: an abstract, a table of contents, a graphical representation, or a free-text account of the resource."}],"DigitalResourceOriginalRecord":[{"label":"DigitalResourceOriginalRecord","value":"https:\/\/circle.library.ubc.ca\/rest\/handle\/2429\/5648?expand=metadata","attrs":{"lang":"en","ns":"http:\/\/www.europeana.eu\/schemas\/edm\/aggregatedCHO","classmap":"ore:Aggregation","property":"edm:aggregatedCHO"},"iri":"http:\/\/www.europeana.eu\/schemas\/edm\/aggregatedCHO","explain":"A Europeana Data Model Property; The identifier of the source object, e.g. the Mona Lisa itself. 
This could be a full linked open date URI or an internal identifier"}],"Extent":[{"label":"Extent","value":"7309262 bytes","attrs":{"lang":"en","ns":"http:\/\/purl.org\/dc\/terms\/extent","classmap":"dpla:SourceResource","property":"dcterms:extent"},"iri":"http:\/\/purl.org\/dc\/terms\/extent","explain":"A Dublin Core Terms Property; The size or duration of the resource."}],"FileFormat":[{"label":"FileFormat","value":"application\/pdf","attrs":{"lang":"en","ns":"http:\/\/purl.org\/dc\/elements\/1.1\/format","classmap":"edm:WebResource","property":"dc:format"},"iri":"http:\/\/purl.org\/dc\/elements\/1.1\/format","explain":"A Dublin Core Elements Property; The file format, physical medium, or dimensions of the resource.; Examples of dimensions include size and duration. Recommended best practice is to use a controlled vocabulary such as the list of Internet Media Types [MIME]."}],"FullText":[{"label":"FullText","value":"A Levenberg-Marquardt Method For Large-Scale Bound-Constrained Nonlinear Least-Squares by Shidong Shan BSc (Hon.), Acad ia University, 2006 A T H E S I S S U B M I T T E D I N P A R T I A L F U L F I L M E N T O F T H E R E Q U I R E M E N T S F O R T H E D E G R E E O F Master of Science in The Faculty of Graduate Studies (Computer Science) The University of Br i t i sh Columbia (Vancouver) Ju ly 2008 \u00a9 Shidong Shan 2008 Abstract T h e well known Levenberg-Marquardt method is used extensively for solving nonlinear least-squares problems. We describe an extension of the Levenberg- Marquardt method to problems wi th bound constraints on the variables. Each iteration of our algorithm approximately solves a linear least-squares problem subject to the original bound constraints. O u r approach is especially suited to large-scale problems whose functions are expensive to compute; only matr ix - vector products w i th the Jacobian are required. W e present the results of nu- merical experiments that illustrate the effectiveness of the approach. 
Moreover, we describe its application to a practical curve f i t t ing problem in fluorescence optical imaging. Contents Abstract i i Contents i i i List of Tables v List of Figures v i Acknowledgements v i i 1 Introduction 1 1.1 Overview 2 1.2 Definitions and notation 3 2 Background on Optimization 5 2.1 Unconstrained opt imizat ion 5 2.1.1 Line-search methods 6 2.1.2 Trust-region methods 6 2.2 Bound-constrained optimization 8 2.2.1 Active-set methods 8 2.2.2 Gradient projection methods 9 2.3 Algor i thms for bound-constrained optimization 10 3 Unconstrained Least Squares 12 3.1 Linear least squares 12 3.1.1 N o r m a l equations 12 3.1.2 T w o norm regularization 13 3.2 Nonlinear least squares 14 3.2.1 T h e Gauss-Newton method 14 3.2.2 The Levenberg-Marquardt method 16 3.2.3 U p d a t i n g the damping parameter 17 4 Nonlinear Least Squares W i t h Simple Bounds 20 4.1 Mot ivat ion 20 4.2 Framework 21 4.3 Stabi l izat ion strategy 24 4.4 T h e subproblem solver 25 4.5 The B C N L S algorithm 27 5 Numerical Experiments 30 5.1 Performance profile 30 5.2 Termination criteria 31 5.3 Benchmark tests w i t h A l g o r i t h m 566 31 5.4 C U T E r feasibility problems 34 6 Curve Fi t t ing in Fluorescence Optical Imaging 38 6.1 Brie f introduction to t ime domain optical imaging 38 6.2 Mathematica l models of fluorescence signals 40 6.3 Curve fitting 41 6.4 Fluorescence lifetime estimation 42 6.4.1 F i t t i n g parameters 42 6.4.2 Parameter bounds 43 6.4.3 Simulated data for lifetime fitting 43 6.4.4 Exper imental results 43 6.5 Fluorophore depth estimation 45 6.6 Opt ica l properties estimation 48 6.6.1 Simulation for estimating optical properties 49 6.6.2 Results for optical properties 50 6.7 Discussion 50 7 Conclusions and Future W o r k 52 Bibliography 53 List of Tables 5.1 Results of fifteen benchmark tests from A l g o r i t h m 566 . . . . . . 
32 5.2 Summary of the benchmark tests from Algor i thm 566 33 5.3 Summary of C U T E r feasibility tests 36 6.1 Comparison of lifetime fitting w i t h and without D C component . 47 6.2 Comparison of lifetime fitting using B C N L S and A R T _ L M . . . . 48 6.3 Comparison of depth fitting results for (A) a l l data points, (B) data points w i t h depth > 1mm, (C) data points w i th S N R > 20, (D) data points w i th S N R > 20 and depth > 1mm 48 6.4 Comparison of depth fitting results for B C N L S and A R T X M for data points w i th S N R > 20 and depth > 1mm 49 6.5 and \/x^ values used in s imulation 50 6.6 Comparison of results of optical properties w i th A R T _ L M and B C N L S 50 List of Figures 3.1 Geometrical interpretation of least squares 13 3.2 The image of function q{p) 18 5.1 Performance profiles on benchmark problems from A l g o r i t h m 566: number of function evaluations 34 5.2 Performance profiles on benchmark problems from Algor i thm 566: C P U time 35 5.3 Performance profile on C U T E r feasibility problems, number of function evaluations 37 5.4 Performance profile on C U T E r feasibility problems, C P U time . 37 6.1 T i m e domain optical imaging 39 6.2 Fluorescent decay curve 39 6.3 Fluorophore identification w i t h lifetime 40 6.4 A n example of fitted T P S F curve 44 6.5 Lifetime fitting results of simulated data w i th varying depth, without fitting D C component 45 6.6 Lifetime fitting results of simulated data w i th varying depth, w i th fitting D C component 46 6.7 Est imated depth versus true depth 47 6.8 Relative accuracy of calculated depth versus signal S N R 49 Acknowledgements I am grateful to my supervisor D r . Michael Friedlander, who made the subject of Numerica l Opt imizat i on exciting and beautiful. W i t h his knowledge, guidance, and assistance, I have managed to accomplish this work. I would like to thank D r . 
Ian Mi t che l l for his careful reading of this thesis and his helpful comments that have improved this thesis. I am fortunate to have had the financial support for this work from the N a t u r a l Sciences and Engineering Research Counc i l of Canada ( N S E R C ) i n the form of a postgraduate scholarship. Also , I a m thankful for the M I T A C S Accelerate B C internship opportunity at A R T Advanced Research Technologies Inc. It has been a privilege to work w i t h G u o b i n M a , Nicolas Robitai l le , and Simon Fortier i n the Research and Development group at A R T Advanced Re- search Technologies Inc. The i r work and ideas on curve f itting in fluorescence optical imaging have formed a strong foundation for Chapter 6 in this thesis. In addit ion, I wish to thank the three of them for reading a draft of Chapter 6 and their valuable comments and suggestions for improving the chapter. The Scientific C o m p u t i n g Laboratory at the University of B r i t i s h Co lumbia has been a pleasant place to work. I have enjoyed the support and company of my current and past colleagues: Ewout van den Berg , El izabeth Cross, Mar iso l Flores Garr ido , H u i Huang , D a n L i , Tracy L a u , and Jelena Sirovljevic. A m o n g them, Ewout van den Berg has always been of great help to me. I a m deeply indebted to my parents and family for their unconditional love and understanding. M y brother Hongqing has inspired me and set a great example for me to fulfi l l my dreams. Thanks are also due to many of my friends for their care and friendship over the years. Above a l l , I sincerely thank my best friend and partner B i l l Hue, whose love, support, and faithful prayers give me strength and courage to cope in difficult times. W i t h o u t h i m , I could have not achieved this far i n my academic endeavor. Final ly , I want to dedicate this thesis to my late father, who suddenly passed away while I was working on this thesis. 
In his life, my father was always passionate that his children would have every educational opportunity. There is no doubt that my father would be very proud i f he could live to witness another academic accomplishment of his youngest daughter. To my father, Kunsong Shan Chapter 1 Introduction T h i s thesis proposes an algorithm for nonlinear least-squares problems subject to simple bound constraints on the variables. T h e problem has the general form minimize 5l|r'(a;)||2 subject to (,\u2022 R is a twice continuously difFer- entiable nonlinear function. Nonlinear least-squares problems arise in many data-f itt ing applications in science and engineering. For instance, suppose that we model some process by a nonlinear function 0 that depends on some parameters x , and that we have the actual measurements j\/j at time ti\\ our a i m is to determine parameters x so that the discrepancy between the predicted and observed measurements is minimized. The discrepancy between the predicted and observed data can be expressed as r i (x ) = 4>{x,ti) - Vi. T h e n , the parameters x can be estimated by solving the nonlinear least-squares problem minimize |||T\"(a;)||2- (1.1) Very large nonlinear least-squares problems arise in numerous areas of ap- plications, such as medical imaging, tomography, geophysics, and economics. In many instances, both the number of variables and the number of residuals are large. It is also very common that only the number of residuals is large. M a n y approaches exist for the solution of linear and nonlinear least-squares problems, especially for the case where the problems are unconstrained. See Bjorck [3] for a comprehensive discussion of algorithms for least squares, including detailed error analyses. Because of the high demand in the industry, nonhnear least-squares soft- ware is fairly prevalent. 
M a j o r numerical software libraries such as S A S , and programming environments such as Mathemat i ca and M a t l a b , contain nonlin- ear least-squares implementations. Other high quality implementations include, for example, D F N L P [50], M I N P A C K [38, 41], and N L 2 S 0 L [27]; see More and Wright [44, Chapter 3]. However, most research has focused on the nonlinear least-squares problems without constraints. For problems wi th constraints, the approaches are fewer (also see Bjorck [3]). In practice, the variables x often have some known lower and upper bounds from its physical or natural settings. T h e a im of this thesis is to develop an efficient solver for large-scale nonlinear least-squares problems wi th simple bounds on variables. 1.1 Overview T h e organization of the thesis is as follows. In the remainder of this chapter, we introduce some definitions and notation that we use in this thesis. Chapter 2 reviews some theoretical background on unconstrained and bound-constrained optimization. O n unconstrained opt imization, we summarize the basic strate- gies such as line-search and trust-region methods. For bound-constrained opt i - mizat ion, we review active-set and gradient-projection methods. We also briefly discuss some existing algorithms for solving the bound-constrained optimization problems, including A S T R A L [53], B C L S [18], and L - B F G S - B [57]. Chapter 3 focuses on unconstrained least-squares problems. We start w i th a brief review of linear least-squares problems and the normal equations. We also review two-norm regularization techniques for solving il l-conditioned linear systems. For nonlinear least-squares problems, we discuss two classical non- linear least squares algorithms: the Gauss-Newton method and the Levenberg- Marquardt method. We also explain the self-adaptive updating strategy for the damping parameter in the Levenberg-Marquardt method. 
These two methods and the updat ing strategy are closely related to the proposed algorithm in this thesis. Chapter 4 is the main contribution of this thesis. We explain our pro- posed algorithm, named B C N L S , for solving the bound-constrained nonlinear least-squares problems. We describe our algorithm wi th in the A S T R A L [53] framework. The B C N L S algorithm is based on solving a sequence of linear least-squares subproblems; the bounds i n the subproblem are copied verbatim from the original nonlinear problem. The B C L S least-squares solver forms the computational kernel of the approach. The results of numerical experiments are presented i n Chapter 5. In Chap - ter 6, we illustrate one real data-f itt ing application arising in fluorescence optical imaging. The B C N L S software package and its documentation are available at ht tp : \/ \/ w w w . cs. ubc. ca \/ labs \/ sc l \/bcn ls \/ . 1.2 Definitions and notation In this section, we briefly define the notation used throughout the thesis. The gradient of the function f{x) is given by the n-vector g{x) = Vf{x) = and the Hessian is given by dfix)\/dxx dfix)\/dX2 df{x)\/dx\u201e H{x) = Vy{x) = 9 ^ | V M 9 X 2 c Ilk. 120X1 dx'i I dx. H ^ X l dxidXn 0X2 oxn Sim T h e Jacobian of r{x) is the m-by-n matr ix J ( x ) = Vr{xf = Vr^ixf Vr2{xf For nonlinear least-squares problems, the objective function has the special form and, in this case, fix) = i r ( x ) ^ r ( x ) , g{x) = J{xfr{x), m H(x) = J{xfj{x) + '\u00a3^ri[x)V^ri{x). (1.2) (1.3) (1.4) Most specialized algorithms for nonlinear least-squares problems exploit the special structure of the nonlinear least-squares objective function (1.2). Our proposed algorithm also explores this structure and makes use of the above structural relations. We use the following abbreviations throughout: \u2022 Xk: the value of x at iteration fc; \u2022 x*: a local minimizer of \/ ; \u2022 ik,Uk- the lower and upper bounds for the subproblem at iteration k. 
\u2022 Tk = r{xk), vector values of r at x^; \u2022 fk = f{xk), function value of f &X Xk\\ \u2022 Jk \u2014 J(xk), Jacobian of r{x.) at Xjt; \u2022 gk = g{xk), gradient of f{x) at x^; \u2022 Hk = H{xk), Hessian of \/ ( x ) at Xk- Chapter 2 Background on Optimization This chapter reviews some fundamental techniques for unconstrained and bound- constrained optimization. We begin wi th some basic strategies for unconstrained opt imizat ion problems, then we examine some specialized methods for bound- constrained problems. 2.1 UnconstrEiined optimization T h e general unconstrained optimization problem has the simple mathematical formulation minimize f{x). (2.1) x\u20acR\" Hereafter we assume that f{x) : R \" \u2014> E is a smooth real-valued function. B y Taylor 's theorem, a smooth function can be approximated by a quadratic model in some neighbourhood of each point x \u00a3 R \" . The optimality conditions for unconstrained optimization problem (2.1) are well-known. We sunmiarize the conditions below without explanation. For more detailed descriptions, see Nocedal and Wright [47, Chapter 2]. Firs t -Order Necessary Conditions: If x ' is a local minimizer and \/ is continuously differentiable i n an open neighbourhood of x*, then g(x*) = 0. Second-Order Necesssiry Conditions: If x* is a local minimizer of \/ and V - ^ \/ exists and is continuous i n an open neighbourhood of x*, then ^(x*) = 0 and H{x*) is positive semidefinite. Second-Order Sufficient Conditions: Suppose that V ^ \/ is continuous in an open neighbourhood of x*, ^(x*) = 0, and H{x*) is positive definite. Then X* is a strict local minimizer of \/ . In general, algorithms for unconstrained optimization generate a sequence of iterates {xk}'^-Q using the rule Xk+\\ = Xk+dk, where dk is a direction of descent. The two main algorithmic strategies are based on different subproblems for computing the updates dk- 2.1.1 Line-search methods Line search methods obtain a descent direction dk in two stages. 
F i rs t , dk is found such that g\u00efdk < 0. (2.2) T h a t is, it requires that \/ is strictly decreasing along dk wi th in some neighbour- hood of Xfc. The most obvious choice for search direction is d^ = \u2014g^, which is called the steepest-des cent direction. However, any direction dk that solves the equations Bkdk = \u2014Qk for some positive definite matr ix Bk also satisfies (2.2). Second, the distance to move along dk is determined by a step length ak that approximately solves the one-dimensional minimizat ion problem minimize f(xk+adk)- a>0 Popular line search conditions require that ak gives sufficient decrease in \/ and ensures that the algorithm makes reasonable progress. For example, the Armijo condition imposes sufficient decrease by requiring that f{xk + akdk) < f{xk) + aiUk\u00c7kdk, (2.3) for some constant cri \u20ac (0,1). The curvature condition imposes \"reasonable progress\" by requiring that ak satisfies gixk + akdkfdk > (r2gkm, [72Afc ,Afc], if pfc G [771,772], (2.8) ( 7 i A f c , 7 2 A f c ) , if Pk0, or X* = Ui, if g{x*)i < 0, or k = Ui. Furthermore, the strictly binding set is defined as Bs(x*) = B{x*) n {i : g{x*)i ^ 0}. The standard first-order necessary condition for a local minimizer x* requires g{x')i = Q {0TiiB{x*). (2.11) T h e expression (2.11) shows the importance of the binding constraints to the bound-constrained opt imizat ion problems. T h e first-order necessary condition is also called the first-order Karush-Kuhn-Tucker ( K K T ) condition. T h e second- order sufficient condition for x* to be a local minimizer of the bound-constrained problem (2.10) is that it satisfies the K K T condition and also w^H{x*)w > 0, for a l l vectors w ^ 0 such that Wi \u2014 0 for each i \u20ac Bs{x*). See [47, Chapter 12] for the detailed theory of constrained optimization. 2.2.1 Active-set methods Active-set methods for constrained optimization part i t ion the variables into active and inactive sets. 
Those variables whose indices are in the binding set are also classified as fixed variables, while the variables whose indices are not active are classified as free variables. Given any set of free variables, we can define the reduced gradient and the reduced Hessian matr ix , respectively, as the gradient and the Hessian matr ix of f{x) w i th respect to the free variables. In this terminology, the second-order sufficient condition requires that the reduced gradient be zero and that the reduced Hessian matr ix be positive definite at x*. In general, active-set algorithms for the solution of bound-constrained prob- lems use unconstrained minimizat ion techniques to explore the reduced problem defined by a set of free variables. Once this exploration is complete, a new set of free variables is chosen wi th the a im of dr iv ing the reduced gradient to zero. One drawback of the active-set method is that the working set changes slowly [47]. Usual ly at each iteration at most one constraint is added to or dropped from the working set. Thus the active-set method may require many iterations to converge on large-scale problems. For example, if there are ko ac- tive constraints at the starting point XQ, and kg constraints are active at the solution, then at least \\ks \u2014 fco| iterations wi l l be required to reach the solution. 2.2.2 Gradient projection methods A gradient-projection method is a special type of active-set method that allows the active set to change rapidly from iteration to iteration. It is a very efficient method when there is an effective method for the projection onto the feasible set. For example, projection onto bound constraints can be done wi th linear cost. However, there are also other sets that can be efficiently projected onto, see, e.g.. Conn , Gou ld , and Toint [9, Chapter 12]. Gradient projection methods can be used to minimize both convex and non- convex functions; the feasible set, however, must be convex. 
One popular ap- proach to the gradient projection methods is to implement each iteration by a two-stage process. F i r s t , we compute an active face of the box by searching along the steepest descent direction \u2014g from the current point x, and we com- pute a generalized Cauchy point by searching along the piecewise-linear path i n the feasible region. The working set is then defined to be the set of bound con- straints that are active at the Cauchy point. In the second stage, we compute a search direction using the active face by solving a subproblem i n which the active components of x are in the working set. Th is approach is implemented by B C L S [18], T R O N [33], Lancelot [8], L - B F G S - B [57], and some others. Given an arbitrary point x, the projection of x onto the feasible bounded region is defined as follows. The i t h component of the projection of x is given by r Ci if xi < \u00a3i, p{x,e,u)i = l Xi i\u00eexi \u20ac [ei,ui], [ Ui if Xi > Ui. The projected gradient of the objective function \/ at a point x onto the feasible region is defined by gix) = P{x-g(x),\u00ea,u)-x. (2.12) It is well-known that x* is a first-order stationary point for (2.10) if and only if gix*) = 0 [6], i.e., X* ^Pix* -g{x*),e,u). If the second-order sufficient condition can be satisfied, then the gradient- projection method is guaranteed to identify the active set at a solution in a finite number of iterations. After it has identified the correct active set, the gradient-projection algorithm reduces to the steepest-descent algorithm on the subspace of free variables. A s a result, this method is often used in conjunction w i t h a second-order method in order to achieve a faster rate of convergence. 2.3 Algorithms for bound-constrained optimization M a n y algorithms have been proposed to solve (2.10). For example, algorithms have been proposed by B y r d el al [5], C o n n et al [6, 7, 8], Hager and Zhang [23, 24], L i n and More [33], Zhu el al [57], and others. 
T h e main framework of recent approaches follows the two-stage process as described in Section 2.2.2. However, the algorithms differ on how to identify the active face and how to compute the search direction at each iteration. For example, the method in [6, 7] uses a two-norm trust-region method, and it computes the search step by applying the truncated conjugate-gradient method to the quadratic approximation of the objective function on the active face subject to the trust-region constraints. A similar approach is also used by L i n and More [33] in T R O N , but the t r u n - cated conjugate-gradient method is applied to the quadratic approximation of the objective function on the subspace parallel to the active face. The active-set method proposed by Hager and Zhang [23, 24] uses a backtracking line-search to identify the active face and apply the unconstrained method to the objective function on the active face. One well-known quasi-Newton method for solv- ing (2.10) is the L - B F G S - B algorithm [57], which computes a search direction by truncating the unconstrained L - B F G S [34] update relative to the subspace parallel to the active face. More recently, X u and Burke [53] have proposed the A S T R A L algorithm for solving large-scale nonlinear bound-constrained optimization problems. A S - T R A L is an active-set algorithm that uses both active-set identification tech- niques and l imited memory B F G S updat ing for the Hessian approximation. A t each iteration, A S T R A L uses a gradient projection step to determine an active face and then forms an o\u0302o trust-region quadratic programming (QP) subprob- lem relative to the active face. The trust-region subproblems are solved using pr imal -dual interior point techniques. The A S T R A L algorithm has a close re- lationship to the proposed algorithm in this thesis, thus we wi l l describe the A S T R A L algorithm in more detail in Chapter 4. 
One particular software package for solving bound-constrained problems that plays an important role in this thesis is B C L S [18]. The B C L S algorithm solves the linear least-squares problem wi th simple bounds on the variables minimize ^\\\\Ax - b\\\\l + ^\u00f4^\\\\x\\\\l + c'^x (2.13) subject to \u00a3 < X < u, where A is m x n (can be any shape), b G R\"\" , l,u,c e E \" , and 5 is a damping parameter. In Chapter 4, we show that the problem (2.13) has the exact form for the subproblems to be solved in our proposed algorithm ( B C N L S ) for bound- constrained nonlinear least-squares. In fact, the B C N L S algorithm utilizes the B C L S package to solve the subproblem at each iteration. We explain the B C L S algorithm and describe how to use it effectively w i th in the B C N L S algorithm i n Chapter 4. Chapter 3 Unconstrained Least Squares T h i s chapter reviews some theoretical background on unconstrained least squares. We start wi th a preliminary review of linear least squares in Section 3.1, and we describe the two-norm regularization techniques for solving the i l l -conditioned linear systems. In Section 3.2, we describe two classical methods for uncon- strained nonlinear least squares problems: the Gauss-Newton method and the Levenberg-Marquardt method. We also describe a self-adaptive strategy for updat ing the damping parameters in the Levenberg-Marquardt method. 3.1 Linear least squares T h e classical linear least-squares problem has the form minimize I I I A x - f e U ^ , (3.1) where is an m x n matr ix , and b is an m-vector. Th i s problem belongs to a special class of opt imizat ion problems wi th much interest. First ly , the problem (3.1) is a special case of nonhnear least squares. Secondly, the classical methods for nonhnear least squares, such as the Gauss-Newton method and the Levenberg-Marquardt method in Section 3.2, iteratively solve hnear least- squares subproblems. 
Geometrically, to solve the problem (3.1), we want to find a vector x \u20ac R \" such that the vector Ax \u20ac R\u2122 is the closest point i n the range of A to the vector b. This geometrical interpretation is i l lustrated in Figure 3.1. The problem (3.1) has been studied extensively, and well-known numeri - cal algorithms exist. See Lawson and Hanson [29], Go lub and V a n Loan [21, Chapter 5], and Trefethen and B a u [52, Lecture 11] for more detail. 3.1.1 Normal equations For a given x, define the residual by r = b \u2014 Ax. A vector x s R \" minimizes the norm of the residual if and only if r is orthogonal to the range of A: A^r = 0. Figure 3.1: Geometrical interpretation of least squares T h i s condition is satisfied if and only if \u00c0^M = A^b. (3.2) T h e equations in (3.2) is called the normal equations. Th i s system is nonsingular if and only if A has full rank. Consequently, the solution x is unique if and only if A has full rank. 3.1.2 Two norm regularization Regularization is a common technique for i l l -conditioned linear least squares problems [17]. The solutions of ill-posed problems may not be unique or may be sensitive to the problem data. The regularized least-squares problem has the quadratic form m i n i m K e ^\\\\Ax - bg + ^S'^\\\\x\\\\l (3.3) for some <5 > 0. Solving (3.3) is equivalent to solving the linear least squares problem 1 minuruze - x\u20acR\" 2 Note that the matr ix in (3.4) necessarily has full column rank. Historically, the study of problem (3.4) as a function of 5 has also been called ridge regression, damped least squares, or Tikhonov regularization [51]. The regularization is a technical device of changing the ill-posed problem to an approximate problem whose solutions may be well-behaved and preferable. For example, it may be preferable to have approximate solutions w i t h small norms. The intent of using 5 is to prevent ]|a;()2 from getting large when A is i l l -conditioned. 
B y performing singular value decomposition A = U'EV'^, we can show that Ml-tier\/. A ' ' b \u00e2l X \u2014 0 (3.4) where is the i t h column of the orthogonal matr ix U and CTJ is the i t h singular value of A. Note that increasing S would cause the norm \\\\xs || to decrease. Thus we can obtain some acceptable compromise between the size of the solution vector X and the size of the norm of the residuals by choosing proper 5. For further discussion about regularization and the choice of parameter S, refer to Lawson and Hanson [30, Chapter 25]. The two-norm regularization can also be regarded as a trust-region method. T h e solution x to equations (3.3) and (3.4) also solves mimmize i | | A E - 6 | | | (3.5) subject to ||xj|2 < A , for some A > 0. It can be shown that the solution of (3.4) is a solution of the trust-region problem (3.5) if and only i f x is feasible and there is a scalar 5 >0 such that x satisfies (3.4) and S{A - \\\\x\\\\2) = 0. See Nocedal and Wright [47, Chapter 10] for the proof. Instead of setting the trust region A directly, two-norm regularization mod- ifies A impl ic i t ly by adjusting the damping parameter S. The coimection be- tween the trust-region method and two-norm regularization is also revisited i n Section 3.2.2. Regularization techniques play a key role in this thesis. The B C L S algorithm in Section 4.4 solves a bound-constrained two-norm regularized linear least- squares problem. The Levenberg-Marquardt method discussed i n Section 3.2.2 solves a regularized linear least squares subproblem at each iteration. Moreover, the algorithm for the nonhnear least squares problems wi th simple bounds de- scribed in Chapter 4 solves a regularized linear least squares subproblem wi th bound constraints at each iteration. 
3.2 Nonlinear least squares With the theoretical background of optimization in Chapter 2 and methods for linear least-squares problems from Section 3.1, we now describe two classical methods for unconstrained nonlinear least-squares problems (1.1), namely, the Gauss-Newton method and the Levenberg-Marquardt method. For a more detailed description of these methods, refer to [47, Chapter 10]. 3.2.1 The Gauss-Newton method Perhaps one of the most important numerical methods is Newton's method. The Newton search direction is derived from the second-order Taylor series approximation to $f(x + d)$,
$$f(x + d) \approx f + g^T d + \tfrac{1}{2} d^T H d. \qquad (3.6)$$
Assume that $H$ is positive definite. By minimizing (3.6) at each step $k$, the Newton search direction is computed as the solution of the linear system $Hd = -g$. Newton's method requires $H$ to be positive definite in order to guarantee that $d$ is a descent direction. Also, $H$ must be reasonably well-conditioned to ensure that $\|d\|$ is small. When $H$ is positive definite, Newton's method typically has quadratic convergence. However, when $H$ is singular, the Newton direction is not defined. Since Newton's method requires computation of the second derivatives, it is only used when it is feasible to compute $H$. The Gauss-Newton method differs from Newton's method by using an approximation of the Hessian matrix. For nonlinear least-squares problems, instead of using the Hessian in (1.4), the Gauss-Newton method approximates the Hessian as
$$H \approx J^T J. \qquad (3.7)$$
By (1.3), we have $g = J^T r$. Thus at each iteration, the Gauss-Newton method obtains a search direction by solving the equations
$$J^T J d = -J^T r. \qquad (3.8)$$
The Hessian approximation (3.7) of the Gauss-Newton method gives a number of advantages over Newton's method. 
F i r s t , the computation of J only involves the Jacobian computation, and it does not require any addit ional derivative evaluations of the indiv idual residual Hessians V^ri(a;) , which are needed in the second term of (1.4). Second, the residuals ri{x) tend to be small in many apphcations, thus the first term J in (1.4) dominates the second term, especially when Xk is close to the solution a;*. In this case, J is a close approximation to the Hessian and the convergence rate of Gauss-Newton is similar to that of Newton's method. T h i r d , J is always at least positive semi-definite. W h e n J has full rank and the gradient g is nonzero, the Gauss-Newton search direction is a descent direction and thus a suitable direction for a line search. Moreover, if r{x*) = 0 and Jix*)\"^ J{x*) is positive definite, then x* is an isolated local min imum and the method is locally quadratically convergent. The fourth advantage of Gauss-Newton arises because the equations (3.8) are the normal equations for the linear least-squares problem minimize \\\\\\Jd + r\\\\l. (3.9) d In principle, we can find the Gauss-Newton search direction by applying linear least-squares algorithms from Section 3.1 to the subproblem (3.9). The subproblem (3.9) also suggests another view of the Gauss-Newton method. If we consider a linear model for the vector function r (x ) as r{xk + d)xrk + Jkd, then the nonlinear least-squares problem can be approximated by l\\\\r(xk + d)g^l\\\\Jkd+r4l Thus, we obtain the Gauss-Newton search direction by minimiz ing the linear least-squares subproblem (3.9). Implementations of the Gauss-Newton method usually perform a line search in the search direction d, requiring the step length to satisfy conditions as those discussed i n Chapter 2, such as the A r m i j o and Wolfe conditions described in Section 2.1.1. W h e n the residuals are small and J has ful l rank, the Gauss- Newton method performs reasonably well. 
However, when $J$ is rank-deficient or near rank-deficient, the Gauss-Newton method can experience numerical difficulties. 3.2.2 The Levenberg-Marquardt method Levenberg [32] first proposed a damped Gauss-Newton method to avoid the weaknesses of the Gauss-Newton method when $J$ is rank-deficient. Marquardt [37] extended Levenberg's idea by introducing a strategy for controlling the damping parameter. The Levenberg-Marquardt method is implemented in the software package MINPACK [38, 42]. The Levenberg-Marquardt method modifies the Gauss-Newton search direction by replacing $J^T J$ with $J^T J + \delta^2 D$, where $D$ is diagonal, $\delta > 0$. Hereafter, we use $D = I$ to simplify our description of the method. At each iteration, the Levenberg-Marquardt search direction $d$ satisfies
$$(J^T J + \delta^2 I)\, d = -J^T r. \qquad (3.10)$$
The Levenberg-Marquardt method can also be described using the trust-region framework of Chapter 2. Let $\Delta$ be the two-norm of the solution to (3.10); then $d$ is also the solution of the problem
$$\min_{d} \ \tfrac{1}{2}\|Jd + r\|_2^2 \quad \text{subject to } \|d\|_2 \le \Delta.$$
Hence, the Levenberg-Marquardt method can be regarded as a trust-region method. The main difference between these two methods is that the trust-region method updates the trust-region radius $\Delta$ directly, whereas the Levenberg-Marquardt method updates the parameter $\delta$, which in turn modifies the value of $\Delta$ implicitly. For more description of the differences between the two methods, see [42, 43, 55, 56] for details. Note that the system (3.10) is the normal equations for the damped linear least-squares problem
$$\min_{d} \ \tfrac{1}{2}\left\| \begin{bmatrix} J \\ \delta I \end{bmatrix} d + \begin{bmatrix} r \\ 0 \end{bmatrix} \right\|_2^2. \qquad (3.11)$$
The damped least-squares problem (3.11) has the same form as (3.4) in Section 3.1.2. Similar to the Gauss-Newton method, the Levenberg-Marquardt method solves the nonlinear least-squares problem by solving a sequence of linear least-squares subproblems (3.11). The local convergence of the Levenberg-Marquardt method is also similar to that of the Gauss-Newton method. 
The key strategy decision of the Levenberg-Marquardt method is how to choose and update the damping parameter S at each iteration. 3.2.3 Updating the damping parameter M u c h research has been done on how to update \u00d4. See, for example, [14, 36, 42, 46] for more details. From equations (3.10), if 5 is very small , the Levenberg- Marquardt step is similar to the Gauss-Newton step; and if (5 is very large, then d w \u2014-^9, and the resulting step is very similar to the steepest descent direction. Thus , the choice of 6 i n the Levenberg-Marquardt method affects both the search direction and the length of the step d. If x is close to the solution, then we want the faster convergence of the Gauss-Newton method; if X is far from x\", we would prefer the robustness of the steepest descent method. Similar to the trust-region method, the ratio pk (2.7) between the actual reduction of the objective function and the predicted reduction of the quadratic model is used to control and update the damping parameter 5k in the Levenberg- Marquardt method. In general, if the step dk is successful and pk is satisfactory, we accept dk and reduce S; otherwise, we reject dk and enlarge 5. Natmal ly , when Pk is close to 1 and the step dk is very successful, we may reduce Sk to near 0, then the next step would become an approximate Gauss-Newton step. O n the other hand, when dk is large and pk < 0, it is reasonable to enlarge Sk to move the search direction to a descent direction. We could adopt this strategy for updat ing the trust region radius A t and modify (2.8) for updating the damping parameter in the Levenberg-Marquardt method. For example, one possible approach, described by Fan [15], is T h e main drawback of this approach is that it is not sensitive to small changes in the values of pk- There exist jumps i n Sk-^-i\/Sk across the thresholds of rji and 772 [15]. 
Recently, Yamashi ta and Pukushima [54] show that if the Levenberg-Marquardt parameter is chosen as and if the in i t ia l point is sufficiently close to the solution x*, then the Levenberg- Marquardt method converges quadratically to the solution x*. D a n et a l [10] propose an inexact Levenberg-Marquardt method for nonhnear least-squares problem wi th the parameter chosen as (5fc\/4, if pk > Tj2, Sk, i f Pfc \u20ac [771,772], 4 4 , i f Pk 0. This subproblem has the benefit that it can be solved directly by the software package B C L S [18]. One remaining challenge lies in how to adjust 5k at each iteration. For the algorithm to have a rapid convergence rate, it is important to have an effective strategy to update 5k at each step. Essentially, the algorithm uses 5k to control both the search direction and the step size. A lgor i thm 4.1 outlines the basic framework of our approach. The main com- putational component of this algorithm is solving the bound-constrained linear least-squares subproblem (4.6). In this way, the bounds are handled directly by the subproblem solver. Thus, we can apply the self-adaptive damping strat- egy described in Section 3.2.3, and the local and global convergence properties remain the same as in the unconstrained cases. A t this point, it is necessary to show that the proposed framework is a reasonable approach for solving the bound-constrained nonhnear least-squares problem (4.1). The key global and local convergence properties of A lgor i thm 4.1 can be established by showing that the B C N L S algorithm has a very shnUar framework as the A S T R A L algorithm. The A S T R A L algorithm solves a general bound-constrained nonlinear prob- lem (2.10) by solving a sequence of o\u0302o trust-region subproblems. The subprob- lem has the basic template minimize ^S\" Bkd + g^d (4.7) d subject to Ik < d < Uk, \\\\d\\\\oo < Ak. 
T h e objective function is based on the quadratic model (2.5), where Bk'isa sym- metric positive definite Hessian approximation of the objective function at step A l g o r i t h m 4.1: Outl ine of the B C N L S method 1 initialization: fc < \u2014 0, a;o, 5o given. 2 while not optimal do 3 Solve the bound-constrained subproblem (4.6) for dk- 4 Compute the ratio pk (2.7). 5 if Pk is satisfactory then 6 Xk-+-i *- Xk + dk 7 else 8 1 Xk+l *\u2014 Xk- 9 end 10 Update the damping parameter Ok- 11 ie *-k+I. 12 end k. The A S T R A L algorithm solves the quadratic trust-region subproblem (4.7) by using a pr imal-dual interior point method. The outline of the A S T R A L algorithm is shown i n A lgor i thm 4.2. Note that the gradient projection restart procedure (line 6) m A lgor i thm 4.2 is a strategy to assist in the identification of the active constraints at the cur- rent solution. Under the standard nondegeneracy hypothesis, X u and Burke [53] established that the framework of A lgor i thm 4.2 converges both globally and locally to a first-order stationary point. Moreover, the sequence of {xk} gener- ated by the A S T R A L algorithm converges quadratically to x* under standard assumptions. The A S T R A L algorithm and its convergence theory follow the pattern estabhshed by Powell in [49] and has been used for similar algorithms from the Uterature [4, 6, 9, 47]. For detailed theoretical proof of the convergence properties, see [53]. After a l l , the nonlinear least squares problem (4.1) is a special case of the nonlinear problem (2.10). The similarity between the frameworks of Algo- r i t h m 4.1 and A l g o r i t h m 4.2 suggests that the convergence properties of the A S T R A L algorithm apply to the B C N L S algorithm for the bound-constrained nonlinear least-squares problem. The main differences lie in the specific tech- niques used for solving the quadratic subproblems. In this sense. 
A lgor i thm 4.1 specializes the A S T R A L algorithm to a special class of nonlinear least-squares opt imizat ion problems wi th simple bounds on variables. The new approach takes advantage of the special structure of the objective function by ut i l iz ing the special relationships between its gradient, Hessian and the Jacobian. Our approach eliminates the need to bui ld , compute and possibly modify the Hessian of the objective. Therefore, the B C N L S algorithm can make use of the function and gradient evaluations more efficiently than other similar approaches for the general nonlinear optimization problems. A l g o r i t h m 4.2: Outl ine of the A S T R A L method 1 initialization: \u00c2; <\u2014 0, l o given, SQ > 0, \u00d4Q min{maxi {u j \u2014 Zi},(5o}; 2 initial ize symmetric positive definite BQ \u20ac R . 3 choose a set of controlling constants. 4 while not optimal do 5 if ||dfc||oo is not ac\u0153ptable then 6 Perform a gradient projection restart step. 7 end 8 Solve the trust-region subproblem (4.7) for dk- 9 Compute the ratio pk (2.7). 10 if Pk is satisfactory then 11 1 Xk+i ^ Xk + dk 12 else 13 Xk-^l Xk- 14 end 15 Update symmetric positive definite matr ix Bk- 16 Update the trust-region radius. 17 k*-k + l. 18 end 4.3 Stabilization strategy T h e A S T R A L algorithm makes use of the l^o trust-region on the search direction d for the subproblems. T h e \u00a3oo trust region constraint where e = ( 1 , 1 , . . . , 1)'^. B y superimposing the \u00a3oo trust region constraint onto the feasible bounds, the resulting feasible region continues to have a nice rectangular box shape, and the subproblems can be solved by applying the bound-constrained quadratic programming techniques. A t each iteration k, the A S T R A L algorithm updates the trust-region radius Ak based on the perfor- mance of the previous steps. The strategy of updating the trust-region radius prevents the norm of d from getting large and thus has the effect of stabilizing the algorithm. 
The B C N L S algorithm uses a very simUar strategy to the A S T R A L algo- r i t h m . Rather than updating the trust-radius directly, we use the two-norm reg- ularization technique and formulate the subproblem as the form of (4.6). The damping parameter 5k is then updated by using the self-adaptive Levenberg- Marquardt strategy in Section 3.2.3. The theoretical equivalence between the trust-region method and the two-norm regularization techniques has been es- tablished in Section 3.2.2 and Section 3.2.3. Compared w i th the trust-region l|4||oo 0 2 Set scaling factor ao > 0 3 Set updating constant P \u00c7 (0,2] 4 Define a nonnegative nonlinear function q{p) 5 Set the min imum scaling factor 0 < amin < cto 6 Set the starting point do for B C L S T Set step acceptance parameter 0 < po < 1 8 Set optimality tolerance \u20ac 9 Set max imum iterations 10 Set max imum function evaluations 11 A; +- 0 12 Xk +\u2014 XQ 13 dfc \u00ab\u2014 do 14 Compute the residuals <\u2014 funObj (xk) 15 Sk aollrfcll\". 16 Set iStepAccepted < \u2014 true 17 while ll\u0302 fcll > e do 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 38 36 37 38 39 40 e n d 41 x*-Xk, g*-9k, 9^9k- if iStepAccepted then Compute the gradient gk <\u2014 J p r o d ( r t , 2) Compute the projected gradient gk <\u2014 P{xk \u2014 gk,i) u) \u2014 Xk i k - i - x k Uk*-U-Xk end [dk] *- BCLSiJprod, - r^ , 4 , Uk, do, 0, Sk) Compute new residuals rnew *\u2014 funObj (x^ + dk) Compute the ratio pk (2.7). if pk > Po then Xk+i *- Xk+dk rk+l *\u2014 ''new do *\u2014 dk IStepAccepted else Xk+l <- Xk Tk+l Tk IStepAccepted end Set ak+\\ max{Q;min,a!fcg(pfc)}. Update 5k+i ^ ak+iWrkW\"\u2022 k^k + 1. true false. subproblems and potentially use more computation of the matrix-vector prod- ucts w i th Jacobian and its transpose. However, in general, large-scale problems requires more computing time to evaluate the function and gradient values. Therefore, the B C N L S algorithm can perform well for large-scale problems. 
Our implementation of A lgor i thm 4.3 shows that we have achieved our p r i - mary goals for the bound-constrained nonlinear least-squares solver. The B C - N L S algorithm has the following basic features. F i r s t , two-norm regularization and damping techniques are used in solving the subproblems. Second, Hessian approximations are never formed and no second-order derivatives are required to be computed. T h i r d , the simple bounds are handled directly by the sub- problem solver B C L S and the sequence of {xk} is always feasible at any time. Fourth , the B C L S algorithm makes use of the conjugate-gradient type method L S Q R . B y inherit ing the significant features from the B C L S and L S Q R soft- ware packages, the B C N L S algorithm uses the Jacobian only as an operator. It only requires the user to provide matrix-vector products for the Jacobian and its transpose. Thus the B C N L S package is amenable for bound-constrained nonlinear least-squares problems i n large-scale. Chapter 5 Numerical Experiments We give the results of numerical experiments on two sets of nonlinear least- squares problems. The first set is composed of the fifteen benchmark nonlinear least-squares problems from the Net l ib collection A lgor i thm 566 [1, 39, 40]. The second set of test problems are generated from the C U T E r testing environment [22]. Us ing these two sets of test problems, we compare the performances of B C - N L S , L - B F G S - B and A S T R A L . The L - B F G S - B Fortran subroutines were down- loaded from [5, 57]. The A S T R A L M a t l a b source codes were downloaded from [53]. The comparisons are based on the number of function evaluations and C P U times. 5.1 Performance profile To il lustrate the performance, we use the performance profile technique de- scribed i n Dolan and More [12]. 
The benchmark results are generated by run- ning the three solvers on a set of problems and recording information of interest such as the number of function evaluations and C P U time. Assume that we are interested in using C P U time as a performance measure, we define tp.^ as the C P U time required to solve problem p by solver s. Let ibest be the min imum C P U time used by a l l solvers for problem p. The performance ratio is given by r - Is^ tbest T h e ideas are the same when comparisons are based on the number of function evaluations. The performance profile for solver s is the percentage of problems that a performance ratio rp^g is w i th in a factor T \u20ac R of the best possible ratio. A plot of the performance profile reveals a l l of the major performance characteristics. In particular, if the set of problems is suitably large, then solvers wi th large probabil ity psir) are to be preferred. One important property of performance profiles is that they are insensitive to the results on a small number of problems. A lso , they are largely unaffected by small changes in results over many problems [12]. In this chapter, al l the performance profile figures are plotted in the log j - scale. The M a t l a b performance profile script perf.m was downloaded from http : \/ \/www-unix .mcs .anl .gov \/~more \/cops [12]. If a solver s fails on a given problem p, its corresponding measures, such as the number of function evalu- ations or C P U time, are set to N a N . In this way, the performance profile not only captures the performance characteristics, but also it shows the percentage of solved problems. 5.2 Termination criteria In our numerical experiments, each algorithm is started at the same in i t ia l point. 
We use the following criteria to terminate the algorithms: < emach-10^ < e < e (*) < < 1000, where emach is the machine epsUon, e is the opt imal i ty tolerance, gk is the projected gradient (2.12) of the objective function, n \/ is the number of function evaluations. Note that the termination criteria are the same as in the A S T R A L and L - B F G S - B solvers, except that (*) is partictilar to the B C N L S solver. If an algorithm terminates but the optimality condition < e is not satisfied, we consider the algorithm to have failed on the given problem. However, since the objective f imction is in the form of least-squares, i f an algorithm terminates w i t h \/fc < e and Xk is feasible, we consider the algorithm succeeded on the given problem, regardless of the value of the projected gradient. 5.3 Benchmarlc tests with Algorithm 566 T h e test set of benchmark least-squares problems from Nethb A lgor i thm 566 are unconstrained least-squares problems. We do not include Problems 1-3 from the original Nethb collection, as these are hnear least-squares problems. The remaining fifteen nonlinear least-squares problems are used as the benchmark to test the three boimd-constrained algorithms. We arbitrari ly choose the bounds on the variables to be 0 < i < oo. The original Net l ib problems were implemented in Fortran subroutines. We recoded the problems in M a t lab. Furthermore, we use the standard in i t ia l points as defined i n A l g o r i t h m 566 for a l l the test problems. The results w i th fifteen test problems are given in Table 5.1. To summarize and compare the overall performances of the three solvers based on the results in Table 5.1, we use the following criteria to judge the outcomes (success or failure) of the solvers on any problem. If both \/ > 10~^ and > 10\"\"*, then we consider the solver has failed on the problem, otherwise it is a success. Based on the criteria, we show the overall performance of the three algorithms in Table 5.2. 
\\\\fk-fk-i\\\\ max(l,||Mi,||A_i||) Il5.ll fk m rif Table 5.1: Results of fifteen benchmark tests from Algor i thm 566 Problem Algor i thm 9\\\\ rif time(Sec) B C N L S L B F G S B A S T R A L 2.47e - 15 5.08e - 06 7.42e - 06 1.30e - 07 3 . 5 1 e - 0 1 5.36e - 02 5 3.00e - 02 22 1.12e - 02 36 5.20e - 01 B C N L S L B F G S B A S T R A L 3.62e + 02 3.62e + 02 5.31e + 01 2.50e + 02 2.50e + 02 1.29e - 03 1 9.70e - 01 22 1.43e - 02 22 3.59e - 01 B C N L S L B F G S B A S T R A L 9.98e - 30 6.82e - 06 9.83e - 06 9.99e - 15 1.05e - 02 1.27e - 03 7 4.00e - 02 21 1.51e - 02 27 5.79e - 01 B C N L S L B F G S B A S T R A L 6.40e + 01 6.40e + 01 6.40e + 01 4.62e - 07 2 . 1 3 e - 1 4 1.74e - 06 4 2.00e - 02 5 7.95e - 03 3 5.53e - 02 B C N L S L B F G S B A S T R A L 4 . 1 1 e - 0 3 4 . 1 1 e - 0 3 4.11e - 03 1.28e - 05 3.46e - 06 3.52e - 06 6 4.00e 37 2.07e 86 1.16e - 0 2 - 0 2 + 00 B C N L S 4 L B F G S B A S T R A L 1 . 5 4 e - 0 4 1.54e - 04 1.54e - 04 3.29e - 06 1.08e - 06 6.90e - 06 9 7.00e 34 2.18e 72 6.33e - 0 2 - 0 2 - 0 1 10 B C N L S L B F G S B A S T R A L 1.21e + 04 7.30e + 08 5.03e + 06 2.69e + 05 4.06e + 10 2.86e + 07 21 1.40e 4 8.36e 31 5.55e - 0 1 - 0 3 - 0 1 11 B C N L S L B F G S B A S T R A L 6.88e - 03 6.88e - 03 6.88e - 03 1.53e - 06 6.61e - 06 2.72e - 05 6 7.00e 33 2.95e 45 6.36e - 02 - 0 2 - 0 1 12 B C N L S L B F G S B A S T R A L 5.65e - 08 5 . 
1 7 e - 0 6 9.30e - 06 2.66e - 04 8.43e - 03 3.08e - 03 5 4.00e 21 1.48e 32 5.54e - 0 2 - 0 2 - 0 1 13 B C N L S L B F G S B A S T R A L 6.22e + 01 6.22e + 01 6.22e + 01 3.52e - 03 1.60e - 05 1.14e - 02 18 l.OOe 21 1.31e 37 5.21e - 0 1 - 0 2 - 0 1 14 B C N L S L B F G S B A S T R A L 1.06e 1.06e 1.06e \u202205 -05 \u202205 1.14e + 00 1.57e - 03 1.66e - 03 15 9.00e 30 2.10e 17 3.61e - 0 2 - 0 2 - 0 1 15 150 B C N L S L B F G S B A S T R A L 7.39e - 06 4.81e - 02 4.40e - 02 2.33e - 04 1.87e + 00 2.29e + 00 86 l .O le 145 2.29e 201 5.42e + 02 + 00 + 00 B C N L S 1 .22e - 14 2.50e - 07 3 5.31e + 00 16 2000 L B F G S B l.OOe + 09 2.00e + 06 4 4.83e + 02 A S T R A L l , 4 3 e - 22 6.90e - 1 0 14 1.64e + 01 Continued on next page Table 5.1 - continued from previous page Problem n A lgor i thm \/ \\\\9 I time(Sec) B C N L S 2.54e - 02 2.57e - 0 3 11 l.OOe - 01 17 5 L B F G S B 2.54e - 02 2.18e - 0 6 40 3.07e - 01 A S T R A L 2.54e - 02 2.52e - 0 2 76 1.37e + 00 B C N L S 2 . 0 1 e - 02 8.45e - 0 6 15 2.30e - 01 18 11 L B F G S B 2.01e - 02 4.90e - 0 6 73 3.36e - 01 A S T R A L 2.01e - 02 5.35e - 0 5 357 5.65e + 00 Solver Success Failure Success Rate(%) B C N L S 10 5 67 L B F G S B 10 5 67 A S T R A L 9 6 60 Table 5.2: Summary of the benchmark tests from Algor i thm 566 Overal l , the three algorithms have similar success rates on this set of test problems. For Problem 15 {Chebyquad function), both L - B F G S - B and A S - T R A L seem to have difficulties in finding the solution, but B C N L S has per- formed reasonably well on the problem, terminated wi th \/ = 7.39 x 10\"^ and (l\u0302 ll = 2.33 X 10\" ' ' . A lso , a l l three solvers failed on Problem ^{Helical Valley Function), Prob lem 10 {the Meyer function) and Problem 14 (the Brown and Dennis function). For Problem 16 [the Brown almost-linear function), both B C N L S and A S T R A L performed well, but I ^ B F G S - B d id not solve i t . 
O n the other hand, for Prob lem 17 {the Osborne 1 function), L - B F G S - B performed bet- ter than A S T R A L and B C N L S . For details on these function definitions, refer to [1, 39, 40]. Because both B C N L S and A S T R A L were written in M a t l a b , whereas L - B F G S - B was written in Fortran, we compare the number of function eval- uations among the three algorithms. T h e computing time is compared only between B C N L S and A S T R A L . Figure 5.1 and Figure 5.2 illustrate the perfor- mance profiles for B C N L S , L - B F G S - B , and A S T R A L on the fifteen benchmark nonhnear least-squares problems. Figure 5.1 compares the number of function evaluations among the three solvers. We observe that the performance profile for B C N L S hes above both L - B F G S - B and A S T R A L for T < 2.6. T h a t is, for approximately 55% of the problems, the performance ratio of the function evaluation for the B C N L S algorithm is wi th in a factor 2.6 of the best possible ratio. Therefore, the performance profile of the number of function evaluations for B C N L S dominates that of L - B F G S - B and A S T R A L . We conclude that B C - N L S is more efficient in terms of function evaluation. Figure 5.2 compares C P U time between B C N L S and A S T R A L . Clearly, the performance profile of C P U time for B C N L S dominates A S T R A L , thus B C N L S uses less C P U time than A S T R A L . Performance profiles, number of function evaluations 0 .4 ; 0.\u00c0 -A \u2014 \u2022 \u2014 B C N L S - \u2022 - L B F G S B \u2022 - D - A S T R A L 9 ^ <>\u2022\u2022 I \u00ab& t 9\" - 6 o-\u00e8 - 6 \u2022\u2014-a 1 ,H - i 9 2 . 5 3 X Figure 5.1: Performance profiles on benchmark problems from A l g o r i t h m 566: number of function evaluations. 
5.4 C U T E r feasibility problems T h e general constrained C U T E r test problems have the form minimize f(x) , (5.1) subject to 4 < c(x) \u2022 R\u2122, 4 and Uc are lower and upper bounds on c{x), and ix and Ux are lower and upper bounds on x. We use problems from the C U T E r set to generate a set of nonhnear least- squares test problems. These problems are cast as that of finding a feasible point to (5.1). Such problems can arise as subproblems for more general opt imizat ion packages, where it is important to have a feasible starting point. We can rewrite the constrained problem (5.1) as minimize fix) (5.2) subject to c{x) \u2014 s = 0, (\u2022X ^ X < Ux- Because we are only interested in getting a feasible point for the C U T E r prob- Performance profiles, CPU time 0) o.e( \u2022 Oi SS S 0 . 9 \u2022 0 .3 0 .2 0.1' 0 \u2022 -6 \u2022 -** 20 and depth > 1mm. Table 6.4 compares the depth estimation results from B C N L S and A R T _ L M for the latter case. Notice that a l l error margins and standard deviations obtained w i th B C N L S are very close to those obtained wi th the A R T X M algorithm. Figure 6.7 illustrates the estimated depth compared w i th true depth. F i g - ure 6.8 shows the relationship between the error of the estimated depth and the signal to noise ratio S N R . In general, signal w i th higher signal to noise ratio results i n more accurate estimated depth. These results by B C N L S also match w i t h the results from A R T _ L M very closely. Parameter Comparison B C N L S A R T X M errTr 0.07 0.07 . 
n errMax 0.17 0.19 std 0.09 0.11 errTr 0.13 0.19 T2 errMax 0.35 0.52 std 0.17 0.23 errTr 0.11 0.11 Oil errMax 0.29 0.28 std 0.14 0.15 errTr 1.53 2.92 DC e r r M a x 2.59 7.71 std 0.83 2.96 C P U time (seconds) 43.00 36.00 Table 6.2: Comparison of lifetime fitting using B C N L S and A R T X M D a t a points A l l (A) d > 1mm (B) S N R > 20 (C) (B) & (C) d con a con d con d con m n E r r M g n 0.28 64.56 0.21 63.04 0.17 34.75 0.11 34.91 r M n M g n 25.41 24.47 3.84 23.88 19.75 13.15 1.96 13.24 r M a x M g n 259.45 216.93 30.89 90.73 178.82 44.91 8.63 44.91 meanStd 0.16 21.54 0.12 21.59 0.11 13.48 0.06 13.71 r M e a n S t d 15.61 8.33 2.22 8.30 12.56 5.12 1.17 5.20 r M a x S t d 139.94 92.98 15.23 49.86 93.62 15.19 5.29 15.19 Table 6.3: Comparison of depth fitting results for (A) a l l data points, (B) data points w i th depth > 1mm, (C) data points with S N R > 20, (D) data points w i t h S N R > 20 and depth > 1mm 6.6 Optical properties estimation T h e effective optical properties, absorption coefficient \/Xa and reduced scattering coefficient \/ i ^ , of biological tissues play a crit ical role in most algorithms for processing optical fluorescence data. For small animals such as mice, there can be large variations of the optical properties wi th in a given region of interest. In estimation of optical properties, the curve f itt ing problem has the same general form as the curve fitting problem in Section 6.3. The unknown param- eters are the optical properties \/i\u00bb and \/x^, and the amplitude A. The lower and upper bounds of the optical properties are based on a priori knowledge of the medium optical properties values. In our experiments, these bounds are set as 0.0001 ** 20 and depth > 1mm 11r - 10 r E r r C e n = 0 . 5 % , rE r rMax=2 .0%, rS td=1 .2% ^ \u2022 0 e r r o r M a x b a r i X S tdBar true 2 5 30 3 5 4 0 4 5 5 0 b e s t S N R Figure 6.8: Relative accuracy of calculated depth versus signal S N R where the units of M o and p'^ are mm~^. 
6.6.1 Simulation for estimating optical properties A n I R F curve w i th a peak count of 1000 and a ful l -width-at-half -max ( F W H M , or the time between 50% of the m a x i m u m count level before and after the peak) of 400 picoseconds was numerically generated. The peak count of the T P S F s was fixed to be 1500. Different T P S F curves were calculated using each pair of {Ha, p's) combination from the pre-selected sets of values i n Table 6.5. For comparisons, one set of tests was carried out without noise. For each pair of (\/Xa, Ms), multiple trials were performed by adding Poisson noise (shot noise) to the T P S F and I R F curves. The mean values of the fitted optical properties and standard deviation for a l l the trials were then computed. The relative errors are the differences between the fitted values and their corresponding true values are shown in Table 6.5. Parameter Values (mm )\u0302 Ma 0.001, 0.005, 0.01, 0.03, 0.05, 0.1 0.2, 0.5, 1, 2, 3 Table 6.5: fia and p'^ values used in simulation without noise w i th noise Comparison Statistic A R T _ L M B C N L S A R T _ L M B C N L S Residual M e a n S T D 0.00 0.00 0.00 0.00 65.60 1.95 65.71 1.96 T i m e (s) M e a n 0.15 0.17 0.13 0.16 S T D 0.03 0.08 0.13 0.03 R e l Pa (%) M e a n -0.00 0.00 -1.48 -0.22 S T D 0.00 0.00 4.56 4.10 Re l (%) M e a n S T D 0.00 0.00 0.00 0.00 -0.35 0.79 0.00 0.75 Table 6.6: Comparison of results of optical properties wi th A R T i M and B C - N L S 6.6.2 Results for optical properties We have compared the performance of the solver B C N L S wi th A R T X M , note that no parameter transform was used wi th A R T _ L M in this case. The compar- ison criteria for different algorithms include the relative error, residual function value at the solution, and computation time. Table 6.6 shows the results of optical properties for the simulated data. For the data without Poisson noise, B C N L S performs as well as A R T X M method. 
However, for the simulations with Poisson noise, BCNLS outperforms the ART_LM method. 6.7 Discussion The numerical experiments on estimation of fluorescence lifetime, depth and optical properties at ART illustrate that BCNLS can be applied to solve the curve fitting problems with bounds on parameters in practice. The challenges in fluorescence lifetime fitting lie in multiple lifetime fitting. Although the experimental results presented in this chapter are limited to fluorescent signals of two fluorophore components, BCNLS can be easily adapted to the cases for multiple fluorescence lifetime curve fitting. Moreover, curve fitting is a process of finding an optimized combination of unknown parameters by some optimization method. In theory, there are some differences between the solutions of curve fitting optimization and the reconstruction of the known values in the simulation. Given the mathematical model for fluorescence signal in Section 6.2, our experiments reveal that a good curve fitting optimization solution may not always result in satisfactory reconstruction of the unknown. Nevertheless, the experiments show that the BCNLS solver is able to handle the simple bounds on variables in this curve fitting application. Chapter 7 Conclusions and Future Work In this thesis, we have developed the BCNLS algorithm for bound-constrained nonlinear least squares that solves a sequence of bound-constrained linear least squares subproblems, using techniques motivated by the Levenberg-Marquardt method and ℓ2-norm regularization. The subproblems are solved by BCLS, an existing solver for bound-constrained linear least squares. Our approach differs substantially from previous general bound-constrained optimization methods. Our approach takes advantage of the special structure of the objective and uses the two-norm regularization to formulate the subproblems. 
BCNLS efficiently uses function evaluations at the expense of potentially solving more linear least-squares subproblems. The convergence properties of the new method are generally as good as, or better than, quasi-Newton methods such as L-BFGS-B, or the trust-region approach such as ASTRAL. Preliminary results are promising. The industrial application of curve fitting in the fluorescence optical imaging field suggests that the new algorithm may prove to be useful in practice. There is much scope for further development of the algorithm described in this thesis. One promising direction would be to allow the user to choose preconditioners to speed up the process of solving the subproblems for large-scale systems. It is well-known that conjugate-gradient methods can be accelerated if a nonsingular preconditioner matrix M is available. Because the subproblem solver BCLS uses the conjugate-gradient LSQR algorithm, we believe that preconditioning would be useful in our proposed algorithm. However, good preconditioners are hard to define, especially for large and sparse systems, although BCLS is capable of solving the subproblem with user-supplied preconditioning routines. Finally, the damping parameter δ plays a critical role in the proposed algorithm BCNLS. The performance of the updating strategy for δ depends on the problem. It is a longstanding challenge to find a perfect updating strategy for different optimization problems. The self-adaptive Levenberg-Marquardt strategy works well in general, but further investigation may lead to better updating strategies. Bibliography [1] V. Z. Averbukh, S. Figueroa, and T. Schlick. Remark on Algorithm-566. Transactions on Mathematical Software, 20(3):282-285, 1994. [2] D. P. Bertsekas. Constrained Optimization and Lagrange Multiplier Methods. Academic Press, 1982. [3] Å. Björck. Numerical Methods for Least Squares Problems. SIAM, Philadelphia, 1996. [4] J. 
Burke , J . J . More , and G . Toraldo. Convergence properties of trust region methods for linear and convex constraints. Math. Program., 47(3):305-336, 1990. [5] R . H . B y r d , P. L u , J . Nocedal, and C . Zhu. A l imited memory algorithm for bound constrained optimization. SIAM Journal on Scientific and Statistical Computing, 16(5):1190-1208, 1995. [6] A . R . Conn , N . L M . Gould , and P. L . Toint. G loba l convergence of a class of trust region algorithms for optimization w i th simple bounds. SIAM J. Numer. Anal, 25:433-460, 1988. [7] A . R . Conn , N . I. M . Gou ld , and P. L . Toint. Testing a class of meth- ods for solving minimizat ion problems wi th simple bounds on variables. Mathematics of Computation, 50(182):399-430, 1988. [8] A . R . Conn , N . L M . Gould , and P. L . Toint. LANCELOT: A Fortran pack- age for large-scale nonlinear optimization (Release A). Springer-Verlag, 1992. [9] A . R . Conn , N . I. M . Gou ld , and P. L . Toint. Trust-Region Methods. M P S - S I A M Series on Opt imizat ion . S I A M , 2000. [10] H . D a n , N . Yamashita , and M . Fukushima. Convergence properties of the inexact Levenberg-Marquardt method under local error bound. Optimiza- tion methods and software, 17:605-626, 2002. [11] R. S. Dembo, S. C. Eisenstat, and T . Seihaug. Inexact Newton methods. SIAM J. Numer. Anal, 19:400-408, 1982. [12] E . D . Do lan and J . J . More. Benchmarking optimization software w i th per- formance profiles. Mathematical Programming, pages 201-213, 2002. [13] G . M a et al. Fluorescence lifetime estimation of multiple near-infrared dyes i n mice. In F . S. A z a r and X . Intes, editors, Multimodal Biomedical Imag- ing III, volume 6850. Proceedings of the International Society of O p t i m a l Imaging, February 2008. [14] J . Fan and J . P a n . Convergence properties of a self-adaptive Levenberg- Marquardt algorithm under local error bound condition. Computational Optimization and Applications, 34:47-62, 2006. [15] J . Y . 
Fan . A modified Levenberg-Marquardt algorithm for singular system of nonlinear equations. Journal of Computational Mathematics, 21(5):625- 636, 2003. [16] R . Felkel. Selection of nonlinear programming problems using the C U T E classification scheme, J u l y 1999. [17] C. Fraley. Solution of Nonlinear Least-Squares Problems. P h D thesis, Stan- ford University, 1987. [18] M . P. Friedlander. B C L S : B o u n d constrained least-squares, M a r c h 2007. h t tp : \/ \/www.cs .ubc . ca \/~mpf \/b c l s \/ . [19] M . P. Friedlander and K . Hatz . Comput ing nonnegative tensor factor- izations. Tech. Rep. TR-2006-21, Dept. Computer Science, University of B r i t i s h Co lumbia , Vancouver, December 2007. To appear in Optimization Methods and Software. [20] P. E . G i l l , W . Murray , and M . H . Wright . Practical Optimization. Academic Press, 1981. [21] G . H . Go lub and C. L . V a n Loan . Matrix Computations. Johns-Hopkins, 1983. [22] N . I . M . Gou ld , D . Orban, and P. L . Toint. C U T E r and SifDec: A con- strained and unconstrained testing environment, revisited. ACM Transac- tion on Mathematical Software, 29(4):373-394, 2003. [23] W . Hager and H . Zhang. A new active set algorithm for box constrained optimization. S I AM Journal on Optimization, 17(2):525-557, 2006. [24] W . Hager and H . Zhang. Recent advances in bound constrained opt imiza- tion. In F . CeragioU, A . Dontchev, H . Furuta , K . M a r t i , and L . Pandolf i , editors, System Modeling and Optimization, volume 199, pages 67-82. I F I P International Federation for Information Processing, Springer, 2006. [25] D . H a l l , G . M a , F . Lesage, and Y . Wang. Simple t ime-domain optical method for estimating the depth and concentration of a fluorescent inc lu - sion in a turb id medium. Optics Letter, 29(19) :2258-2260, 2004. [26] L . Hei . A self-adaptive trust region method. Technical report, Chinese Academy of Sciences, 2000. Report , A M S S . [27] J . E . Dennis J r . , D . M . 
Gay, and R . E . Welsch. A l g o r i t h m 573: N L 2 S 0 L - a n adaptive nonlinear least-squares algorithm. ACM Transactions on Mathe- matical Software, 7(3):369-383, Sept. 1981. [28] J . Lakowicz . Principles of fluorescence spectroscopy. Springer, 2006. [29] C. L . Lawson and R . J . Hanson. Solving Least Squares Problems. N u m - ber 15 in Classics in App l i ed Mathematics . Society of Industrial and A p - plied Mathematics , 1995. Orig inal ly published: Prent ice-Hal l , Englewood Cliffs, N J , 1974. [30] C . L . Lawson and R . J . Hanson. Solving Least Squares Problems. S I A M , 1995. [31] F . Leblond, S. Fortier, and M . P. Friedlander. Diffuse optical fluorescence tomography using time-resolved data acquired i n transmission. In Fred S. A z a r , editor. Multimodal Biomedical Imaging II, volume 6431. Proceedings of the International Society of O p t i m a l Imaging, February 2007. [32] K . Levenberg. A n method for the solution of certain nonlinear problems i n least squares. Quarterly of Applied Mathematics, 2:164-168, 1944. [33] C. L i n and J . J . More. Newton's method for large bound-constrained opt i - mizat ion problems. SIAM Journal on Optimization, 9(4):1100-1127, 1999. [34] D . C . L i u and J . Nocedal. O n the l imited memory method for large scale optimization. Mathematical Programming B, 45(3):503-528, 1989. [35] G . M a , J . Delorme, P. Gal lant , and D . Boas. Comparison of simplified Monte Car lo simulation and diffusion approximation for the fluorescence signal from phantoms wi th typical mouse tissue optical properties. Applied Optics, 46(10):1686-1692, 2007. [36] K . Madsen, H . B . Nielsen, and O. Tinglef\u00ef. Methods for nonlinear least squares problems. Technical report, Technical University of Denmark, A p r i l 2004. [37] D . W . Marquardt . A n algorithm for least squares estimation of nonlinear parameters. Journal of the Institute of Mathematics and its Applications, 11(2):431-441, June 1963. [38] J . J . More, B . S. 
Garbow, and K . E . Hi l l s trom. User guide for M I N P A C K - 1 . A N L - 8 0 - 7 4 , Argonne Nat ional Laboratory, Argonne, 1980. [39] J . J . More , B . S. Garbow, and K . E . Hi l l s trom. A lgor i thm 566: F O R - T R A N subroutines for testing unconstrained optimization software. ACM Transactions on Mathematical Software, 7(1):136-140, 1981. [40] J . J . More, B . S. Garbow, and K . E . Hi l l s trom. Testing unconstrained opt i - mization software. ACM Transactions on Mathematical Software, 7(1):17- 41, 1981. [41] J . J . More , D . C . Sorensen, K . E . Hi l l s t rom, and B . S. Garbow. The M I N P A C K project. In W . J . Cowell , editor, Sources and Development of Mathematical Software, pages 88-111. Prent ice-Hal l , 1984. [42] J . J . More. The Levenberg-Marquardt algorithm: implementation and the- ory. In G . A . Watson, editor, Lecture Notes in Mathematics 630: Numerical Analysis, pages 105-116. Springer-Verlag, 1978. [43] J . J . More. Recent developments in algorithms and software for trust region methods. In A . Bachem, M . Grotschel, and B . Kor te , editors. Mathematical Programming: The State of Art, pages 258-287. Springer-Verlag, 1983. [44] J . J . More and S. J . Wright . Optimization software guide. Society of Indus- t r ia l and App l i ed Mathematics , 1993. [45] D . D . Morr ison. Methods for nonhnear least squares problems and conver- gence proofs, tracking programs and orbital determinations. In Proceedings of the Jet Propulsion Laboratory Seminar, pages 1-9, 1960. [46] H . B . Nielsen. Damping parameter i n Marquardt ' s method. Technical report, Informatics and Mathematica l Model l ing , Technical University of Denmark, D T U , R i chard Petersens Plads , Bu i ld ing 321, DK-2800 Kgs . Lyngby, A p r i l 1999. [47] J . Nocedal and S. J . Wright . Numerical Optimization. Springer, second edition, 2006. [48] C . C. Paige and M . A . Saunders. L S Q R : A n algorithm for sparse linear equations and sparse least squares. 
ACM Transactions on Mathematical Software, 8(1):43-71, 1982. [49] M . J . D . Powell. Convergence properties of a class of minimizat ion algo- r ithms. In R . R . Meyer and S . M . Robinson, editors. Nonlinear Programming 2, pages 1-27. Academic Press, 1975. [50] K . Schittkowski. D F N L P : A F o r t r a n implementation of an S Q P - G a u s s - Newton algorithm. Report , Department of Computer Science, University of Bayreuth , 2005. [51] A . N . Tikhonov and V . Y . Arsenin. Solutions of ill-posed problems. W i n s t o n and Sons, 1977. [52] L . N . Trefethen and D . B a u III. Numerical Linear Algebra. S I A M , 1997. [53] L . X u and J . Burke . A S T R A L : A n active set \/oo-trust-region algorithm for box constrained optimization. Optimization online, J u l y 2007. [54] N . Yamashi ta and M . Fukushima. O n the rate of convergence of the Levenberg-Marquardt method. Computing (Suppl), 15:239-249, 2001. [55] Y . X . Y u a n . Trust region algorithms for nonlinear equations. Information, 1:7-20, 1998. [56] Y . X . Y u a n . A review of trust region algorithms for optimization. In J . M . B a l l and J . C . R . Hunt , editors, ICM99: Proceedings of the fourth inter- national congress on Industrial and Applied Mathematics, pages 271-282. Oxford University Press, 2000. [57] C . Zhu, R . B y r d , P . L u , and J . Nocedal. A l g o r i t h m 778: L - B F G S - B , fortran subroutines for large-scale bound-constrained optimization. ACM Transactions on Mathematical Software, 23(4):550-560, 1997.","attrs":{"lang":"en","ns":"http:\/\/www.w3.org\/2009\/08\/skos-reference\/skos.html#note","classmap":"oc:AnnotationContainer"},"iri":"http:\/\/www.w3.org\/2009\/08\/skos-reference\/skos.html#note","explain":"Simple Knowledge Organisation System; Notes are used to provide information relating to SKOS concepts. 
There is no restriction on the nature of this information, e.g., it could be plain text, hypertext, or an image; it could be a definition, information about the scope of a concept, editorial information, or any other type of information."}],"Genre":[{"label":"Genre","value":"Thesis\/Dissertation","attrs":{"lang":"en","ns":"http:\/\/www.europeana.eu\/schemas\/edm\/hasType","classmap":"dpla:SourceResource","property":"edm:hasType"},"iri":"http:\/\/www.europeana.eu\/schemas\/edm\/hasType","explain":"A Europeana Data Model Property; This property relates a resource with the concepts it belongs to in a suitable type system such as MIME or any thesaurus that captures categories of objects in a given field. It does NOT capture aboutness"}],"GraduationDate":[{"label":"GraduationDate","value":"2008-11","attrs":{"lang":"en","ns":"http:\/\/vivoweb.org\/ontology\/core#dateIssued","classmap":"vivo:DateTimeValue","property":"vivo:dateIssued"},"iri":"http:\/\/vivoweb.org\/ontology\/core#dateIssued","explain":"VIVO-ISF Ontology V1.6 Property; Date Optional Time Value, DateTime+Timezone Preferred "}],"IsShownAt":[{"label":"IsShownAt","value":"10.14288\/1.0051461","attrs":{"lang":"en","ns":"http:\/\/www.europeana.eu\/schemas\/edm\/isShownAt","classmap":"edm:WebResource","property":"edm:isShownAt"},"iri":"http:\/\/www.europeana.eu\/schemas\/edm\/isShownAt","explain":"A Europeana Data Model Property; An unambiguous URL reference to the digital object on the provider\u2019s website in its full information context."}],"Language":[{"label":"Language","value":"eng","attrs":{"lang":"en","ns":"http:\/\/purl.org\/dc\/terms\/language","classmap":"dpla:SourceResource","property":"dcterms:language"},"iri":"http:\/\/purl.org\/dc\/terms\/language","explain":"A Dublin Core Terms Property; A language of the resource.; Recommended best practice is to use a controlled vocabulary such as RFC 4646 [RFC4646]."}],"Program":[{"label":"Program","value":"Computer 
Science","attrs":{"lang":"en","ns":"https:\/\/open.library.ubc.ca\/terms#degreeDiscipline","classmap":"oc:ThesisDescription","property":"oc:degreeDiscipline"},"iri":"https:\/\/open.library.ubc.ca\/terms#degreeDiscipline","explain":"UBC Open Collections Metadata Components; Local Field; Indicates the program for which the degree was granted."}],"Provider":[{"label":"Provider","value":"Vancouver : University of British Columbia Library","attrs":{"lang":"en","ns":"http:\/\/www.europeana.eu\/schemas\/edm\/provider","classmap":"ore:Aggregation","property":"edm:provider"},"iri":"http:\/\/www.europeana.eu\/schemas\/edm\/provider","explain":"A Europeana Data Model Property; The name or identifier of the organization who delivers data directly to an aggregation service (e.g. Europeana)"}],"Publisher":[{"label":"Publisher","value":"University of British Columbia","attrs":{"lang":"en","ns":"http:\/\/purl.org\/dc\/terms\/publisher","classmap":"dpla:SourceResource","property":"dcterms:publisher"},"iri":"http:\/\/purl.org\/dc\/terms\/publisher","explain":"A Dublin Core Terms Property; An entity responsible for making the resource available.; Examples of a Publisher include a person, an organization, or a service."}],"Rights":[{"label":"Rights","value":"Attribution-NonCommercial-NoDerivatives 4.0 International","attrs":{"lang":"en","ns":"http:\/\/purl.org\/dc\/terms\/rights","classmap":"edm:WebResource","property":"dcterms:rights"},"iri":"http:\/\/purl.org\/dc\/terms\/rights","explain":"A Dublin Core Terms Property; Information about rights held in and over the resource.; Typically, rights information includes a statement about various property rights associated with the resource, including intellectual property 
rights."}],"RightsURI":[{"label":"RightsURI","value":"http:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0\/","attrs":{"lang":"en","ns":"https:\/\/open.library.ubc.ca\/terms#rightsURI","classmap":"oc:PublicationDescription","property":"oc:rightsURI"},"iri":"https:\/\/open.library.ubc.ca\/terms#rightsURI","explain":"UBC Open Collections Metadata Components; Local Field; Indicates the Creative Commons license url."}],"ScholarlyLevel":[{"label":"ScholarlyLevel","value":"Graduate","attrs":{"lang":"en","ns":"https:\/\/open.library.ubc.ca\/terms#scholarLevel","classmap":"oc:PublicationDescription","property":"oc:scholarLevel"},"iri":"https:\/\/open.library.ubc.ca\/terms#scholarLevel","explain":"UBC Open Collections Metadata Components; Local Field; Identifies the scholarly level of the author(s)\/creator(s)."}],"Title":[{"label":"Title","value":"A Levenberg-Marquardt method for large-scale bound-constrained nonlinear least-squares","attrs":{"lang":"en","ns":"http:\/\/purl.org\/dc\/terms\/title","classmap":"dpla:SourceResource","property":"dcterms:title"},"iri":"http:\/\/purl.org\/dc\/terms\/title","explain":"A Dublin Core Terms Property; The name given to the resource."}],"Type":[{"label":"Type","value":"Text","attrs":{"lang":"en","ns":"http:\/\/purl.org\/dc\/terms\/type","classmap":"dpla:SourceResource","property":"dcterms:type"},"iri":"http:\/\/purl.org\/dc\/terms\/type","explain":"A Dublin Core Terms Property; The nature or genre of the resource.; Recommended best practice is to use a controlled vocabulary such as the DCMI Type Vocabulary [DCMITYPE]. 
To describe the file format, physical medium, or dimensions of the resource, use the Format element."}],"URI":[{"label":"URI","value":"http:\/\/hdl.handle.net\/2429\/5648","attrs":{"lang":"en","ns":"https:\/\/open.library.ubc.ca\/terms#identifierURI","classmap":"oc:PublicationDescription","property":"oc:identifierURI"},"iri":"https:\/\/open.library.ubc.ca\/terms#identifierURI","explain":"UBC Open Collections Metadata Components; Local Field; Indicates the handle for item record."}],"SortDate":[{"label":"Sort Date","value":"2008-12-31 AD","attrs":{"lang":"en","ns":"http:\/\/purl.org\/dc\/terms\/date","classmap":"oc:InternalResource","property":"dcterms:date"},"iri":"http:\/\/purl.org\/dc\/terms\/date","explain":"A Dublin Core Elements Property; A point or period of time associated with an event in the lifecycle of the resource.; Date may be used to express temporal information at any level of granularity. Recommended best practice is to use an encoding scheme, such as the W3CDTF profile of ISO 8601 [W3CDTF].; A point or period of time associated with an event in the lifecycle of the resource.; Date may be used to express temporal information at any level of granularity. Recommended best practice is to use an encoding scheme, such as the W3CDTF profile of ISO 8601 [W3CDTF]."}]}