{"Affiliation":[{"label":"Affiliation","value":"Arts, Faculty of","attrs":{"lang":"en","ns":"http:\/\/vivoweb.org\/ontology\/core#departmentOrSchool","classmap":"vivo:EducationalProcess","property":"vivo:departmentOrSchool"},"iri":"http:\/\/vivoweb.org\/ontology\/core#departmentOrSchool","explain":"VIVO-ISF Ontology V1.6 Property; The department or school name within institution; Not intended to be an institution name."},{"label":"Affiliation","value":"Psychology, Department of","attrs":{"lang":"en","ns":"http:\/\/vivoweb.org\/ontology\/core#departmentOrSchool","classmap":"vivo:EducationalProcess","property":"vivo:departmentOrSchool"},"iri":"http:\/\/vivoweb.org\/ontology\/core#departmentOrSchool","explain":"VIVO-ISF Ontology V1.6 Property; The department or school name within institution; Not intended to be an institution name."}],"AggregatedSourceRepository":[{"label":"Aggregated Source Repository","value":"DSpace","attrs":{"lang":"en","ns":"http:\/\/www.europeana.eu\/schemas\/edm\/dataProvider","classmap":"ore:Aggregation","property":"edm:dataProvider"},"iri":"http:\/\/www.europeana.eu\/schemas\/edm\/dataProvider","explain":"A Europeana Data Model Property; The name or identifier of the organization who contributes data indirectly to an aggregation service (e.g. 
Europeana)"}],"Campus":[{"label":"Campus","value":"UBCV","attrs":{"lang":"en","ns":"https:\/\/open.library.ubc.ca\/terms#degreeCampus","classmap":"oc:ThesisDescription","property":"oc:degreeCampus"},"iri":"https:\/\/open.library.ubc.ca\/terms#degreeCampus","explain":"UBC Open Collections Metadata Components; Local Field; Identifies the name of the campus from which the graduate completed their degree."}],"Creator":[{"label":"Creator","value":"Chuang, Jenny","attrs":{"lang":"en","ns":"http:\/\/purl.org\/dc\/terms\/creator","classmap":"dpla:SourceResource","property":"dcterms:creator"},"iri":"http:\/\/purl.org\/dc\/terms\/creator","explain":"A Dublin Core Terms Property; An entity primarily responsible for making the resource.; Examples of a Creator include a person, an organization, or a service."}],"DateAvailable":[{"label":"Date Available","value":"2013-08-20T21:47:15Z","attrs":{"lang":"en","ns":"http:\/\/purl.org\/dc\/terms\/issued","classmap":"edm:WebResource","property":"dcterms:issued"},"iri":"http:\/\/purl.org\/dc\/terms\/issued","explain":"A Dublin Core Terms Property; Date of formal issuance (e.g., publication) of the resource."}],"DateIssued":[{"label":"Date Issued","value":"2013","attrs":{"lang":"en","ns":"http:\/\/purl.org\/dc\/terms\/issued","classmap":"oc:SourceResource","property":"dcterms:issued"},"iri":"http:\/\/purl.org\/dc\/terms\/issued","explain":"A Dublin Core Terms Property; Date of formal issuance (e.g., publication) of the resource."}],"Degree":[{"label":"Degree (Theses)","value":"Master of Arts - MA","attrs":{"lang":"en","ns":"http:\/\/vivoweb.org\/ontology\/core#relatedDegree","classmap":"vivo:ThesisDegree","property":"vivo:relatedDegree"},"iri":"http:\/\/vivoweb.org\/ontology\/core#relatedDegree","explain":"VIVO-ISF Ontology V1.6 Property; The thesis degree; Extended Property specified by UBC, as per https:\/\/wiki.duraspace.org\/display\/VIVO\/Ontology+Editor%27s+Guide"}],"DegreeGrantor":[{"label":"Degree Grantor","value":"University 
of British Columbia","attrs":{"lang":"en","ns":"https:\/\/open.library.ubc.ca\/terms#degreeGrantor","classmap":"oc:ThesisDescription","property":"oc:degreeGrantor"},"iri":"https:\/\/open.library.ubc.ca\/terms#degreeGrantor","explain":"UBC Open Collections Metadata Components; Local Field; Indicates the institution where thesis was granted."}],"Description":[{"label":"Description","value":"A Monte Carlo simulation was conducted to investigate the Type I error rates of several versions of chi-square difference tests for nonnormal data in confirmatory factor analysis (CFA) models. The studied statistics include: 1) the original uncorrected difference test, D, obtained by taking the difference of the ML chi-squares for the respective models; 2) the original robust difference test, DR\u2081, due to Satorra and Bentler (2001); 3) the recent modification to this test, DR\u2082, which ensures that the statistic remains positive (Satorra & Bentler, 2010); and 4) a hybrid statistic, DH, proposed by Asparouhov and Muth\u00e9n (2010), which is equal to DR\u2081 when DR\u2081 > 0, and otherwise is equal to DR\u2082. Types of constraints studied included constraining factor correlations to 0, constraining factor correlations to 1, and constraining factor loadings to equal each other within or across factors. An interesting finding was that the uncorrected test D appeared to be robust to nonnormality when the constraint was setting factor correlations to zero. The robust tests performed well and similarly to each other in many conditions. The new strictly positive test, DR\u2082 exhibited slightly inflated rejection rates in conditions that involved constraining factor loadings, while DR\u2081 and DH exhibited rejection rates slightly below nominal in conditions that involved constraining factor correlations or factor loadings. 
While more research is needed on the new strictly positive test, the original robust difference test or the hybrid procedure are tentatively recommended.","attrs":{"lang":"en","ns":"http:\/\/purl.org\/dc\/terms\/description","classmap":"dpla:SourceResource","property":"dcterms:description"},"iri":"http:\/\/purl.org\/dc\/terms\/description","explain":"A Dublin Core Terms Property; An account of the resource.; Description may include but is not limited to: an abstract, a table of contents, a graphical representation, or a free-text account of the resource."}],"DigitalResourceOriginalRecord":[{"label":"Digital Resource Original Record","value":"https:\/\/circle.library.ubc.ca\/rest\/handle\/2429\/44856?expand=metadata","attrs":{"lang":"en","ns":"http:\/\/www.europeana.eu\/schemas\/edm\/aggregatedCHO","classmap":"ore:Aggregation","property":"edm:aggregatedCHO"},"iri":"http:\/\/www.europeana.eu\/schemas\/edm\/aggregatedCHO","explain":"A Europeana Data Model Property; The identifier of the source object, e.g. the Mona Lisa itself. This could be a full linked open data URI or an internal identifier"}],"FullText":[{"label":"Full Text","value":"INVESTIGATION OF TYPE I ERROR RATES OF THREE VERSIONS OF ROBUST CHI-SQUARE DIFFERENCE TESTS IN STRUCTURAL EQUATION MODELING  by Jenny Chuang  B.A., The University of British Columbia, 2013  A THESIS SUBMITTED IN PARTIAL FULFILLMENT OF THE REQUIREMENTS FOR THE DEGREE OF  MASTER OF ARTS in THE FACULTY OF GRADUATE AND POSTDOCTORAL STUDIES (Psychology)  THE UNIVERSITY OF BRITISH COLUMBIA (Vancouver)  August 2013  \u00a9 Jenny Chuang, 2013  ii Abstract A Monte Carlo simulation was conducted to investigate the Type I error rates of several versions of chi-square difference tests for nonnormal data in confirmatory factor analysis (CFA) models. 
The studied statistics include: 1) the original uncorrected difference test, D, obtained by taking the difference of the ML chi-squares for the respective models; 2) the original robust difference test, DR\u2081, due to Satorra and Bentler (2001); 3) the recent modification to this test, DR\u2082, which ensures that the statistic remains positive (Satorra & Bentler, 2010); and 4) a hybrid statistic, DH, proposed by Asparouhov and Muth\u00e9n (2010), which is equal to DR\u2081 when DR\u2081 > 0, and otherwise is equal to DR\u2082. Types of constraints studied included constraining factor correlations to 0, constraining factor correlations to 1, and constraining loadings to equal each other within or across factors. An interesting finding was that the uncorrected test D appeared to be robust to nonnormality when the constraint was setting factor correlations to zero. The robust tests performed well and similarly to each other in many conditions. The new strictly positive test, DR\u2082 exhibited slightly inflated rejection rates in conditions that involved constraining factor loadings, while DR\u2081 and DH exhibited rejection rates slightly below nominal in conditions that involved constraining factor correlations or factor loadings. While more research is needed on the new strictly positive test, the original robust difference test or the hybrid procedure are tentatively recommended.    iii Preface The dissertation is original, unpublished, independent work by the author, J. Chuang. The research was designed in collaboration with the author\u2019s research supervisor, V. Savalei. The simulation components of the research was carried out in collaboration with C.F. Falk.     iv Table of Contents Abstract .................................................................................................................................... ii Preface ...................................................................................................................................... 
ii Table of Contents ................................................................................................................... iv List of Tables ......................................................................................................................... vii Acknowledgements .............................................................................................................. xxi Dedication ............................................................................................................................ xxii Chapter  1: Introduction ........................................................................................................ 1 1.1 What is Structural Equation Modeling? ....................................................................... 1  1.2 Estimation and Model Fit in SEM ............................................................................... 2 1.2.1 The ML Estimation Method.................................................................................. 3 1.2.2 The GLS Estimation Method ................................................................................ 4 1.3 The ML Chi-Square Statistic and Adjustments for Nonnormality .............................. 5 1.4 Nested Models and the Chi-Square Difference Test.................................................... 6 1.5 The Chi-Square Difference Test and Adjustments for Nonnormality ......................... 9 1.6 Goals of the Present Research .................................................................................... 10 1.7 Definitions of the Studied Statistics ........................................................................... 11 1.8 Overview of Simulation Studies ................................................................................ 14 Chapter  2: Factor Correlations of CFA Models ............................................................... 18 2.1 Study 1A: CFA models with orthogonal vs. 
correlated factors ................................. 18 2.1.1 Study 1A: Method ............................................................................................... 18 2.1.2 Study 1A: Results ............................................................................................... 18 2.1.2.1 Two-factor models ....................................................................................... 19  v 2.1.2.2 Three-factor models ..................................................................................... 21 2.1.3 Study 1A: Summary ............................................................................................ 24 2.2 Study 1B: One-factor vs. two- or three-factor CFA models ...................................... 25 2.2.1 Study 1B: Method ............................................................................................... 25 2.2.2 Study 1B: Results ................................................................................................ 25 2.2.3 Study 1B: Summary ............................................................................................ 27 Chapter  3: Factor Loadings of CFA Models ..................................................................... 28 3.1 Study 2A: One-factor CFA models with free vs. constrained factor loadings .......... 28 3.1.1 Study 2A: Method ............................................................................................... 28 3.1.2 Study 2A: Results ............................................................................................... 28 3.1.3 Study 2A: Summary ............................................................................................ 29 3.2 Study 2B: Two-factor CFA models with free loadings vs. loadings constrained equal across factors ....................................................................................................................... 
30 3.2.1 Study 2B: Method ............................................................................................... 30 3.2.2 Study 2B: Results ................................................................................................ 31 3.1.2.1 Models with four indicators per factor ......................................................... 31      3.1.2.1.1   Factor correlation 0 ............................................................................... 31           3.1.2.1.2   Factor correlation .3 .............................................................................. 32 3.1.2.2 Models with five indicators per factor ........................................................ 34           3.1.2.2.1   Factor correlation 0 ............................................................................... 34           3.1.2.2.2   Factor correlation .3 .............................................................................. 36 3.1.2.3 Models with six indicators per factor .......................................................... 38          3.1.2.3.1   Factor correlation 0 ................................................................................ 38       vi     3.1.2.3.2   Factor correlation .3 .............................................................................. 39 3.2.3 Study 2B: Summary ............................................................................................ 40 Chapter  4: Two-factor CFA models with and without crossloadings ............................. 42 4.1 Study 3: Method ......................................................................................................... 42 4.2 Study 3: Results ......................................................................................................... 42 4.3 Study 3: Summary...................................................................................................... 
43 Chapter  5: Conclusion and Overall Discussion ................................................................ 45      5.1   Performance of the Uncorrected Chi-Square Difference Test ................................... 45      5.2   Performance of the Robust Chi-Square Difference Tests .......................................... 46      5.3   Future Directions ....................................................................................................... 48      5.4   Conclusion ................................................................................................................. 48     References .............................................................................................................................. 48  Appendix ................................................................................................................................ 50 Tables for Study 1A ........................................................................................................ 56 Results for two-factor models ..................................................................................... 56 Results for three-factor models ................................................................................... 71 Tables for Study 1B ........................................................................................................ 77 Tables for Study 2A ........................................................................................................ 79 Tables for Study 2B ........................................................................................................ 83 Results for models with four indicators per factor...................................................... 83 Results for models with five indicators per factor .................................................... 101 Results for models with six indicators per factor...................................................... 
123     Tables for Study 3 ......................................................................................................... 149  viiList of Tables Table 1    Models Fit and Variables Manipulated in Each Study ........................................... 15 Table 2    Kurtosis Values Used in the Heterogeneous Kurtosis conditions used by model size ................................................................................................................................................. 17 Table 3   Rejection Rates of the Restricted and 1-df Absolute Model ?2: 2 factors, skewness =  2, kurtosis = 7, ? = 0.5, ? = 0.05 ............................................................................................ 56 Table 4   Rejection Rates of the Restricted and 1-df Absolute Model ?2: 2 factors, skewness = 2, kurtosis = 7 (heterogeneous), ? = 0.5, ? = 0.05 ................................................................. 57 Table 5   Rejection Rates of the Restricted and 1-df Absolute Model ?2: 2 factors, skewness = 2, kurtosis = 15, ? = 0.5, ? = 0.05 .......................................................................................... 58 Table 6   Rejection Rates of the Restricted and 1-df Absolute Model ?2: 2 factors, skewness = 2, kurtosis = 15 (heterogeneous), ? = 0.5, ? = 0.05 ............................................................... 59 Table 7   Rejection Rates of the Restricted and 1-df Absolute Model ?2: 2 factors, skewness = 2, kurtosis = 7, ? = 0.7, ? = 0.05 ............................................................................................ 60 Table 8   Rejection Rates of the Restricted and 1-df Absolute Model ?2: 2 factors, skewness = 2, kurtosis = 7 (heterogeneous), ? = 0.7, ? = 0.05 ................................................................. 61 Table 9   Rejection Rates of the Restricted and 1-df Absolute Model ?2: 2 factors, skewness = 2, kurtosis = 15, ? = 0.7, ? 
= 0.05 .......................................................................................... 62 Table 10   Rejection Rates of the Restricted and 1-df Absolute Model ?2: 2 factors, skewness = 2, kurtosis = 15 (heterogeneous), ? = 0.7, ? = 0.05 ........................................................... 63 Table 11   Type I Error Rates of 1-df ?2 Difference Tests for 8 Indicator Model with 2 factors, ? = .7, ? = 0.05 ....................................................................................................................... 64  viiiTable 12   Type I Error Rates of 1-df ?2 Difference Tests for 8 Indicator Model with 2 factors, ? = .5, ? = 0.05 ....................................................................................................................... 65 Table 13   Type I Error Rates of 1-df ?2 Difference Tests for 10 Indicator Model with 2 factors, ? = .7, ? = 0.05 .......................................................................................................... 66 Table 14   Type I Error Rates of 1-df ?2 Difference Tests for 10 Indicator Model with 2 factors, ? = .5, ? = 0.05 .......................................................................................................... 67 Table 15   Type I Error Rates of 1-df ?2 Difference Tests for 12 Indicator Model with 2 factors, ? = .7, ? = 0.05 .......................................................................................................... 68 Table 16   Type I Error Rates of 1-df ?2 Difference Tests for 12 Indicator Model with 2 factors, ? = .5, ? = 0.05 .......................................................................................................... 69 Table 17   Proportion of Negative DR1 ?2 Difference Tests for 8 and 10 Indicator Models with 2 factors, ? = 0.5 (Proportion of Negative DR1 Rejected by DH) ............................................ 
70 Table 18   Rejection Rates of the Restricted, 2- and 3-df Absolute Model ?2: 3 factors, skewness = 2, kurtosis = 7, ? = 0.7, ? = 0.05 ......................................................................... 71 Table 19   Rejection Rates of the Restricted, 2- and 3-df Absolute Model ?2: 3 factors, skewness = 2, kurtosis = 7 (heterogeneous), ? = 0.7, ? = 0.05 .............................................. 71 Table 20   Rejection Rates of the Restricted, 2- and 3-df Absolute Model ?2: 3 factors, skewness = 2, kurtosis = 15, ? = 0.7, ? = 0.05 ....................................................................... 72 Table 21   Rejection Rates of the Restricted, 2- and 3-df Absolute Model ?2: 3 factors, skewness = 2, kurtosis = 15 (heterogeneous), ? = 0.7, ? = 0.05 ............................................ 72 Table 22   Type I Error Rates of 2-df ?2 Difference Tests for 12 Indicator Model with 3 factors, ? = .7, ? = 0.05 .......................................................................................................... 73  ixTable 23   Type I Error Rates of 2-df ?2 Difference Tests for 15 Indicator Model with 3 factors, ? = .7, ? = 0.05 .......................................................................................................... 74 Table 24   Type I Error Rates of 3-df ?2 Difference Tests for 12 Indicator Model with 3 factors, ? = .7, ? = 0.05 .......................................................................................................... 75 Table 25   Type I Error Rates of 3-df ?2 Difference Tests for 15 Indicator Model with 3 factors, ? = .7, ? = 0.05 .......................................................................................................... 76 Table 26   Type I Error Rates of ?2 Difference Tests for Fitting 2-Factor Models to 1-Factor Data, ? = 0.05 ......................................................................................................................... 
77 Table 27   Type I Error Rates of ?2 Difference Tests for Fitting 3-Factor Models to 1-Factor Data, ? = 0.05 ......................................................................................................................... 77 Table 28   Proportion of Negative DR1 ?2 Difference Tests (Proportion of Negative DR1 Rejected by DH) ....................................................................................................................... 78 Table 29   Rejection Rates of the Absolute Model ?2 for 1-Factor Models with Loadings Constrained to be Equal (Unrestricted and df = 1, 2, 3, 4), skewness = 2, kurtosis = 7, ? = 0.05.......................................................................................................................................... 79 Table 30   Rejection Rates of the Absolute Model ?2 for 1-Factor Models with Loadings Constrained to be Equal (Unrestricted and df = 1, 2, 3, 4), skewness = 2, kurtosis = 15, ? = 0.05.......................................................................................................................................... 79 Table 31   Type I Error Rates of 1-df ?2 Difference Tests for 1-Factor Models with Loadings Constrained to be Equal, ? = 0.05 ......................................................................................... 80 Table 32   Type I Error Rates of 2-df ?2 Difference Tests for 1-Factor Models with Loadings Constrained to be Equal, ? = 0.05 ......................................................................................... 80  x Table 33   Type I Error Rates of 3-df ?2 Difference Tests for 1-Factor Models with Loadings Constrained to be Equal, ? = 0.05 ......................................................................................... 81 Table 34   Type I Error Rates of 4-df ?2 Difference Tests for 1-Factor Models with Loadings Constrained to be Equal, ? = 0.05 ......................................................................................... 
81 Table 35   Proportion of Negative DR1 ?2 Difference Tests for 1-Factor Models with Loadings Constrained to be Equal (Proportion of Negative DR1 Rejected by DH) ................................. 82 Table 36   Type I Error Rates of ?2 Difference Tests for 2-Factor Models with 4 Indicators per Factor and Crossfactor Loadings Constrained to be Equal (1-df), ? = 0, ? = 0.5, ? = 0.05.......................................................................................................................................... 83 Table 37   Type I Error Rates of ?2 Difference Tests for 2-Factor Models with 4 Indicators per Factor and Crossfactor Loadings Constrained to be Equal (2-df), ? = 0, ? = 0.5, ? = 0.05.......................................................................................................................................... 84 Table 38   Type I Error Rates of ?2 Difference Tests for 2-Factor Models with 4 Indicators per Factor and Crossfactor Loadings Constrained to be Equal (3-df), ? = 0, ? = 0.5, ? = 0.05.......................................................................................................................................... 85 Table 39   Type I Error Rates of ?2 Difference Tests for 2-Factor Models with 4 Indicators per Factor and Crossfactor Loadings Constrained to be Equal (4-df), ? = 0, ? = 0.5, ? = 0.05.......................................................................................................................................... 86 Table 40   Type I Error Rates of ?2 Difference Tests for 2-Factor Models with 4 Indicators per Factor and Crossfactor Loadings Constrained to be Equal (1-df), ? = 0, ? = 0.7, ? = 0.05.......................................................................................................................................... 87  xiTable 41   Type I Error Rates of ?2 Difference Tests for 2-Factor Models with 4 Indicators per Factor and Crossfactor Loadings Constrained to be Equal (2-df), ? = 0, ? 
= 0.7, ? = 0.05.......................................................................................................................................... 88 Table 42   Type I Error Rates of ?2 Difference Tests for 2-Factor Models with 4 Indicators per Factor and Crossfactor Loadings Constrained to be Equal (3-df), ? = 0, ? = 0.7, ? = 0.05.......................................................................................................................................... 89 Table 43   Type I Error Rates of ?2 Difference Tests for 2-Factor Models with 4 Indicators per Factor and Crossfactor Loadings Constrained to be Equal (4-df), ? = 0, ? = 0.7, ? = 0.05.......................................................................................................................................... 90 Table 44   Proportion of Negative DR1 ?2 Difference Tests for 2-Factor Models with Two Loadings Constrained to be Equal Across Factors, 4 Indicators per Factor, ? = 0, 1-df (Proportion of Negative DR1 Rejected by DH) ......................................................................... 91 Table 45   Type I Error Rates of ?2 Difference Tests for 2-Factor Models with 4 Indicators per Factor and Crossfactor Loadings Constrained to be Equal (1-df), ? = 0.3, ? = 0.5, ? = 0.05.......................................................................................................................................... 92 Table 46   Type I Error Rates of ?2 Difference Tests for 2-Factor Models with 4 Indicators per Factor and Crossfactor Loadings Constrained to be Equal (2-df), ? = 0.3, ? = 0.5, ? = 0.05.......................................................................................................................................... 93 Table 47   Type I Error Rates of ?2 Difference Tests for 2-Factor Models with 4 Indicators per Factor and Crossfactor Loadings Constrained to be Equal (3-df), ? = 0.3, ? = 0.5, ? 
= 0.05.......................................................................................................................................... 94  xiiTable 48   Type I Error Rates of ?2 Difference Tests for 2-Factor Models with 4 Indicators per Factor and Crossfactor Loadings Constrained to be Equal (4-df), ? = 0.3, ? = 0.5, ? = 0.05.......................................................................................................................................... 95 Table 49   Type I Error Rates of ?2 Difference Tests for 2-Factor Models with 4 Indicators per Factor and Crossfactor Loadings Constrained to be Equal (1-df), ? = 0.3, ? = 0.7, ? = 0.05.......................................................................................................................................... 96 Table 50   Type I Error Rates of ?2 Difference Tests for 2-Factor Models with 4 Indicators per Factor and Crossfactor Loadings Constrained to be Equal (2-df), ? = 0.3, ? = 0.7, ? = 0.05.......................................................................................................................................... 97 Table 51   Type I Error Rates of ?2 Difference Tests for 2-Factor Models with 4 Indicators per Factor and Crossfactor Loadings Constrained to be Equal (3-df), ? = 0.3, ? = 0.7, ? = 0.05.......................................................................................................................................... 98 Table 52   Type I Error Rates of ?2 Difference Tests for 2-Factor Models with 4 Indicators per Factor and Crossfactor Loadings Constrained to be Equal (4-df), ? = 0.3, ? = 0.7, ? = 0.05.......................................................................................................................................... 99 Table 53   Proportion of Negative DR1 ?2 Difference Tests for 2-Factor Models with Two Loadings Constrained to be Equal Across Factors, 4 Indicators per Factor, ? 
= 0.3, 1-df (Proportion of Negative DR1 Rejected by DH) ....................................................................... 100 Table 54   Type I Error Rates of ?2 Difference Tests for 2-Factor Models with 5 Indicators per Factor and Crossfactor Loadings Constrained to be Equal (1-df), ? = 0, ? = 0.5, ? = 0.05........................................................................................................................................ 101  xiiiTable 55   Type I Error Rates of ?2 Difference Tests for 2-Factor Models with 5 Indicators per Factor and Crossfactor Loadings Constrained to be Equal (2-df), ? = 0, ? = 0.5, ? = 0.05........................................................................................................................................ 102 Table 56   Type I Error Rates of ?2 Difference Tests for 2-Factor Models with 5 Indicators per Factor and Crossfactor Loadings Constrained to be Equal (3-df), ? = 0, ? = 0.5, ? = 0.05........................................................................................................................................ 103 Table 57   Type I Error Rates of ?2 Difference Tests for 2-Factor Models with 5 Indicators per Factor and Crossfactor Loadings Constrained to be Equal (4-df), ? = 0, ? = 0.5, ? = 0.05........................................................................................................................................ 104 Table 58   Type I Error Rates of ?2 Difference Tests for 2-Factor Models with 5 Indicators per Factor and Crossfactor Loadings Constrained to be Equal (5-df), ? = 0, ? = 0.5, ? = 0.05........................................................................................................................................ 105 Table 59   Type I Error Rates of ?2 Difference Tests for 2-Factor Models with 5 Indicators per Factor and Crossfactor Loadings Constrained to be Equal (1-df), ? = 0, ? = 0.7, ? 
= 0.05........................................................................................................................................ 106 Table 60   Type I Error Rates of ?2 Difference Tests for 2-Factor Models with 5 Indicators per Factor and Crossfactor Loadings Constrained to be Equal (2-df), ? = 0, ? = 0.7, ? = 0.05........................................................................................................................................ 107 Table 61   Type I Error Rates of ?2 Difference Tests for 2-Factor Models with 5 Indicators per Factor and Crossfactor Loadings Constrained to be Equal (3-df), ? = 0, ? = 0.7, ? = 0.05........................................................................................................................................ 108  xiv Table 62   Type I Error Rates of ?2 Difference Tests for 2-Factor Models with 5 Indicators per Factor and Crossfactor Loadings Constrained to be Equal (4-df), ? = 0, ? = 0.7, ? = 0.05........................................................................................................................................ 109 Table 63   Type I Error Rates of ?2 Difference Tests for 2-Factor Models with 5 Indicators per Factor and Crossfactor Loadings Constrained to be Equal (5-df), ? = 0, ? = 0.7, ? = 0.05........................................................................................................................................ 110 Table 64   Proportion of Negative DR1 ?2 Difference Tests for 2-Factor Models with Two Loadings Constrained to be Equal Across Factors, 5 Indicators per Factor, ? = 0, 1-df (Proportion of Negative DR1 Rejected by DH) ....................................................................... 111 Table 65   Type I Error Rates of ?2 Difference Tests for 2-Factor Models with 5 Indicators per Factor and Crossfactor Loadings Constrained to be Equal (1-df), ? = 0.3, ? = 0.5, ? 
= 0.05........................................................................................................................................ 112 Table 66   Type I Error Rates of ?2 Difference Tests for 2-Factor Models with 5 Indicators per Factor and Crossfactor Loadings Constrained to be Equal (2-df), ? = 0.3, ? = 0.5, ? = 0.05........................................................................................................................................ 113 Table 67   Type I Error Rates of ?2 Difference Tests for 2-Factor Models with 5 Indicators per Factor and Crossfactor Loadings Constrained to be Equal (3-df), ? = 0.3, ? = 0.5, ? = 0.05........................................................................................................................................ 114 Table 68   Type I Error Rates of ?2 Difference Tests for 2-Factor Models with 5 Indicators per Factor and Crossfactor Loadings Constrained to be Equal (4-df), ? = 0.3, ? = 0.5, ? = 0.05........................................................................................................................................ 115  xv Table 69   Type I Error Rates of ?2 Difference Tests for 2-Factor Models with 5 Indicators per Factor and Crossfactor Loadings Constrained to be Equal (5-df), ? = 0.3, ? = 0.5, ? = 0.05........................................................................................................................................ 116 Table 70   Type I Error Rates of ?2 Difference Tests for 2-Factor Models with 5 Indicators per Factor and Crossfactor Loadings Constrained to be Equal (1-df), ? = 0.3, ? = 0.7, ? = 0.05........................................................................................................................................ 117 Table 71   Type I Error Rates of ?2 Difference Tests for 2-Factor Models with 5 Indicators per Factor and Crossfactor Loadings Constrained to be Equal (2-df), ? = 0.3, ? = 0.7, ? 
= 0.05........................................................................................................................................ 118 Table 72   Type I Error Rates of ?2 Difference Tests for 2-Factor Models with 5 Indicators per Factor and Crossfactor Loadings Constrained to be Equal (3-df), ? = 0.3, ? = 0.7, ? = 0.05........................................................................................................................................ 119 Table 73   Type I Error Rates of ?2 Difference Tests for 2-Factor Models with 5 Indicators per Factor and Crossfactor Loadings Constrained to be Equal (4-df), ? = 0.3, ? = 0.7, ? = 0.05........................................................................................................................................ 120 Table 74   Type I Error Rates of ?2 Difference Tests for 2-Factor Models with 5 Indicators per Factor and Crossfactor Loadings Constrained to be Equal (5-df), ? = 0.3, ? = 0.7, ? = 0.05........................................................................................................................................ 121 Table 75   Proportion of Negative DR1 ?2 Difference Tests for 2-Factor Models with Two Loadings Constrained to be Equal Across Factors, 5 Indicators per Factor, ? = 0.3, 1-df (Proportion of Negative DR1 Rejected by DH) ....................................................................... 122  xvi Table 76   Type I Error Rates of ?2 Difference Tests for 2-Factor Models with 6 Indicators per Factor and Crossfactor Loadings Constrained to be Equal (1-df), ? = 0, ? = 0.5, ? = 0.05........................................................................................................................................ 123 Table 77   Type I Error Rates of ?2 Difference Tests for 2-Factor Models with 6 Indicators per Factor and Crossfactor Loadings Constrained to be Equal (2-df), ? = 0, ? = 0.5, ? 
= 0.05........................................................................................................................................ 124 Table 78   Type I Error Rates of ?2 Difference Tests for 2-Factor Models with 6 Indicators per Factor and Crossfactor Loadings Constrained to be Equal (3-df), ? = 0, ? = 0.5, ? = 0.05........................................................................................................................................ 125 Table 79   Type I Error Rates of ?2 Difference Tests for 2-Factor Models with 6 Indicators per Factor and Crossfactor Loadings Constrained to be Equal (4-df), ? = 0, ? = 0.5, ? = 0.05........................................................................................................................................ 126 Table 80   Type I Error Rates of ?2 Difference Tests for 2-Factor Models with 6 Indicators per Factor and Crossfactor Loadings Constrained to be Equal (5-df), ? = 0, ? = 0.5, ? = 0.05........................................................................................................................................ 127 Table 81   Type I Error Rates of ?2 Difference Tests for 2-Factor Models with 6 Indicators per Factor and Crossfactor Loadings Constrained to be Equal (6-df), ? = 0, ? = 0.5, ? = 0.05........................................................................................................................................ 128 Table 82   Type I Error Rates of ?2 Difference Tests for 2-Factor Models with 6 Indicators per Factor and Crossfactor Loadings Constrained to be Equal (1-df), ? = 0, ? = 0.7, ? = 0.05........................................................................................................................................ 129  xvii Table 83   Type I Error Rates of ?2 Difference Tests for 2-Factor Models with 6 Indicators per Factor and Crossfactor Loadings Constrained to be Equal (2-df), ? = 0, ? = 0.7, ? 
= 0.05........................................................................................................................................ 130 Table 84   Type I Error Rates of ?2 Difference Tests for 2-Factor Models with 6 Indicators per Factor and Crossfactor Loadings Constrained to be Equal (3-df), ? = 0, ? = 0.7, ? = 0.05........................................................................................................................................ 131 Table 85   Type I Error Rates of ?2 Difference Tests for 2-Factor Models with 6 Indicators per Factor and Crossfactor Loadings Constrained to be Equal (4-df), ? = 0, ? = 0.7, ? = 0.05........................................................................................................................................ 132 Table 86   Type I Error Rates of ?2 Difference Tests for 2-Factor Models with 6 Indicators per Factor and Crossfactor Loadings Constrained to be Equal (5-df), ? = 0, ? = 0.7, ? = 0.05........................................................................................................................................ 133 Table 87   Type I Error Rates of ?2 Difference Tests for 2-Factor Models with 6 Indicators per Factor and Crossfactor Loadings Constrained to be Equal (6-df), ? = 0, ? = 0.7, ? = 0.05........................................................................................................................................ 134 Table 88   Proportion of Negative DR1 ?2 Difference Tests for 2-Factor Models with Two Loadings Constrained to be Equal Across Factors, 6 Indicators per Factor, ? = 0, 1-df (Proportion of Negative DR1 Rejected by DH) ....................................................................... 135 Table 89   Type I Error Rates of ?2 Difference Tests for 2-Factor Models with 6 Indicators per Factor and Crossfactor Loadings Constrained to be Equal (1-df), ? = 0.3, ? = 0.5, ? 
= 0.05........................................................................................................................................ 136  xviiiTable 90   Type I Error Rates of ?2 Difference Tests for 2-Factor Models with 6 Indicators per Factor and Crossfactor Loadings Constrained to be Equal (1-df), ? = 0.3, ? = 0.5, ? = 0.05........................................................................................................................................ 137 Table 91   Type I Error Rates of ?2 Difference Tests for 2-Factor Models with 6 Indicators per Factor and Crossfactor Loadings Constrained to be Equal (3-df), ? = 0.3, ? = 0.5, ? = 0.05........................................................................................................................................ 138 Table 92   Type I Error Rates of ?2 Difference Tests for 2-Factor Models with 6 Indicators per Factor and Crossfactor Loadings Constrained to be Equal (4-df), ? = 0.3, ? = 0.5, ? = 0.05........................................................................................................................................ 139 Table 93   Type I Error Rates of ?2 Difference Tests for 2-Factor Models with 6 Indicators per Factor and Crossfactor Loadings Constrained to be Equal (5-df), ? = 0.3, ? = 0.5, ? = 0.05........................................................................................................................................ 140 Table 94   Type I Error Rates of ?2 Difference Tests for 2-Factor Models with 6 Indicators per Factor and Crossfactor Loadings Constrained to be Equal (6-df), ? = 0.3, ? = 0.5, ? = 0.05........................................................................................................................................ 141 Table 95   Type I Error Rates of ?2 Difference Tests for 2-Factor Models with 6 Indicators per Factor and Crossfactor Loadings Constrained to be Equal (1-df), ? = 0.3, ? = 0.7, ? 
= 0.05........................................................................................................................................ 142 Table 96   Type I Error Rates of ?2 Difference Tests for 2-Factor Models with 6 Indicators per Factor and Crossfactor Loadings Constrained to be Equal (2-df), ? = 0.3, ? = 0.7, ? = 0.05........................................................................................................................................ 143  xix Table 97   Type I Error Rates of ?2 Difference Tests for 2-Factor Models with 6 Indicators per Factor and Crossfactor Loadings Constrained to be Equal (3-df), ? = 0.3, ? = 0.7, ? = 0.05........................................................................................................................................ 144 Table 98   Type I Error Rates of ?2 Difference Tests for 2-Factor Models with 6 Indicators per Factor and Crossfactor Loadings Constrained to be Equal (4-df), ? = 0.3, ? = 0.7, ? = 0.05........................................................................................................................................ 145 Table 99   Type I Error Rates of ?2 Difference Tests for 2-Factor Models with 6 Indicators per Factor and Crossfactor Loadings Constrained to be Equal (5-df), ? = 0.3, ? = 0.7, ? = 0.05........................................................................................................................................ 146 Table 100   Type I Error Rates of ?2 Difference Tests for 2-Factor Models with 6 Indicators per Factor and Crossfactor Loadings Constrained to be Equal (6-df), ? = 0.3, ? = 0.7, ? = 0.05........................................................................................................................................ 147 Table 101   Proportion of Negative DR1 ?2 Difference Tests for 2-Factor Models with Two Loadings Constrained to be Equal Across Factors, 5 Indicators per Factor, ? 
= 0.3, 1-df (Proportion of Negative DR1 Rejected by DH) ....................................................................... 148 Table 102   Rejection Rates of Absolute Model ?2 for 2-Factor Models with 0-4 Cross Loadings, skewness = 2, kurtosis = 7, ? = 0.05 ................................................................... 149 Table 103   Rejection Rates of Absolute Model ?2 for 2-Factor Models with 0-4 Cross Loadings, skewness = 2, kurtosis = 7 (heterogeneous), ? = 0.05 ........................................ 149 Table 104   Rejection Rates of Absolute Model ?2 for 2-Factor Models with 0-4 Cross Loadings, skewness = 2, kurtosis = 15, ? = 0.05 ................................................................. 150 Table 105   Rejection Rates of Absolute Model ?2 for 2-Factor Models with 0-4 Cross Loadings, skewness = 2, kurtosis = 15 (heterogeneous), ? = 0.05 ...................................... 150  xx Table 106   Type I Error Rates of ?2 Difference Tests for 2-Factor Models with 1 Cross Loading, ? = 0.05 ................................................................................................................. 151 Table 107   Type I Error Rates of ?2 Difference Tests for 2-Factor Models with 2 Cross Loadings, ? = 0.05 ................................................................................................................ 152 Table 108   Type I Error Rates of ?2 Difference Tests for 2-Factor Models with 3 Cross Loadings, ? = 0.05 ................................................................................................................ 153 Table 109   Type I Error Rates of ?2 Difference Tests for 2-Factor Models with 4 Cross Loadings, ? = 0.05 ................................................................................................................ 
154 Table 110   Proportion of Negative DR1 χ² Difference Tests for 2-Factor Models with or without Cross Loadings (Proportion of Negative DR1 Rejected by DH) ................................ 155         xxi Acknowledgements I thank Dr. Savalei for her mentorship, for teaching me structural equation modeling and for guiding me to a better understanding of the field of quantitative psychology. I also thank Dr. Biesanz and Dr. Paulhus for serving on my committee and for helping me in the process of my Master’s studies. I would also like to thank Dr. Falk for helping me understand the computational aspects of my research, and my Research Assistants, Cathy Zhang and Nicole Gehring, for their laborious work on managing the tremendous amount of data that this study has generated. Finally, I would like to thank my family and friends, who have supported me throughout my educational experience.               xxii Dedication I dedicate this work to my brother, who has been a constant source of love, support, and inspiration throughout my life. I owe my deepest gratitude to him for everything he’s done for me.    Chapter 1: Introduction 1.1 What is Structural Equation Modeling? Structural equation modeling (SEM) is a statistical modeling tool appropriate for large multivariate datasets that has become extremely popular across many social and behavioral sciences. The technique allows researchers to construct latent variables, which are variables used to represent constructs of interest but are not directly observed in the dataset. Rather, latent variables are estimated indirectly in the model via several observed variables that serve as “indicators” of the latent variable. Indicators for a latent variable are selected based on the degree to which they “tap into” the latent construct. 
SEM can then be used to represent the relationships between observed variables and latent variables, via a measurement model; and to represent the relationships between latent variables and other latent variables, via a structural model (Bollen, 1989). Structural equation models (SEMs) can represent the relationships between different variables by expressing the covariance structure of the variables in terms of the parameters of the proposed model. In confirmatory factor analysis (CFA) models, the combination of the measurement model and the structural model represents the causal connections between latent variables and their indicators, and between latent variables and other latent variables. Exploratory data modeling is possible using exploratory factor analysis (EFA), but this is typically outside of the scope of SEM, which focuses more on CFA modeling. The CFA model is a theory-based representation of how the variables are related to each other in the population. SEM gives researchers the ability to test the feasibility of these theory-based models against empirical data.   2SEM allows for error to be modeled explicitly by specifying residual terms for the dependent variables. There is no prior assumption that the residuals should be uncorrelated. Rather, the residuals can be uncorrelated with each other, or correlated with any other variable in the model. This allows for additional flexibility for researchers when specifying the model. The primary goal of SEM is to evaluate model fit. That is, to examine the extent to which the relationships between the variables as specified in the model proposed by the researcher are representative of the relationships found among these variables in the population. Traditionally, the chi-square test statistic T has been used for this purpose. An overall test of model fit T is obtained by fitting the proposed model to the data. 
Under the assumption of multivariate normality, the statistic T is chi-square distributed with associated degrees of freedom. If the normality assumption is reasonable and T is not significant, the model is found to fit the data.  The present study evaluated several test statistics that help researchers compare the fit of different CFA models to data. The purpose of the study will be presented in greater detail in section 1.6. 1.2 Estimation and Model Fit in SEM Traditionally, model fit was determined based on the decision to reject or retain the null hypothesis, Σ* = Σ(θ), where Σ* is the population covariance matrix and Σ(θ) is the covariance structure of the data under the proposed model, or the model-implied covariance matrix. That is, the proposed model is tested as to whether it is true in the population, making the test one of “exact fit” (Fan, Thompson, & Wang, 1999). In practice, Σ* is unknown, and researchers instead compare the model-implied covariance matrix to the sample covariance matrix S. The test of exact fit involves the use of the chi-square test statistic and its associated p-value to assess the size of the residuals, the discrepancy between the model-implied covariance matrix and the sample covariance matrix.  There are several steps involved in the evaluation of model fit. First, a model is proposed that represents the hypothesized relationships amongst variables in the population. Then, the model is fit to sample data and parameter estimates θ are obtained, from which the model-implied covariance matrix Σ(θ) is subsequently constructed. Model fit is determined by examining the discrepancies between this model-implied covariance matrix and the sample covariance matrix S. As such, one of the goals of SEM is to obtain parameter estimates θ such that the model-implied covariance matrix is as close to the sample covariance matrix S as possible. This can be accomplished by minimizing a discrepancy function F(θ)
, which assesses the difference between the model-implied covariance matrix Σ(θ) and the sample covariance matrix S.  1.2.1 The ML Estimation Method Normal theory maximum likelihood (ML) is the most popular estimation method in SEM (Anderson & Gerbing, 1984). It is based on the assumption of multivariate normality of variables. For the covariance structure in SEM, the discrepancy function to be minimized is the ML fit function F_ML(S, Σ(θ)) = tr{SΣ(θ)⁻¹} − ln|SΣ(θ)⁻¹| − p, where S is the sample covariance matrix, Σ(θ) is the model-implied covariance matrix, θ is the q × 1 vector of model parameters, and p is the number of variables. Denoting the fit function minimum by F̂_ML, the overall test of model fit is computed as T = (N − 1)F̂_ML, where N is sample size. Larger values of F̂_ML indicate larger residuals between the model-implied covariance matrix and the sample covariance matrix (Amemiya & Anderson, 1990). If the residuals are larger than what would be expected due to sampling fluctuation given sample size N, the T statistic will be greater than the critical value of the chi-square at the nominal alpha level. The null hypothesis Σ* = Σ(θ) would be rejected, indicating the hypothesized model structure is not true in the population. If residuals are within sampling fluctuation given N, the T statistic would be small, and the model would be retained. When the data are multivariate normal and the model is correctly specified, the test statistic T is asymptotically distributed as chi-square with degrees of freedom equal to df = p(p + 1)\/2 − q. This statistic is commonly referred to as the ML chi-square.  1.2.2 The GLS Estimation Method The most general alternative approach to estimation in covariance structure models was developed by Browne (1974), who showed that a class of generalized least squares (GLS) estimators could be developed that have many of the same large-sample properties as the ML estimators. 
He also developed associated goodness-of-fit tests. The unweighted least squares goodness-of-fit function is F_ULS(θ) = (1\/2) tr[(S − Σ)²], where the q parameters in θ are chosen to minimize F_ULS(θ). This function represents an unweighted sum of squares of the residual elements. The GLS goodness-of-fit function F_GLS(θ) = (1\/2) tr{[(S − Σ)W]²} minimizes the sum of squares of a weighted residual matrix, where W is a weight matrix. Browne (1974) showed that the ML and GLS fit functions are equal to each other as the residuals approach zero and W estimates Σ⁻¹. Lee (1977) showed that ML and GLS estimators are asymptotically equal. Unlike ML estimators, GLS estimators are not based on the assumption of multivariate normality of variables. The assumption of multivariate normality and adjustments for nonnormality will be elaborated upon in the following section. 1.3 The ML Chi-Square Statistic and Adjustments for Nonnormality The assumption of multivariate normality is often violated in practice. While the ML parameter estimates remain consistent under nonnormality (Shapiro, 1985), the standard error estimates and the model test of fit are no longer accurate. One solution is to use asymptotic distribution-free (ADF) methods of testing for goodness of fit, which do not require distribution assumptions on the observed variables. The GLS estimator mentioned above is an example of an ADF method. However, the computation of these estimators involves fourth-order sample moments and therefore may require large samples to be robust. An ADF statistic that is asymptotic chi-square regardless of distribution was first introduced by Browne (1984). Its performance was studied by Bentler and Yuan (1999) and found to be adequate only in very large samples. As an alternative to ADF methods, a normal-theory estimation method can be used together with asymptotic robust standard errors and test statistics (Satorra, 1992). 
However, these statistics may still lack robustness to small and medium-sized samples.  As a result of the tendency for the ADF and other alternative methods to lack robustness in realistic sample sizes, the most common solution to the problem of nonnormality in SEM is to use a scaling correction for the test of model fit. Satorra and Bentler (1988; 1994) provided adjusted standard errors and an adjusted test of model fit that are robust to nonnormality. The robust test of fit, often referred to as the “Satorra-Bentler scaled chi-square,” is obtained from the ML chi-square by rescaling it by a constant, T_R = T\/c. While T_R does not have an asymptotic chi-square distribution under nonnormality, it approximates it in the mean. This approximation has been found to perform well in simulation studies (e.g., Chou, Bentler, & Satorra, 1991; Curran, West, & Finch, 1996; Hu, Bentler, & Kano, 1992; Satorra & Bentler, 1988) and has been implemented in many SEM programs.  Curran, West and Finch (1994) compared the empirical performance of the ML chi-square statistic T, the ADF chi-square statistic and the Satorra-Bentler scaled chi-square statistic T_R. The authors studied the effects of sample size, nonnormality, and model specification on the three statistics. In general, the ML chi-square showed no evidence of bias across all sample sizes under multivariate normal distributions but was significantly inflated with increasing nonnormality. The ADF statistic was substantially inflated at smaller sample sizes, and the inflation was exacerbated with increasing nonnormality. However, it was unbiased at sample sizes of N = 500 and above, regardless of distribution. The T_R statistic performed quite well across nearly all sample sizes and all distributions and showed no bias even under severely nonnormal distributions at sample sizes N = 200 or more. The ADF test statistic might be a less powerful test of the null hypothesis, compared to T and T_R. 
It was also found that the ADF and T_R test statistics lose power as nonnormality increases.  1.4 Nested Models and the Chi-Square Difference Test Quite often researchers are interested in comparing two or more models against each other. When the two models are properly nested, model comparisons can be carried out by the chi-square difference test. Nested models will be discussed next. In the more general type of nesting, known as covariance structure nesting, Model 0 is nested within Model 1 if the set of all covariance matrices possible under Model 0 is a subset of the set of all of the covariance matrices under Model 1 (Bentler & Bonett, 1980). The two models can be written as a function of their respective parameter vectors, as M_0: Σ_0 = f_0(θ_0) and M_1: Σ_1 = f_1(θ_1), where M_0 ⊂ M_1. In covariance structure nesting, the functions f_0 and f_1 need not be identical, and the parameter vector θ_0 does not need to be a special case of the vector θ_1. However, the set of covariance matrices f_0(θ_0) must be a subset of the set of matrices f_1(θ_1). In parameter nesting, the functions f_0 and f_1 are identical. Model 0 and Model 1 differ only in that the parameter vector θ_0 is a special case of the vector θ_1, obtained by constraining free parameters to equalities or known constants. In other words, two models are nested in the parameter sense if the parameters of one model are a subset of the parameters of the other. The comparison of models by chi-square difference tests is typically limited to parameter nesting, in which nesting can be easily verified. The definition of covariance structure nesting includes that of parameter nesting; that is, if two models are nested in the parameter sense, they are also nested in the covariance structure sense. The null hypothesis associated with parameter nesting is a test of the equality of parameters under the two models, i.e., H_0: θ_0 = θ_1. 
The null hypothesis associated with model comparisons, under covariance matrix nesting, is that the covariance matrices generated by the parameter vectors under the structural models are equivalent under M_0 and M_1. That is, H_0: f_0(θ_0) = f_1(θ_1).   If the original model is the less restricted Model 1 that fits the data, and Model 0 is a nested more restricted model, a chi-square difference test is given by D = T_0 − T_1. Given that the less restricted model is true in the population, this test is asymptotically chi-square distributed with df_D = df_0 − df_1 if the more restricted model is also true in the population (Steiger, Shapiro, & Browne, 1985). Informally, the chi-square difference test tells the researcher whether fitting a more parsimonious Model 0 leads to a significant worsening of fit.  In practice, the difference test is also frequently used when the original model is the more restricted Model 0 that does not fit the data, and Model 1 is the less restricted modified model. Jöreskog (1981) proposed the use of chi-square in exploratory studies: If the chi-square value is large compared to its degrees of freedom, researchers can examine the residuals and propose a more relaxed model that introduces more parameters. The more relaxed model will usually yield a smaller chi-square. If the drop in chi-square is large compared to the difference in degrees of freedom, this indicates the change made in the model represents improvement. If the drop in chi-square is close to the difference in the degrees of freedom, this indicates the improvement in fit is obtained by “capitalizing on chance,” and the added parameters may not have any real meaning. In this case, the difference test is informally interpreted as telling the researcher whether releasing some constraints leads to a significant improvement in fit (Byrne, Shavelson, & Muthén, 1989; Steiger, Shapiro, & Browne, 1985). 
However, there appears to be some disagreement about whether the use of the difference test when the original model does not fit the data is technically justified (Jöreskog, 1978; Steiger, Shapiro, & Browne, 1985; Yuan & Bentler, 2004). For instance, simulation studies showed that when the original model is misspecified, the chi-square difference test cannot control either type I or type II errors for realistic sample sizes (Yuan & Bentler, 2004). A misspecified model leads to biased parameters, and model inferences based on the difference test D can be quite misleading. The authors also pointed out that applying a series of nonsignificant difference tests may lead to a highly significant final model.  1.5 The Chi-square Difference Test and Adjustments for Nonnormality Difference tests are more difficult to generalize to nonnormal data compared to the overall test of model fit. The most intuitive way to obtain a robust difference test is to take the difference of the robust test statistics for Models 0 and 1 (e.g., Byrne & Campbell, 1999). Unfortunately, this is not the correct robust difference test generalization, as the difference of robust test statistics no longer matches the mean of a chi-square distribution with the degrees of freedom corresponding to the difference test. Satorra (2000) defined the correct version of the robust difference test, but his estimator of the scaling correction is difficult to compute. Satorra and Bentler (2001) proposed a computationally feasible version, referred to here as D_R1. This statistic is asymptotically equivalent to the statistic proposed by Satorra (2000) but is easily obtainable from the output of standard SEM software. A disadvantage of D_R1 noted by applied researchers is that it can take on negative values. In response to this criticism, Satorra and Bentler (2010) proposed a new procedure that allows researchers to obtain the strictly positive difference test statistic D_R2. 
This method is essentially a way to obtain the original statistic proposed by Satorra (2000). Finally, Asparouhov and Muthén (2010) suggested a hybrid test, $D_H$, which would equal the original robust difference test statistic, $D_{R1}$, already familiar to researchers, unless this statistic is negative, in which case the hybrid test would be equal to $D_{R2}$.  A small Monte Carlo study was conducted by Satorra and Bentler (2001) to evaluate the performance of $D_{R1}$ in the context of a multiple group model. This study examined a 4-df “errors-in-variables” regression model, where θ is the 4x1 vector of model parameters. Sample sizes were varied from 100 to 900. Model 0 restricted the population parameter vector θ to be invariant across groups, and Model 1 allowed θ to vary across groups. The robust difference test $D_{R1}$ performed well, and the test statistic has since gained popularity. However, except for this limited study, the empirical performance of $D_{R1}$ has never been evaluated in a thorough simulation. Satorra (2000) reported the results of a Monte Carlo simulation studying the same multiple group model as in Satorra and Bentler (2001). He showed that in small samples his new robust difference statistic, which is essentially $D_{R2}$, outperformed other alternatives that were being considered.  1.6 Goals of the Present Research These robust difference tests have never been empirically evaluated side-by-side in a thorough simulation study. The aim of the present study is to fill this gap and to provide a much needed empirical evaluation of $D_{R1}$, $D_{R2}$ and $D_H$, relative to each other and to $D$. The uncorrected difference test was included to demonstrate the extent to which nonnormality compromises its results, and to provide a baseline comparison for the robust corrections.   The simulation studies examined several kinds of constraints in the context of single-group CFA models. 
CFA models are the most common type of covariance structure SEM models, and are often used for an initial evaluation of new statistical developments. While cross-group constraints are certainly a common application of chi-square difference tests, difference tests are quite frequently used in single-group SEM models. For instance, difference tests have been used to test whether a measurement model is tau-equivalent vs. congeneric (Leite, Svinicki, & Shi, 2010; Li, Harmer, Duncan, Duncan, Acock, & Yamamoto, 1998), whether corresponding loadings on multiple factors are equal (e.g., van Dijk, Boer, Koot, Tibboel, Passchier, & Duivenvoorden, 2000), whether some factor correlations in a multi-factor CFA can be constrained to zero (e.g., DeYoung, 2006; Biesanz & West, 2004), or whether a crossloading can be set to zero (e.g., Huang, Lin, & Wang, 2006). As this is the first set of studies to evaluate the performance of robust difference tests, the focus of the present study is limited to single-group CFA models.   The studies differed in the type of constraints differentiating the less restricted and the more restricted models. Only correct models were fit to the data, and thus the null hypothesis was true for all of the fitted models. For the difference test, the null hypothesis is that given that the less restricted model is true, the more restricted model is also true. Hence, rejection rates for the models in each study provide information about Type I error rates. As this is the first set of studies systematically evaluating the performance of the different robust difference tests, the focus was on Type I errors only and power was not studied. 1.7 Definitions of the Studied Statistics Let $M_0$ and $M_1$ be two nested models with degrees of freedom $df_0$ and $df_1$, respectively, with $M_0$ being more restricted and $M_1$ less restricted (i.e., $M_0$ is nested within $M_1$). The uncorrected chi-square difference test $D = T_0 - T_1$ 
is the difference between the ML chi-squares for $M_0$ and $M_1$ (Jöreskog, 1970; 1981; Satorra, 1989). When the data are multivariate normal, and assuming $M_0$ holds, $D$ is asymptotically chi-square distributed with degrees of freedom $df_D = df_0 - df_1$ (Steiger, Shapiro, & Browne, 1985). This is the test of $M_0$ given $M_1$.  When the data are not multivariate normal, $D$ is not chi-square distributed and its mean is typically inflated, requiring a correction. The corrected test is typically referred to as the robust or scaled chi-square difference test. Satorra and Bentler (2001) proposed the robust difference test statistic $D_{R1} = D \/ c_d$, where $c_d$ is a scaling constant that requires only standard SEM output for its computation. To compute $D_{R1}$, the ML and the robust chi-square statistics when fitting both models are first obtained. That is, the overall ML chi-square $T_0$ and the overall robust chi-square $T_{R0}$ are obtained from fitting $M_0$, and the overall ML chi-square $T_1$ and the overall robust chi-square $T_{R1}$ are obtained from fitting $M_1$. Then, the scaling corrections are obtained as follows: $c_0 = T_0 \/ T_{R0}$, $c_1 = T_1 \/ T_{R1}$. Finally, the robust difference test is computed as $D_{R1} = (T_0 - T_1) \/ c_d$ with $c_d = (df_0 c_0 - df_1 c_1) \/ df_D$ and is compared to a chi-square distribution with degrees of freedom equal to $df_D$. When the sample size is small, or when $M_0$ is highly incorrect, the scaling constant $c_d$ may occasionally turn out to be negative, resulting in negative values of $D_{R1}$ (Satorra & Bentler, 2001). Since negative values of the chi-square difference test are not interpretable, when this occurs, researchers are sometimes advised to resort to other measures, such as a difference test using the residual-based test statistics (Bentler, 2006). However, this strategy does not allow for a meaningful study of this statistic, as throwing out replications with negative values will compromise the results. 
An alternative strategy that is also sometimes recommended is to round the negative statistic to the nearest admissible value, which is zero, and to set the p-value to 1 (the more restricted model is retained). This is the procedure followed in the present study when evaluating the performance of $D_{R1}$.   Recently, Satorra and Bentler (2010) developed a computational procedure to obtain the estimator of the scaling correction for the original robust difference test computation proposed by Satorra (2000). This procedure results in a statistic $D_{R2}$, which always remains positive. The procedure for computing the strictly positive difference test $D_{R2}$ involves an additional model run, $M_{10}$, which is obtained by fitting $M_1$ using the final estimates obtained from fitting $M_0$ as starting values and setting the number of iterations to 0. If $T_1^{(10)}$ and $T_{R1}^{(10)}$ are the uncorrected and robust absolute model chi-squares obtained from this run $M_{10}$, then $c_1^{(10)} = T_1^{(10)} \/ T_{R1}^{(10)}$, and the new scaling correction is computed as $c_d^{(10)} = (df_0 c_0 - df_1 c_1^{(10)}) \/ df_D$; hence the new robust difference test statistic is defined as $D_{R2} = (T_0 - T_1) \/ c_d^{(10)}$. While $D_{R2}$ is asymptotically equivalent to $D_{R1}$ (Satorra & Bentler, 2010), it may behave differently in small samples.  Finally, Asparouhov and Muthén (2010) suggested a hybrid test statistic $D_H$, which is equal to the original robust test statistic $D_{R1}$ when it is greater than zero, and otherwise is equal to the strictly positive robust test statistic $D_{R2}$. Asymptotically, $D_H$ is equivalent to $D_{R1}$ and $D_{R2}$. Thus, all three tests should have the same Type I error rates in large samples, but their behavior in small and medium samples may be different.  1.8 Overview of Simulation Studies Study 1 (A and B) evaluated constraints on the factor correlations. These results are provided in Chapter 2. In Study 1A, the factor correlation was constrained to 0; in Study 1B, it was constrained to 1. 
Study 2 (A and B), described in Chapter 3, evaluated constraints on factor loadings. In Study 2A, factor loadings were constrained to be equal within a single factor; in Study 2B, factor loadings were constrained to be equal across two factors. Finally, Study 3 evaluated difference tests comparing models with or without crossloadings. These results are provided in Chapter 4. The next section describes the method of generating data that was common to all studies. Methods unique to each study are described in the chapters thereafter. Table 1 lists the type of constraint created and the variables manipulated in each study.    15Table 1. Models Fit and Variables Manipulated in Each Study. Study Restricted Model M0 Unrestricted Model M1 Number of Indicators per Factor Factor Correlation ? Factor Loadings ? Kurtosis 1A 2-factor CFA, uncorrelated factors  3-factor CFA, uncorrelated factors 2-factor CFA with freely estimated factor correlations, (1-df)  3-factor CFA, with 2 and 3 freely estimated factor correlations (2- and 3-df) 4, 5, 6 0 .5, .7 Homogeneous and heterogeneous 1B 1-factor CFA 2-factor CFA, (1-df)  3-factor CFA, (3-df)  Freely estimated factor correlations 12 N\/A .7 Homogeneous only 2A 1-factor CFA, with 2, 3, 4 or 5 loadings constrained to be equal (1- to 4-df) 1-factor CFA 5 N\/A .7 Homogeneous only 2B 2-factor CFA, with 1 to all pairs of loadings constrained across factors (1- to 6-df) Freely estimated factor correlations 2-factor CFA 4, 5, 6 0, .3 .5, .7 Homogeneous and heterogeneous 3 2-factor CFA  2-factor CFA, with 1-4 crossloadings added in alternating succession (1- to 4-df)  Freely estimated factor correlations  5 0 .7 Homogeneous Note. Two other variables were always manipulated in all studies: average univariate kurtosis of 7 vs. 15, and sample size of N=100, 200, 300, 400, 500, and 1000.  
16 Continuous nonnormal data were generated using the simulation feature of EQS 6.2 (Bentler, 2006), which uses the methodology of Fleishman (1978) and Vale and Maurelli (1983) to create nonnormal data with prespecified univariate skewness and kurtosis. The skewness was set to 2 for all variables; kurtosis was set to either the average of 7 for all variables (moderate nonnormality) or to the average of 15 (extreme nonnormality). All variables were set to either have the same univariate kurtoses (homogeneous kurtosis condition) or to have different univariate kurtoses that averaged to the same values (heterogeneous kurtosis condition). This manipulation was also used by Fouladi (2000) and Savalei (2010). Table 2 shows the kurtosis values used in the heterogeneous kurtosis conditions. Specific details of the data generating models will be described within each study. The number of factors in the CFA models from which the data were generated varied across studies, as did the number of indicators per factor. Factor loadings were .5 or .7, or both, across studies.  Observed variables were created to have variance 1. The sample size was set to be 100, 200, 300, 400, 500, and 1000. One thousand replications were generated per condition.    17Table 2. Kurtosis Values used in the Heterogeneous Kurtosis Conditions by Model Size Total Number of Indicators Average Kurtosis Univariate Kurtoses 8 7 5.25, 5.75, 6.25, 6.75, 7.25, 7.75, 8.25, 8.75  8 15 11.5, 12.5, 13.5, 14.5, 15.5, 16.5, 17.5, 18.5 10 7 5.5, 5.85, 6.2, 6.55, 6.9, 7.25, 7.6, 7.95, 8.3, 8.65 10 15 10.5, 11.5, 12.5, 13.5, 14.5, 15.5, 16.5, 17.5, 18.5, 19.5 12 7 5.75, 6.0, 6.25, 6.5, 6.75, 7.0, 7.25, 7.5, 7.75, 8.0, 8.25, 8.5 12 15 9.5, 10.5, 11.5, 12.5, 13.5, 14.5, 15.5, 16.5, 17.5, 18.5, 19.5, 20.5    18Chapter  2: Factor Correlations of CFA Models The first study involved applying constraints to the factor correlations of two- or three-factor CFA models.  2.1 Study 1A: CFA models with orthogonal vs. 
correlated factors 2.1.1 Study 1A: Method Study 1A examined the performance of the difference test statistics when a CFA model with uncorrelated factors was compared to a less restricted CFA model with freely estimated factor correlations. The study had a fully crossed design with the following variables: number of factors (2 or 3); model size (4, 5 or 6 indicators per factor); factor loadings (.5 or .7); degree of nonnormality (moderately or extremely nonnormal); type of nonnormality (homogeneous or heterogeneous univariate kurtosis). In the 2-factor conditions, the less restricted model $M_1$ had a freely estimated factor correlation, and the more restricted model $M_0$ specified the factors as orthogonal. The corresponding difference test thus had 1 degree of freedom. In the 3-factor conditions, the less restricted $M_1$ had 2 or 3 factor correlations freely estimated, and the more restricted model $M_0$ specified the factors as orthogonal. The corresponding difference tests thus had 2 or 3 degrees of freedom. In order to study Type I error, data were always generated from the more restricted model $M_0$.  2.1.2 Study 1A: Results The results of all of the studies conducted for this project are provided in tables in the Appendix section of this document. In addition, results are summarized in text within their respective chapters.  2.1.2.1 Two-factor models Results for the two-factor CFA models are summarized in this section. Rejection rates for the overall tests of model fit for each of the fitted models are presented in Tables 3-10. The columns under $T_0$ and $T_{R0}$ correspond to the rejection rates of the uncorrected and scaled absolute model chi-square of the restricted model $M_0$, respectively. The columns under $T_1$ and $T_{R1}$ correspond to the rejection rates of the uncorrected and scaled absolute model chi-squares of the less restricted model $M_1$, respectively.  
Across the range of factor loadings, sample sizes, and nonnormality conditions studied, uncorrected rejection rates of the model chi-square for the more restricted and less restricted models $M_0$ and $M_1$ ranged from 8-25% for models with 4 indicators per factor. $T_0$ and $T_1$ performed poorly, as expected, over-rejecting models in every condition. Rejection rates were higher for larger models. For models with 5 indicators per factor, uncorrected rejection rates for $M_0$ and $M_1$ ranged from 12-44%. For models with 6 indicators per factor, the range was 15-64%. The uncorrected overall test of model fit statistics $T_0$ and $T_1$ over-rejected slightly more in larger sample sizes. The statistics over-rejected models quite a bit more in the conditions with factor loadings of .7 compared to those with factor loadings of .5, and this difference was more pronounced in models with more indicators per factor. Rejection rates were higher for extremely nonnormal conditions compared to moderately nonnormal conditions. For the models with 6 indicators per factor, rejection rates were higher for conditions with homogeneous kurtosis compared to those with heterogeneous kurtosis. Otherwise, there was no appreciable difference between conditions with different types of nonnormality.  Across all conditions, the scaled overall tests of model fit statistics $T_{R0}$ and $T_{R1}$ corrected for the over-rejection slightly, bringing the rejection rates down to 5-18%. However, they still over-rejected in nearly every condition. Type I error rates for the studied statistics for the two-factor models examined in this study are presented in Tables 11-16. For α = .05 and 1000 replications, the range of acceptable rejection rates is 3.75-6.25% (Serlin, 2000; Serlin & Lapsley, 1985). Values that fell outside the 3.75-6.25% range are bolded.  Additionally, cells that had Type I error rates above 6.25% are shaded. Values above 6.25% are considered over-rejections; values below 3.75% are considered under-rejections. 
These criteria will be used to evaluate the performance of the statistics studied.  The results for models with 4 indicators per factor will be discussed first, followed by a comparison with results for models with 5 and 6 indicators per factor.  Tables 11 and 12 reveal that for models with 4 indicators per factor, the original uncorrected test $D$ performed well and was within the 3.75-6.25% range almost always. The original robust test statistic $D_{R1}$ also performed well, slightly over-rejecting in small sample sizes for conditions with factor loadings λ = .5. The Type I error rates for the strictly positive $D_{R2}$ were mostly within the 3.75-6.25% range, but showed some over-rejection in small sample sizes for conditions with factor loadings λ = .5. The Type I error rates for the hybrid difference test $D_H$ were largely identical to those of $D_{R1}$, differing slightly in the smallest sample sizes. This pattern of performance holds for models with 5 indicators per factor (presented in Tables 13 and 14) and for models with 6 indicators per factor (presented in Tables 15 and 16).  Rejection rates sometimes differed with the degree of nonnormality. For models with 4 indicators per factor and factor loadings λ = .7, rejection rates were generally higher under extreme nonnormality, compared to moderate nonnormality. However, there were no appreciable differences in the rejection rates under moderate vs. extreme nonnormality in the conditions with factor loadings λ = .5. This pattern holds for models with 6 indicators per factor, but not for models with 5 indicators per factor. Rejection rates for models with 5 indicators per factor did not differ appreciably based on the degree of nonnormality.   Rejection rates sometimes differed with the type of nonnormality, for models with 4 indicators per factor. When factor loadings were λ = .5, rejection rates were generally higher in conditions with heterogeneous kurtosis, compared to those with homogeneous kurtosis. 
There were no appreciable differences in the rejection rates in the homogeneous vs. heterogeneous kurtosis conditions when λ = .7. This was not the case for larger models. For the models with 5 or 6 indicators per factor, rejection rates did not differ appreciably for the homogeneous vs. heterogeneous kurtosis conditions. The proportion of replications in which the $D_{R1}$ statistic turned out negative was usually between 0.1% and 1%, and never higher than 2% (see Table 17). The negative values of $D_{R1}$ occurred most frequently in sample sizes of 100 to 300. Of the $D_{R1}$ statistics that were negative, approximately half were rejected by $D_H$. 2.1.2.2 Three-factor models Results for the three-factor CFA models are summarized in this section. Only models with 4 or 5 indicators per factor, and factor loadings equal to .7, are presented. Rejection rates for the overall tests of model fit for each of the fitted models are presented in Tables 18-21. The columns under $T_0$ and $T_{R0}$ correspond to the rejection rates of the uncorrected and scaled absolute model chi-square of the restricted model $M_0$, respectively. The columns under $T_2$ and $T_{R2}$ correspond to the rejection rates of the uncorrected and scaled absolute model chi-squares of the less restricted model with two freely estimated factor correlations, respectively. Finally, the columns under $T_3$ and $T_{R3}$ correspond to the rejection rates of the uncorrected and scaled absolute model chi-squares of the less restricted model with all three factor correlations freely estimated, respectively.  Across the range of factor loadings, sample sizes, and nonnormality conditions studied, uncorrected rejection rates of the model chi-square for the more restricted and less restricted models ranged from 12-44% for models with 4 indicators per factor. $T_0$, $T_2$ and $T_3$ all performed poorly, as expected, over-rejecting models in every condition. Rejection rates were higher for larger models. 
For models with 5 indicators per factor, uncorrected rejection rates for the more restricted and less restricted models ranged from 25-47%. Rejection rates were higher for models with fewer constraints, and for those with extremely nonnormal data. Rejection rates did not differ appreciably across the homogeneous vs. heterogeneous kurtosis conditions. Across all conditions, the scaled overall tests of model fit statistics $T_{R0}$, $T_{R2}$ and $T_{R3}$ corrected for the over-rejection slightly, bringing the rejection rates down to 5-24%. However, they over-rejected in nearly every condition. Type I error rates for the studied statistics for the three-factor models examined in this study are presented in Tables 22-25. The results for difference tests with 2-df are presented first, followed by difference tests with 3-df.   Table 22 presents the 2-df difference test results for models with 4 indicators per factor. The original uncorrected test $D$, the original robust test statistic $D_{R1}$, as well as the strictly positive test $D_{R2}$ performed well and were within the 3.75-6.25% range most of the time, occasionally over-rejecting in small sample sizes. The performance of $D_H$ was largely identical to that of $D_{R1}$, differing slightly in the smallest sample sizes.  Table 23 presents the 2-df difference test results for models with 5 indicators per factor. The original uncorrected test $D$ performed well and was largely within the 3.75-6.25% range, slightly over-rejecting in small sample sizes for conditions with extreme nonnormality. The performance for the original robust test statistic $D_{R1}$ was worse than that of $D$ in these conditions: $D_{R1}$ was able to maintain Type I error rate, but under-rejected in extremely nonnormal conditions. $D_{R2}$ performed well and was within the 3.75-6.25% range almost always. The performance of the hybrid difference test $D_H$ was largely identical to that of $D_{R1}$, differing slightly in the smallest sample sizes. 
For difference tests with 2-df, the original and hybrid robust statistics $D_{R1}$ and $D_H$ exhibited more under-rejection when the models were larger (i.e., models with more indicators per factor). Table 24 presents the 3-df difference test results for models with 4 indicators per factor, which were very similar to the results for the 2-df tests. The original uncorrected test $D$ performed well and was largely within the 3.75-6.25% range, slightly over-rejecting in small sample sizes for conditions with extreme nonnormality. The original robust test statistic $D_{R1}$ was able to maintain Type I error rate, but under-rejected in extremely nonnormal conditions with small sample sizes. The strictly positive $D_{R2}$ performed well and was within the 3.75-6.25% range almost always. The performance of the hybrid difference test $D_H$ was largely identical to that of $D_{R1}$, differing slightly in the smallest sample sizes.  Table 25 presents the 3-df difference test results for models with 5 indicators per factor, which were similar to the results for models with 4 indicators per factor. The original uncorrected test $D$ was largely within the 3.75-6.25% range, slightly over-rejecting in small sample sizes for conditions with extreme nonnormality. The original robust test statistic $D_{R1}$ was able to maintain Type I error rate, but under-rejected in all of the extremely nonnormal conditions except the ones with N = 1000. $D_{R2}$ performed well and was within the 3.75-6.25% range almost always. The performance of the hybrid difference test $D_H$ was largely identical to that of $D_{R1}$, differing slightly in the smallest sample sizes. Again, for difference tests with 3-df, the original and hybrid robust statistics $D_{R1}$ and $D_H$ exhibited more under-rejection when the model was larger. 
2.1.3 Study 1A: Summary Overall, Type I error rates for the original robust difference test $D_{R1}$ tended to be slightly below nominal rates in extremely nonnormal conditions, especially for difference tests with larger degrees of freedom (rejection rates range from 2-4%). The tendency for $D_{R1}$ to under-reject seemed to improve with larger sample sizes. On the other hand, with the exception of a few conditions, the new strictly positive test $D_{R2}$ performed well under nonnormality. The Type I error rates for the hybrid difference test $D_H$ were largely identical to those of $D_{R1}$. The rejection rates for all four studied statistics across all conditions presented in Tables 11-16 and 22-25 were between 2.5% and 7.8%, suggesting that differences among them were not great and tended to involve under-rejection.  Interestingly, the uncorrected difference test $D$ appeared to be fairly robust to nonnormality in the situation when the constraint is setting factor correlations to zero, performing similarly and at times even better than the three corrected tests. This is a novel finding and one for which a theoretical explanation has yet to be made. Of note, this robust behavior did not hold for the overall model test statistics that were used to compute the difference test $D$.  2.2 Study 1B: One-factor vs. two- or three-factor CFA models 2.2.1 Study 1B: Method Study 1B examined the test statistics comparing a 1-factor CFA model with less restricted 2- or 3-factor CFA models. These models can be viewed as nested in the parameter sense (Bentler & Bonett, 1980) if one conceptualizes a 1-factor model as a 2- or 3-factor model with factor correlations fixed to 1. The more restricted model $M_0$ was a one-factor CFA model with 12 indicators. The less restricted models $M_1$ were therefore a 2-factor model with 6 indicators per factor and a 3-factor model with 4 indicators per factor. 
Nonnormal data (with homogeneous kurtosis) were generated from the more restricted model $M_0$ with all loadings equal to .7. The corresponding difference tests thus had 1 or 3 degrees of freedom.     2.2.2 Study 1B: Results The results of this study are provided in tables in the Appendix section, and summarized in text. Rejection rates for the overall tests of model fit for this study were similar to those from Study 1A, and are not presented here. Table 26 presents Type I error rates for the 1-df difference tests, obtained by fitting 2-factor models to the data. The uncorrected difference test $D$ was no longer robust to nonnormality, and its Type I error rates ranged from 13-21%. The original robust difference test $D_{R1}$ exhibited under-rejection in nearly all conditions, with Type I error rates in the 3-4% range. The strictly positive $D_{R2}$ exhibited over-rejection in nearly all of the conditions with sample sizes of 400 and below, with Type I error rates in the 2-11% range. The Type I error rates for the hybrid test statistic $D_H$ were identical to those of $D_{R1}$ except at N = 100, where some slight differences were observed. There was no appreciable difference in the performance of the robust statistics under moderate vs. extreme nonnormality. Table 27 presents Type I error rates for the 3-df difference tests, obtained by fitting 3-factor models to the data. The uncorrected difference test $D$ was no longer robust to nonnormality, and its Type I error rates ranged from 23-35%. The original robust difference test $D_{R1}$ under-rejected in a couple of the moderately nonnormal conditions, and in all of the extremely nonnormal conditions. Its Type I error rates ranged from 1-5%. The strictly positive $D_{R2}$ was unable to maintain good Type I error rate in sample sizes of 400 or below, with rejection rates that ranged from 4-12%. The performance of $D_{R2}$ did not differ appreciably under moderate vs. extreme nonnormality. 
The Type I error rates for the hybrid test statistic $D_H$ were identical to those of $D_{R1}$. The original uncorrected difference test $D$ showed higher rejection rates for difference tests with higher degrees of freedom. The performance of the robust test statistics did not differ appreciably between the 1-df and 3-df conditions. The proportion of replications in which the $D_{R1}$ statistic turned out negative was usually between 0.1% and 1% (see Table 28), and only occurred in the 2-df conditions. Of the $D_{R1}$ statistics that were negative, approximately half were rejected by $D_H$. 2.2.3 Study 1B: Summary The uncorrected ML difference test $D$ was no longer robust to nonnormality, and its Type I error rates ranged from 13-35%. Under moderate nonnormality, the original robust difference test $D_{R1}$ exhibited under-rejection in some conditions, whereas the strictly positive $D_{R2}$ exhibited over-rejection in all of the conditions corresponding to the smallest four sample sizes. Under extreme nonnormality, $D_{R1}$ under-rejected in every condition, while $D_{R2}$ again over-rejected at N = 100 to 400. The Type I error rates for the hybrid test statistic $D_H$ were identical to those of $D_{R1}$ except at N = 100, where some inconsequential differences were observed.     Chapter 3: Factor Loadings of CFA Models  This study involved applying constraints to the factor loadings of one- or two-factor CFA models.  3.1 Study 2A: One-factor CFA models with free vs. constrained factor loadings 3.1.1 Study 2A: Method Study 2A examined the difference test statistics comparing a 1-factor CFA model $M_1$ with all factor loadings freely estimated to a more restricted model $M_0$ where 2, 3, 4, or 5 loadings were constrained to be equal. Nonnormal data were generated from a 1-factor model with five indicators, homogeneous kurtosis and factor loadings of .7. The difference tests thus had 1, 2, 3, or 4 degrees of freedom, corresponding to the number of constraints.   
3.1.2 Study 2A: Results The results of this study are provided in tables in the Appendix section, and summarized in text. Rejection rates for the overall test of model fit for each of the fitted models for moderate and extreme nonnormality are presented in Tables 29-30. The columns under $M_{UR}$, $M_1$, $M_2$, $M_3$ and $M_4$ correspond to the rejection rates of the unrestricted model and models with 1, 2, 3 and 4 constraints, respectively. Uncorrected and scaled absolute model chi-squares are presented. The uncorrected overall test of model fit performed poorly, as expected, over-rejecting in all conditions studied, with rejection rates between 27-76%. The scaled chi-square test of model fit performed considerably better, with rejection rates between 2-9%.    Tables 31-34 present the Type I error rates for the difference tests with 1-df to 4-df. Across all degrees of freedom, nonnormality conditions and sample sizes studied, the uncorrected difference test $D$ was not robust to nonnormality, and its Type I error rates ranged from 19-61%. The original robust difference test $D_{R1}$ exhibited under-rejection in many conditions, especially under extreme nonnormality. Its Type I error rates were between 2-7%. The strictly positive $D_{R2}$ exhibited over-rejection in all but 1 condition, with Type I error rates in the 5-17% range. The Type I error rates for the hybrid test statistic $D_H$ were identical to those of $D_{R1}$. Type I error rates tended to be higher for difference tests with higher degrees of freedom. There was no appreciable difference in the performance of the robust statistics under moderate vs. extreme nonnormality. The $D_{R1}$ statistic turned out negative in 2 replications out of 1000 (see Table 35), occurring in the 1-df condition with N = 100. None of these negative $D_{R1}$ statistics were rejected by $D_H$. 3.1.3 Study 2A: Summary Type I error rates for the uncorrected difference test $D$ were high in all conditions, reaching 61% with extremely nonnormal data. 
The original robust statistic $D_{R1}$ under-rejected slightly, particularly with extremely nonnormal data and for tests with higher degrees of freedom. In contrast, the strictly positive $D_{R2}$ over-rejected in nearly all conditions, and these rejection rates were sometimes unacceptably high, reaching 17% with extremely nonnormal data. The performance of the hybrid test statistic $D_H$ was identical to that of $D_{R1}$. Overall, the performance of the strictly positive test $D_{R2}$ was found to be unacceptable in this study, and the original robust difference test or the hybrid procedure exhibits a clear advantage.   3.2 Study 2B: Two-factor CFA models with free loadings vs. loadings constrained equal across factors 3.2.1 Study 2B: Method Study 2B examined the robust difference test statistics comparing the less restricted 2-factor CFA model $M_1$ where all factor loadings were freely estimated to the more restricted model $M_0$ where one or more loadings from each factor were constrained to be equal. The study had a fully crossed design with the following variables: model size (4, 5 or 6 indicators per factor); factor loadings (.5 or .7); degree of nonnormality (moderately or extremely nonnormal); type of nonnormality (homogeneous or heterogeneous univariate kurtosis). In addition, the value of the factor correlation was manipulated (0 or .3). Factor correlation was freely estimated in the fitted models. The data were generated from the 2-factor model where all factor loadings were equal (to .5 or to .7), so that the constraint imposed by $M_0$ would be true in the population. Difference tests were obtained from fitting to the data 2-factor models with and without constraints. For models with 4 indicators per factor, $M_0$ had 1 to 4 loadings from each factor constrained to be equal, corresponding to difference tests with 1 to 4 degrees of freedom. 
For models with 5 indicators per factor, 0M had 1 to 5 loadings from each factor constrained to be equal, corresponding to difference tests with 1 to 5 degrees of freedom. Finally, for models with 6 indicators per factor, 0M had 1 to 6 loadings from each factor constrained to be equal, corresponding to difference tests with 1 to 6 degrees of freedom.    3.2.2 Study 2B: Results The results of this study are provided in tables in the Appendix section, and  summarized in text. Rejection rates for the overall tests of model fit for this study were similar to those from Study 2A, and are not presented here. Results for the models with 4, 5 and 6 indicators per factor are discussed in separate sections. Within each section, factor correlations equal to 0 are discussed first, followed by the results for the models with factor correlations equal to .3.  3.2.2.1 Models with four indicators per factor 3.2.2.1.1 Factor correlation 0 Tables 36-43 present the Type I error rates for difference tests with 1-df to 4-df, for models with 4 indicators per factor and factor correlations equal to 0. Results for factor loadings of .5 (Tables 36-39) are discussed first, followed by a comparison with the results for factor loadings of .7 (Tables 40-43).  For conditions with factor loadings of .5, the uncorrected difference test D was not  robust to nonnormality across all degrees of freedom, nonnormality conditions and sample sizes studied. Its Type I error rates ranged from 10-32%. The original robust difference test 1RD performed well, exhibiting under-rejection in a few conditions, especially under extreme nonnormality. Its Type I error rates were between 3-8%. The strictly positive 2RD also performed well, with Type I error rates in the 4-9% range. The Type I error rates for the hybrid test statistic HD were nearly identical to those of 1RD , except for some small differences in the lowest sample sizes. The performance of HD differed from 1RD only for 1-df and 2-df tests. 
There was no appreciable difference in the performance of the robust statistics across the degrees of freedom nor the types of nonnormality studied.  For conditions with factor loadings of .7, the uncorrected difference test D was not  robust to nonnormality across all degrees of freedom, nonnormality conditions and sample sizes studied. Its Type I error rates ranged from 18-59%. The original robust difference test 1RD maintained good Type I error rate (2-8%), but exhibited quite a bit of under-rejection, especially in extremely nonnormal conditions. The strictly positive 2RD performed well in general, occasionally over-rejecting in small sample sizes and under-rejecting with extremely nonnormal data. The Type I error rates for the hybrid test statistic HD were identical to those of 1RD . The robust statistics 1RD  and HD  tended to under-reject more with higher degrees of freedom, but there was no appreciable difference in their performance with homogeneous vs. heterogeneous kurtosis. Overall, the statistics studied performed similarly across the two levels of factor loadings studied.  Table 44 presents the proportion of the 1RD statistic that was negative out of 1000 replications. Only the 1-df conditions are presented because the performance of 1RD  and HD  only differed in tests with smaller degrees of freedom. The proportion of 1RD  that were negative was typically in the 0.1-2% range, and occurred mostly in smaller sample sizes. These negative 1RD  statistics were rejected by HD  roughly a third of the time. 3.2.2.1.2 Factor correlation .3 Tables 45-52 present the Type I error rates for difference tests with1-df to 4-df, for models with 4 indicators per factor and factor correlations equal to .3. Results for factor  33loadings of .5 (Tables 45-48) are discussed first, followed by a comparison with the results for factor loadings of .7 (Tables 49-52).  
For conditions with factor loadings of .5, the uncorrected difference test D was not  robust to nonnormality across all degrees of freedom, nonnormality conditions and sample sizes studied. Its Type I error rates ranged from 11-29%. The original robust difference test 1RD was able to maintain good Type I error rate, but it exhibited under-rejection in quite a few conditions, especially in extremely nonnormal conditions with higher degrees of freedom. Its Type I error rates were between 2-7%. The strictly positive 2RD generally performed well, with Type I error rates in the 3-8% range. However, it tended to over-reject slightly in the conditions with moderate nonnormality and heterogeneous kurtosis. The Type I error rates for the hybrid test statistic HD were nearly identical to those of 1RD , except for some small differences in the lowest sample sizes. The performance of HD differed from 1RD only for 1-df and 2-df tests. There was no appreciable difference in the performance of the robust statistics across the degrees of freedom studied nor the type of nonnormality. The robust statistics 1RD  and HD  tended to under-reject more with higher degrees of freedom. For conditions with factor loadings of .7, the uncorrected difference test D was not  robust to nonnormality across all degrees of freedom, nonnormality conditions and sample sizes studied. Its Type I error rates ranged from 10-60%. The original robust difference test 1RD maintained good Type I error rate, but under-rejected in quite a few conditions, especially in extremely nonnormal conditions with homogeneous kurtosis. Its Type I error rates were between 2-6%. The under-rejection exhibited by 1RD occurred more often with higher degrees of freedom. The strictly positive 2RD performed quite well, with Type I error rates in the 3.75-6.25% range in nearly all conditions. The Type I error rates for the hybrid test statistic HD were identical to those of 1RD . 
Overall, the statistics studied performed similarly across the two levels of factor loadings studied.  Table 53 presents the proportion of the 1RD statistic that was negative out of 1000 replications. Only the 1-df conditions are presented because the performance of 1RD  and HD  only differed in tests with smaller degrees of freedom. The proportion of 1RD that was negative was typically in the 0.1-2% range, and occurred mostly in smaller sample sizes. These negative 1RD  statistics were rejected by HD  roughly a third of the time. 3.2.2.2 Models with five indicators per factor 3.2.2.2.1 Factor correlation 0 Tables 54-63 present the Type I error rates for difference tests with 1-df to 5-df, for models with 5 indicators per factor and factor correlations equal to 0. Results for factor loadings of .5 (Tables 54-58) are discussed first, followed by a comparison with the results for factor loadings of .7 (Tables 59-63).  For conditions with factor loadings of .5, the uncorrected difference test D was not  robust to nonnormality across all degrees of freedom, nonnormality conditions and sample sizes studied. Its Type I error rates ranged from 10-35%. The original robust difference test 1RD maintained good Type I error rate, but under-rejected in quite a few conditions, especially in extremely nonnormal conditions. Its Type I error rates were between 2-7%. The strictly positive 2RD performed quite well, but tended to over-reject at small sample sizes. Its Type I error rates were in the 3-10% range. The Type I error rates for the hybrid test statistic HD were nearly identical to those of 1RD , except for some minor difference in the smallest sample sizes. Overall, the robust statistics 1RD  and HD  tended to under-reject slightly more in difference tests with higher degrees of freedom. The under-rejection is more pronounced with higher degrees of nonnormality and in conditions with heterogeneous kurtosis. The performance of HD differed from 1RD only for 1-df tests. 
For conditions with factor loadings of .7, the uncorrected difference test D was not  robust to nonnormality across all degrees of freedom, nonnormality conditions and sample sizes studied. Its Type I error rates ranged from 18-71%. The original robust difference test 1RD maintained good Type I error rate, but under-rejected in quite a few extremely nonnormal conditions, having Type I error rates between 2-7%. The under-rejection was more pronounced with higher degrees of freedom. The strictly positive 2RD performed quite well, with Type I error rates in the 3.75-6.25% range in nearly all conditions for difference tests with 1-4 degrees of freedom. For difference tests with 5-df, however, 2RD  tended to over-reject slightly in moderately nonnormal conditions with homogeneous kurtosis. Its Type I error rates were in the 3-8% range. The Type I error rates for the hybrid test statistic HD were nearly identical to those of 1RD , except for some minor difference in the smallest sample sizes. The performance of HD differed from 1RD only for 1-df and 2-df tests. Overall, the statistics studied performed similarly across the two levels of factor loadings studied.   36Table 64 presents the proportion of the 1RD statistic that was negative out of 1000 replications. Only the 1-df conditions are presented because the performance of 1RD  and HD  only differed in tests with smaller degrees of freedom. The proportion of negative 1RD  was typically in the 0.1-1% range, and occurred mostly in smaller sample sizes. These negative 1RD  statistics were rejected by HD  roughly half of the time. 3.2.2.2.2 Factor correlation .3 Tables 65-74 present the Type I error rates for difference tests with1-df to 5-df, for models with 5 indicators per factor and factor correlations equal to .3. Results for factor loadings of .5 (Tables 65-69) are discussed first, followed by a comparison with the results for factor loadings of .7 (Tables 70-74).  
For conditions with factor loadings of .5, the uncorrected difference test D was not  robust to nonnormality across all degrees of freedom, nonnormality conditions and sample sizes studied. Its Type I error rates ranged from 11-34%. The original robust difference test 1RD was able to maintain the Type I error rates (they ranged from 2-6%), but under-rejected in the majority of conditions. The under-rejection was especially pronounced for tests with higher degrees of freedom. The strictly positive 2RD performed quite well, with Type I error rates in the 3.75-6.25% range in nearly all conditions, except in small sample sizes, where it sometimes over-rejected slightly. The Type I error rates for the hybrid test statistic HD were nearly identical to those of 1RD , except for some minor difference in the smallest sample sizes. The performance of HD differed from 1RD only for 1-df and 2-df tests. For conditions with factor loadings of .7, the uncorrected difference test D was not   37robust to nonnormality across all degrees of freedom, nonnormality conditions and sample sizes studied. Its Type I error rates ranged from 17-68%. The original robust difference test 1RD was able to maintain the Type I error rates (they ranged from 1-5%), but had a tendency to under-reject. The under-rejection occurred more with higher degrees of freedom and extremely nonnormal data. The strictly positive 2RD performed quite well, with Type I error rates in the 3.75-6.25% range almost always, except in a few conditions where it sometimes over-rejected slightly. The Type I error rates for the hybrid test statistic HD were nearly identical to those of 1RD , except for some minor difference in the smallest sample sizes. The performance of HD differed from 1RD only for 1-df and 2-df tests. The performance of the three robust statistics did not differ based on the type of nonnormality. Overall, the statistics studied performed similarly across the two levels of factor loadings studied.  
Table 75 presents the proportion of the 1RD statistic that was negative out of 1000 replications. Only the 1-df conditions are presented because the performance of 1RD  and HD  only differed in tests with smaller degrees of freedom. The proportion of 1RD that was negative was typically in the 0.1-1.5% range, and occurred mostly in smaller sample sizes. These negative 1RD  statistics were rejected by HD  roughly a third of the time.    383.2.2.3 Models with six indicators per factor 3.2.2.3.1 Factor correlation 0 Tables 76-87 present the Type I error rates for difference tests with1-df to 6-df, for models with 6 indicators per factor and factor correlations equal to 0. Results for factor loadings of .5 (Tables 76-81) are discussed first, followed by a comparison with the results for factor loadings of .7 (Tables 82-87).  For conditions with factor loadings of .5, the uncorrected difference test D was not  robust to nonnormality across all degrees of freedom, nonnormality conditions and sample sizes studied. Its Type I error rates ranged from 11-42%. The original robust difference test 1RD was able to maintain the Type I error rates (they ranged from 1-4%), but had a tendency to under-reject. The under-rejection occurred more with higher degrees of freedom and extremely nonnormal data. The strictly positive 2RD performed quite well, with Type I error rates in the 3.75-6.25% range except in the smaller sample sizes, where it occasionally over-rejected slightly. Its performance did not appear to depend on the degree of freedom of the test, nor on the level or type of nonnormality. The Type I error rates for the hybrid test statistic HD were nearly identical to those of 1RD , except for some minor difference in the smallest sample sizes. The performance of HD differed from 1RD only for 1-df and 2-df tests. 
For conditions with factor loadings of .7, the uncorrected difference test D was not  robust to nonnormality across all degrees of freedom, nonnormality conditions and sample sizes studied. Its Type I error rates ranged from 19-75%. The original robust difference test 1RD was able to maintain the Type I error rates (they ranged from 1-5%), but it under-rejected quite a bit. The under-rejection occurred more often when the tests had higher degrees of freedom and when the data were extremely nonnormal. The strictly positive 2RD performed quite well, with Type I error rates in the 3.75-6.25% range except in the smaller sample sizes, where it occasionally over-rejected slightly. The Type I error rates for the hybrid test statistic HD were nearly identical to those of 1RD , except for some minor difference in the smallest sample sizes. The performance of HD differed from 1RD only for 1-df tests. The performance of the three robust test statistics did not differ appreciably with the type of nonnormality. 3.2.2.3.2 Factor correlation .3 Tables 89-100 present the Type I error rates for difference tests with 1-df to 6-df, for models with 6 indicators per factor and factor correlations equal to .3. Results for factor loadings of .5 (Tables 89-94) are discussed first, followed by a comparison with the results for factor loadings of .7 (Tables 95-100).  For conditions with factor loadings of .5, the uncorrected difference test D was not  robust to nonnormality across all degrees of freedom, nonnormality conditions and sample sizes studied. Its Type I error rates ranged from 11-40%. The original robust difference test 1RD was able to maintain the Type I error rates (they ranged from 2-6%), but it under-rejected quite a bit when the tests had 2 or more degrees of freedom. The strictly positive 2RD performed well overall, with Type I error rates in the 3-8% range, but almost always over-rejected slightly in sample sizes N = 100 and 200. 
The Type I error rates for the hybrid test statistic HD were nearly identical to those of 1RD , except for some minor difference in the smallest sample sizes. The performance of HD differed from 1RD only for 1-df and 2-df  40tests. The performance of the three robust test statistics did not differ with the type of nonnormality. For conditions with factor loadings of .7, the uncorrected difference test D was not  robust to nonnormality across all degrees of freedom, nonnormality conditions and sample sizes studied. Its Type I error rates ranged from 19-74%. The original robust difference test 1RD was able to maintain the Type I error rates (they ranged from 2-6%), but it under-rejected quite a bit under extreme nonnormality and when the tests had 2 or more degrees of freedom. The strictly positive 2RD performed well overall, with Type I error rates in the 3.75-6.25% range almost always. The Type I error rates for the hybrid test statistic HD were nearly identical to those of 1RD , except for some minor difference in the smallest sample sizes, and only for tests with 1-df. The performance of the three robust test statistics did not differ appreciably with the type of nonnormality. Table 101 presents the proportion of the 1RD statistic that was negative out of 1000 replications. Only the 1-df conditions are presented because the performance of 1RD  and HD  only differed in tests with smaller degrees of freedom. The proportion of 1RD that was negative was typically in the 0.1-1.5% range, and occurred mostly in smaller sample sizes. These negative 1RD  statistics were rejected by HD  roughly a third of the time. 3.2.3 Study 2B: Summary The uncorrected ML difference test D had very high rejection rates under moderate and extreme nonnormality, with no appreciable difference between homogeneous and heterogeneous kurtosis conditions. 
The original robust statistic 1RD was able to maintain a  41good Type I error rate, but under-rejected quite a lot in extremely nonnormal conditions. The strictly positive 2RD also performed well, but occasionally over-rejected in small sample sizes. The hybrid difference test HD performed very similarly to 1RD . The performance of the three robust difference statistics did not differ appreciably across the degrees of freedom studied. They also performed similarly across the two levels of factor loadings studied.     42Chapter  4: Two-factor CFA models with and without crossloadings 4.1 Study 3: Method Study 3 examined the robust difference test statistics comparing 2-factor CFA models with or without crossloadings. The less restricted model 1M was a 2-factor CFA model with orthogonal factors and with 1 to 4 additional estimated crossloadings, which were added to alternating factors. The more restricted model 0M was the same but did not estimate any crossloadings. Nonnormal data were generated from the more restricted model 0M with homogeneous kurtosis and loadings equal to .7; that is, the crossloadings estimated by 1M  were zero in the population. The corresponding difference tests thus had 1, 2, 3 or 4 degrees of freedom, corresponding to the number of estimated crossloadings in 1M .     4.2 Study 3: Results The results of this study are provided in tables in the Appendix section, and  summarized in text. Rejection rates for the overall tests of model fit for each of the fitted models in this study are presented in Tables 102-105. The columns under 0M , 1M , 2M , 3M and 4M correspond to the rejection rates of the uncorrected and scaled absolute model chi-square of the restricted model 0M , and the models with 1 to 4 crossloadings, respectively. Across the range of factor loadings, sample sizes, and nonnormality conditions studied, uncorrected rejection rates of the model chi-square ranged from 25-49%. 
The uncorrected chi-square statistics performed poorly, as expected, over-rejecting models in every condition. Rejection rates were higher for models with fewer constraints, and for those  43with extremely nonnormal data. The scaled model chi-square was able to correct somewhat for this over-rejection, having rejection rates in the 4-14% range. Type I error rates for the difference tests with 1-df to 4-df are presented in Tables 106-109, respectively. The uncorrected difference test D seemed robust to nonnormality across the conditions studied, though it over-rejected slightly in the smaller sizes, and slightly more with higher degrees of freedom. Its Type I error rates ranged from 4-9% across all conditions. The original robust difference test 1RD and the strictly positive 2RD both performed well across the conditions studied, with rejection rates in the 3.75-6.25% range almost always. The Type I error rates for the hybrid test statistic HD were nearly identical to those of 1RD , except for some minor differences in the smallest sample sizes. The performance of HD differed from 1RD only for 1-df tests. The performance of the three robust test statistics did not differ appreciably with the degree of nonnormality, type of nonnormality, nor degrees of freedom of the tests. Table 110 presents the proportion of the 1RD  statistic that was negative out of 1000 replications. The proportion of 1RD  that were negative was typically in the 0.1-1% range, and occurred mostly in tests with 1- or 2-df, and in smaller sample sizes. These negative 1RD  statistics were rejected by HD  roughly half of the time.  4.3 Study 3: Summary As in Study 1A, the uncorrected ML difference test D again appears to be robust to nonnormality in this study. While it over-rejects models in smaller samples, its Type I error rates are not nearly as inflated in Studies 2 and 3, and the over-rejection disappears in larger  44sample sizes. 
The original robust statistic 1RD performed well under both moderate and extreme nonnormality. The strictly positive 2RD performed about the same as the uncorrected difference test. Type I error rates for HD were largely identical to those for 1RD . The performance of the robust statistics was similar across the range of degrees of freedom studied. Chapter  5: Conclusion and Overall Discussion 5.1 Performance of the Uncorrected Chi-Square Difference Test The goal of the present study was to provide a much needed investigation of the empirical performance of robust chi-square difference tests with nonnormal data. As this was the first study of the robust difference tests, the investigation was limited to CFA models in the single-group context. Two versions of the robust chi-square test statistics proposed by Satorra and Bentler (2001; 2010), 1RD and 2RD , were studied, which have not been previously evaluated beyond a limited demonstration provided in the original articles, as well as a hybrid test statistic HD , proposed by Asparouhov & Muth\u00e9n (2010). The uncorrected ML difference test, D , was included to investigate the effects of not correcting for nonnormality. A variety of constraints and types of nonnormality were studied.  The results of the present study reveal that the uncorrected ML difference test D did not perform well with nonnormal data. This is not surprising given that this test was developed under the assumption of normality. While this was the first study to empirically evaluate the performance of the uncorrected difference test D , the uncorrected overall test of model fit T computed under the assumption of normality (the ML chi-square statistic) has been studied extensively with nonnormal data, and is known to exhibit highly inflated rejection rates (e.g., Chou, Bentler, & Satorra, 1991; Bentler & Yuan, 1999; Satorra & Bentler, 1994). 
It was thus expected that a similar pattern would be found for the uncorrected difference test D , and this was consistent with the results. However, there was an exception to this pattern, which appeared to be limited to the conditions where the constraint distinguishing 0M and 1M involved setting a model parameter to zero, as in Studies 1A and 3. In both of these studies, the uncorrected test D was found to be fairly robust to nonnormality, especially with increasing sample sizes, even while the overall tests of model fit for either model were exhibiting quite inflated rejection rates. At present, a theoretical explanation for this intriguing finding has not yet been made.  5.2 Performance of the Robust Chi-Square Difference Tests Overall, the remaining corrected difference tests, 1RD , 2RD and HD , all performed better than the uncorrected difference test, but there were some important differences among them. In particular, the strictly positive difference test 2RD  had a tendency in some cases to over-reject the null hypothesis. This was true when constraining factor correlations to 1 in sample sizes below 500 (Study 1B), constraining factor loadings within a single factor to be equal to each other (Study 2A), and when applying constraints on models with cross-loadings in sample sizes below 500 (Study 3). In contrast, both 1RD and HD  maintained good Type I error rates in those conditions, but also sometimes had a slight tendency to under-reject the null hypothesis when testing whether factor correlations were different from 0 (Study 1A),  and when constraining factor loadings within a single factor to be equal to each other (Study 2A). This tendency was more pronounced with highly nonnormal data. Since Type I error rates slightly below nominal are less problematic than inflated Type I error rates, the original corrected difference test, 1RD , or the hybrid test, HD , are tentatively recommended for general use.  
In these simulations, the results for HD  were nearly indistinguishable from 1RD , differing only slightly in small samples. These statistics were the same whenever 1RD  is positive. The proportion of replications in which the 1RD statistic turned out negative was usually between 0.1% and 1%, and never higher than 2%. The negative values of 1RD occurred most frequently in sample sizes of 100 to 300. The proportion of negative 1RD  values was higher in extremely nonnormal conditions with heterogeneous kurtosis, and for smaller degrees of freedom. Of the cases with negative 1RD , typically one-third to two-thirds were rejected by HD. Thus, while the decisions to accept or reject the model based on 1RD  and HD  did not agree perfectly when 1RD was negative, there were too few negative cases for the differences between these statistics to matter. The simpler strategy of rounding the difference test to the nearest admissible value of zero works just as well as the more sophisticated procedure of re-computing the test using different formulae to ensure it is positive.     Additionally, the low proportion of negative instances of 1RD  implies that the differences between 1RD and 2RD  cannot be explained by what happens with the negative cases alone, as the difference in rejection rates would never be greater than 2%, and were typically much less. Thus, the sometimes inflated rejection rates observed for 2RD  were due to the fact that it produced a different value when 1RD  is positive, as well. While 2RD  was proposed to address the issue of  1RD occasionally taking on negative values, it may have introduced unforeseen problems. However, these results are preliminary as this was the first study of these test statistics.     5.3 Future Directions While these difference tests have been compared under diverse simulation conditions, future research will determine whether the results generalize to other types of models and data. 
Since this was the first study to examine these robust test statistics, only Type I error rates were studied. An obvious next step is to study power. Another limitation of the present study is that only single-group CFA models were considered. Multiple group models are a common application of difference tests, and should be investigated in future studies.  It is also worth examining the performance of these test statistics when applying them to both correctly specified and misspecified models. Only correctly specified models were examined in the present study, but in practice, researchers commonly apply difference tests to misspecified models.  Additionally, the method that was used to generate nonnormal data (i.e., Vale & Maurelli, 1983) was quite common in studies that investigate the robustness of test statistics and standard errors to violation of the assumption of normality (e.g., Curran, West, & Finch, 1996; Rhemtulla, Brosseau-Liard, & Savalei, 2012; Savalei & Bentler, 2005). In this way, these results provide continuity with previous research. Other methods for generating nonnormal data exist (e.g., Mair, Satorra, & Bentler, 2012) and might be worth investigating further. 5.4 Conclusion In general, all three robust tests 1RD , 2RD and HD  performed better than the uncorrected difference test, with a few exceptions. Due to the tendency of the strictly positive 2RD  to over-reject the null hypothesis, the original corrected difference test, 1RD , or the hybrid test, HD , are tentatively recommended for general use. Researchers should keep in mind, however, that these statistics have a tendency to under-reject the null hypothesis.    References Anderson, J. C. & Gerbing, D. W. (1984). The effect of sampling error on convergence, improper solutions, and goodness-of-fit indices for maximum likelihood confirmatory factor analysis. Psychometrika, 49, 155-173.  Amemiya, Y., & Anderson, T. W. (1990). 
Asymptotic chi-square tests for a large class of factor analysis models. Annals of Statistics, 18(3), 1453-1463. Asparouhov, T., & Muth\u00e9n, B. (2010). Computing the strictly positive Satorra-Bentler chi-square test in Mplus. Mplus Web Notes: No. 12. January 24, 2012. Bentler, P.M., & Bonett, D.G. (1980). Significance tests and goodness of fit in the analysis of covariance structures, Psychological Bulletin, 88, 588-606. Bentler, P. M., & Dijkstra, T. (1985). Efficient estimation via linearization in structural models. Multivariate analysis VI, 9-42. Bentler, P. M. (2006). EQS 6 Structural Equations Program Manual. Encino, CA: Multivariate Software, Inc.  Bentler, P. M., & Yuan, K. H. (1999). Structural equation modeling with small samples: Test statistics. Multivariate Behavioral Research, 34(2), 181-197. Biesanz, J. C., & West, S. G. (2004).  Towards understanding assessments of the Big Five: Multitrait-multimethod analyses of convergent and discriminant validity across measurement occasion and type of observer. Journal of Personality, 72, 845-876. Bollen, K. A. (1989). Structural Equations with Latent Variables. New York, NY: John Wiley & Sons. Browne, M. W. (1984). Asymptotically distribution-free methods for the analysis of covariance structures. British Journal of Mathematical and Statistical Psychology, 37(1), 62-83. Browne, M. W. (1974). Generalized least-squares estimators in the analysis of covariance structures. South African Statistical Journal, 8, 1-24. Byrne, B.M., & Campbell, T.L. (1999). Cross-cultural comparisons and the presumption of equivalent measurement and theoretical structure: A look beneath the surface, Journal of Cross-Cultural Psychology, 30, 555-574. Byrne, B.M., Shavelson, R.J., & Muth\u00e9n, B. (1989). Testing for the equivalence of factor covariance and mean structures: The issue of partial measurement invariance. Psychological Bulletin, 105, 456-466. Chou, C.P., Bentler, P.M., & Satorra, A. (1991). 
Scaled test statistics and robust standard errors for non-normal data in covariance structure analysis: A Monte Carlo study. British Journal of Mathematical and Statistical Psychology, 44, 347-357. Curran, P.J., West, S.G., & Finch, J.F. (1996). The robustness of test statistics to nonnormality and specification error in confirmatory factor analysis. Psychological Methods, 1, 16-29. van Dijk, M., de Boer, J. B., Koot, H. M., Tibboel, D., Passchier, J., & Duivenvoorden, H. J. (2000). The reliability and validity of the COMFORT scale as a postoperative pain instrument in 0 to 3-year-old infants. Pain, 84, 367. DeYoung, C. G. (2006). Higher-order factors of the Big Five in a multi-informant sample. Journal of Personality and Social Psychology, 91, 1138-1151.   Engle, R. F. (1984). Wald, likelihood ratio, and Lagrange Multiplier tests in econometrics. In A. Griliches & M. D. Intriligator (Eds.), Handbook of econometrics (pp. 776-826). Amsterdam: North Holland. Fan, X., Thompson, B., & Wang, L. (1999). Effects of sample size, estimation methods, and model specification on structural equation modeling fit indexes. Structural Equation Modeling: A Multidisciplinary Journal, 6, 56-83.  Fouladi, R.T. (2000). Performance of modified test statistics in covariance and correlation structure analysis under conditions of multivariate nonnormality. Structural Equation Modeling, 7, 356-410. Fleishman, A.I. (1978). A method for simulating non-normal distributions. Psychometrika, 43, 521-532. Hu, L.-T., Bentler, P.M., & Kano, Y. (1992). Can test statistics in covariance structure analysis be trusted? Psychological Bulletin, 112, 351-362. Huang, C.-L., Lin, H.-H., & Wang, H.-H. (2006). The psychometric properties of the Chinese version of the Fagerstrom test for nicotine dependence. Addictive Behaviors, 31, 2324-2327.  J\u00f6reskog, K. G. (1970). A general method for the analysis of covariance structures. Biometrika, 57, 239-251. J\u00f6reskog, K. G. (1978). 
Structural analysis of covariance and correlation matrices. Psychometrika, 43, 443-477.  J\u00f6reskog, K. G. (1981). Analysis of covariance structures. Scandinavian Journal of Statistics, 8, 65-92. Lee, S.-Y. (1977). Some algorithms for covariance structure analysis (Doctoral dissertation, University of California, Los Angeles). Dissertation Abstracts International, 38, 480B-481B. (University Microfilms No. 77-17,230). Lee, S.-Y. (1985). On testing functional constraints in structural equation models.  53Biometrika, 72, 125-131. Leite, W. L., Svinicki, M., & Shi, Y. (2010). Attempted validation of the scores of the VARK: Learning styles inventory with multitrait-multimethod confirmatory factor analysis models. Educational and Psychological Measurement, 70, 323-339. Li, F., Harmer, P., Duncan, T. E., Duncan, S. C., Acock, A., & Yamamoto, T. (1998). Confirmatory factor analysis of the task and ego orientation in sport questionnaire with cross-validation. Research Quarterly for Exercise and Sport, 69, 276-283. Mair, P., Satorra, A., & Bentler, P. M. (2012). Generating nonnormal multivariate data using copulas: Applications to SEM. Multivariate Behavioral Research, 47, 547-565. Rhemtulla, M., Brosseau-Liard, P., & Savalei, V. (2012). How many categories is enough to treat data as continuous? A comparison of robust continuous and categorical SEM estimation methods under a range of non-ideal situations. Psychological Methods. Advance online publication. doi: 10.1037\/a0029315. Satorra, A. (1989). Alternative test criteria in covariance structure analysis: A unified approach. Psychometrika, 54, 131-151. Satorra, A. (1990). Robustness issues in structural equation modeling: A review of recent developments. Quality & Quantity, 24, 367-386. Satorra, A. (2000). Scaled and adjusted restricted tests in multi-sample analysis of moment structures. In D.D.H. Heijmans & D.S.G. Pollock, A. 
Satorra (Eds.), Innovations in multivariate statistical analysis: a festschrift for Heinz Neudecker (pp. 223-247). Dordrecht: Kluwer Academic. Satorra, A., & Bentler, P.M. (1988). Scaling corrections for chi-square statistics in covariance structure analysis. ASA Proceedings of the Business and Economic Section,  54308-313. Satorra, A., & Bentler, P.M. (1994). Corrections to test statistics and standard errors in covariance structure analysis. In A. von Eye & C.C. Clogg (Eds.) Latent variable analysis: applications for developmental research (pp. 399-419). Thousand Oaks: Sage.  Satorra, A. (2000). Scaled and adjusted restricted tests in multi-sample analysis of moment structures (pp. 233-247). Springer US.  Satorra, A., & Bentler, P. M. (2001). A scaled difference chi-square test for moment structure analysis. Psychometrika, 66, 507-514. Satorra, A., & Bentler, P.M. (2010). Ensuring positiveness of the scaled difference chi-square test statistic. Psychometrika, 75, 243-248. Savalei, V., & Bentler, P.M. (2005). A statistically justified pairwise ML method for incomplete nonnormal data: A comparison with direct ML and pairwise ADF. Structural Equation Modeling, 12, 183-214. Savalei, V., & Falk, C. (in press). Robust two-stage approach outperforms robust FIML with incomplete nonnormal data. Structural Equation Modeling. Savalei, V. (2010). Small sample statistics for incomplete nonnormal data: extensions of complete data formulae and a Monte Carlo comparison. Structural Equation Modeling: A Multidisciplinary Journal, 17, 241-264.  55Serlin, R. C. (2000). Testing for robustness in Monte Carlo studies. Psychological Methods,  5, 230-240. Serlin, R. C., & Lapsley, D. K. (1985). Rationality in psychological research: The good- enough principle. American Psychologist, 40, 73-83. Shapiro, A. (1985). Asymptotic equivalence of minimum discrepancy function estimators to  GLS estimators. South African Statistical Journal, 17, 33-81. Steiger, J. H., Shapiro, A., & Browne, M. W. 
(1985). On the multivariate asymptotic distribution of sequential chi-square statistics. Psychometrika, 50, 253-264.  Vale, C.D., & Maurelli, V.A. (1983). Simulating multivariate nonnormal distributions. Psychometrika, 48, 465-471.   Wald, A. (1943). Tests of statistical hypotheses concerning several parameters when the number of observations is large. Transactions of the American Mathematical Society, 426-482. Yuan, K.-H., & Bentler, P. M. (2004). On chi-square difference and z tests in mean and covariance structure analysis when the base model is misspecified. Educational and Psychological Measurement, 64, 737-757.  56Appendix Tables for Study 1A      Results for two-factor models     57 58   59  60 61 62 63  64  65  66  67  68  69  70   71     Results for three-factor models   72   73  74  75  76  77Tables for Study 1B     78      79Tables for Study 2A    80   81   82    83Tables for Study 2B      Results for models with four indicators per factor   84   85  86  87  88  89  90       91   92  93  94 95 96 97  98 99  100         101     Results for models with five indicators per factor       102  103  104  105  106  107  108  109  110  111  112  113  114  115  116  117  118  119  120  121   122   123     Results for models with six indicators per factor  124  125  126  127  128  129  130  131  132  133  134  135  136  137  138  139  140  141  142  143  144  145  146  147  148    149Tables for Study 3   150   151  152  153  154  155 ","attrs":{"lang":"en","ns":"http:\/\/www.w3.org\/2009\/08\/skos-reference\/skos.html#note","classmap":"oc:AnnotationContainer"},"iri":"http:\/\/www.w3.org\/2009\/08\/skos-reference\/skos.html#note","explain":"Simple Knowledge Organisation System; Notes are used to provide information relating to SKOS concepts. 
There is no restriction on the nature of this information, e.g., it could be plain text, hypertext, or an image; it could be a definition, information about the scope of a concept, editorial information, or any other type of information."}],"Genre":[{"label":"Genre","value":"Thesis\/Dissertation","attrs":{"lang":"en","ns":"http:\/\/www.europeana.eu\/schemas\/edm\/hasType","classmap":"dpla:SourceResource","property":"edm:hasType"},"iri":"http:\/\/www.europeana.eu\/schemas\/edm\/hasType","explain":"A Europeana Data Model Property; This property relates a resource with the concepts it belongs to in a suitable type system such as MIME or any thesaurus that captures categories of objects in a given field. It does NOT capture aboutness"}],"GraduationDate":[{"label":"Graduation Date","value":"2013-11","attrs":{"lang":"en","ns":"http:\/\/vivoweb.org\/ontology\/core#dateIssued","classmap":"vivo:DateTimeValue","property":"vivo:dateIssued"},"iri":"http:\/\/vivoweb.org\/ontology\/core#dateIssued","explain":"VIVO-ISF Ontology V1.6 Property; Date Optional Time Value, DateTime+Timezone Preferred "}],"IsShownAt":[{"label":"DOI","value":"10.14288\/1.0074116","attrs":{"lang":"en","ns":"http:\/\/www.europeana.eu\/schemas\/edm\/isShownAt","classmap":"edm:WebResource","property":"edm:isShownAt"},"iri":"http:\/\/www.europeana.eu\/schemas\/edm\/isShownAt","explain":"A Europeana Data Model Property; An unambiguous URL reference to the digital object on the provider\u2019s website in its full information context."}],"Language":[{"label":"Language","value":"eng","attrs":{"lang":"en","ns":"http:\/\/purl.org\/dc\/terms\/language","classmap":"dpla:SourceResource","property":"dcterms:language"},"iri":"http:\/\/purl.org\/dc\/terms\/language","explain":"A Dublin Core Terms Property; A language of the resource.; Recommended best practice is to use a controlled vocabulary such as RFC 4646 [RFC4646]."}],"Program":[{"label":"Program 
(Theses)","value":"Psychology","attrs":{"lang":"en","ns":"https:\/\/open.library.ubc.ca\/terms#degreeDiscipline","classmap":"oc:ThesisDescription","property":"oc:degreeDiscipline"},"iri":"https:\/\/open.library.ubc.ca\/terms#degreeDiscipline","explain":"UBC Open Collections Metadata Components; Local Field; Indicates the program for which the degree was granted."}],"Provider":[{"label":"Provider","value":"Vancouver : University of British Columbia Library","attrs":{"lang":"en","ns":"http:\/\/www.europeana.eu\/schemas\/edm\/provider","classmap":"ore:Aggregation","property":"edm:provider"},"iri":"http:\/\/www.europeana.eu\/schemas\/edm\/provider","explain":"A Europeana Data Model Property; The name or identifier of the organization who delivers data directly to an aggregation service (e.g. Europeana)"}],"Publisher":[{"label":"Publisher","value":"University of British Columbia","attrs":{"lang":"en","ns":"http:\/\/purl.org\/dc\/terms\/publisher","classmap":"dpla:SourceResource","property":"dcterms:publisher"},"iri":"http:\/\/purl.org\/dc\/terms\/publisher","explain":"A Dublin Core Terms Property; An entity responsible for making the resource available.; Examples of a Publisher include a person, an organization, or a service."}],"Rights":[{"label":"Rights","value":"Attribution 2.5 Canada","attrs":{"lang":"en","ns":"http:\/\/purl.org\/dc\/terms\/rights","classmap":"edm:WebResource","property":"dcterms:rights"},"iri":"http:\/\/purl.org\/dc\/terms\/rights","explain":"A Dublin Core Terms Property; Information about rights held in and over the resource.; Typically, rights information includes a statement about various property rights associated with the resource, including intellectual property rights."}],"RightsURI":[{"label":"Rights 
URI","value":"http:\/\/creativecommons.org\/licenses\/by\/2.5\/ca\/","attrs":{"lang":"en","ns":"https:\/\/open.library.ubc.ca\/terms#rightsURI","classmap":"oc:PublicationDescription","property":"oc:rightsURI"},"iri":"https:\/\/open.library.ubc.ca\/terms#rightsURI","explain":"UBC Open Collections Metadata Components; Local Field; Indicates the Creative Commons license url."}],"ScholarlyLevel":[{"label":"Scholarly Level","value":"Graduate","attrs":{"lang":"en","ns":"https:\/\/open.library.ubc.ca\/terms#scholarLevel","classmap":"oc:PublicationDescription","property":"oc:scholarLevel"},"iri":"https:\/\/open.library.ubc.ca\/terms#scholarLevel","explain":"UBC Open Collections Metadata Components; Local Field; Identifies the scholarly level of the author(s)\/creator(s)."}],"Title":[{"label":"Title ","value":"Investigation of Type I error rates of three versions of robust chi-square difference tests in structural equation modeling","attrs":{"lang":"en","ns":"http:\/\/purl.org\/dc\/terms\/title","classmap":"dpla:SourceResource","property":"dcterms:title"},"iri":"http:\/\/purl.org\/dc\/terms\/title","explain":"A Dublin Core Terms Property; The name given to the resource."}],"Type":[{"label":"Type","value":"Text","attrs":{"lang":"en","ns":"http:\/\/purl.org\/dc\/terms\/type","classmap":"dpla:SourceResource","property":"dcterms:type"},"iri":"http:\/\/purl.org\/dc\/terms\/type","explain":"A Dublin Core Terms Property; The nature or genre of the resource.; Recommended best practice is to use a controlled vocabulary such as the DCMI Type Vocabulary [DCMITYPE]. 
To describe the file format, physical medium, or dimensions of the resource, use the Format element."}],"URI":[{"label":"URI","value":"http:\/\/hdl.handle.net\/2429\/44856","attrs":{"lang":"en","ns":"https:\/\/open.library.ubc.ca\/terms#identifierURI","classmap":"oc:PublicationDescription","property":"oc:identifierURI"},"iri":"https:\/\/open.library.ubc.ca\/terms#identifierURI","explain":"UBC Open Collections Metadata Components; Local Field; Indicates the handle for item record."}],"SortDate":[{"label":"Sort Date","value":"2013-12-31 AD","attrs":{"lang":"en","ns":"http:\/\/purl.org\/dc\/terms\/date","classmap":"oc:InternalResource","property":"dcterms:date"},"iri":"http:\/\/purl.org\/dc\/terms\/date","explain":"A Dublin Core Elements Property; A point or period of time associated with an event in the lifecycle of the resource.; Date may be used to express temporal information at any level of granularity. Recommended best practice is to use an encoding scheme, such as the W3CDTF profile of ISO 8601 [W3CDTF].; A point or period of time associated with an event in the lifecycle of the resource.; Date may be used to express temporal information at any level of granularity. Recommended best practice is to use an encoding scheme, such as the W3CDTF profile of ISO 8601 [W3CDTF]."}]}