modSel/DESCRIPTION0000644000076500000240000000143613211325404012540 0ustar00CiCistaffPackage: modSel Type: Package Title: Model Selection Version: 0.1.0 Authors@R: c(person("ChengCheng", "Zhai", email = "czhai@fandm.edu", role = c("cre", "aut")), person("Danel", "Draguljić", email = "danel.draguljic@fandm.edu", role = "aut")) Maintainer: ChengCheng Zhai Description: Provides a model selection method that keeps the hierarchy of the terms for both linear fixed and mixed regression models. Different model selection criterions can be used such as AIC, BIC, likelihood ratio test, and you can perform the selection forward and backward. License: GPL-2 Encoding: UTF-8 LazyData: true RoxygenNote: 6.0.1 Suggests: MASS Imports: lme4, stringr NeedsCompilation: no Packaged: 2017-12-04 20:07:32 UTC; CiCi Author: ChengCheng Zhai [cre, aut], Danel Draguljić [aut] modSel/NAMESPACE0000644000076500000240000000023713151652720012256 0ustar00CiCistaffexportPattern("^[[:alpha:]]+") importFrom ("stats", "anova", "as.formula", "complete.cases", "drop1", "lm", "qnorm") import(stringr) import(lme4) modSel/R/0000755000076500000240000000000013211325404011227 5ustar00CiCistaffmodSel/R/modSel_final.R0000644000076500000240000014622613211325353013764 0ustar00CiCistaff#' @export modelSelection <- function(dat, response, random = NULL, fixed, keep = NULL, direction, method = "AIC", alpha = 0.05, sinkit = NULL){ if (method == "LRT"){ methodLRT = method method = "logLik" } overallT <- Sys.time() if (missing(response)) stop("A response is required and not provided.") if(!(method %in% c("AIC", "BIC", "logLik", "tValue")) & !is.null(random)){ warning("Please choose AIC, BIC, LRT, or tValue for method.") break } if(!(direction %in% c("forward", "backward"))){ warning("Please choose a direction: 'forward' or 'backward'.") break } namesOfData = colnames(dat) main <- fixed[str_count(fixed, ":")== 0 & grepl('\\^', fixed)==FALSE] if (all(is.element(main, namesOfData)) == FALSE){ wrongName = main[which(is.element(main, namesOfData) == FALSE)] cat("The variable ", wrongName, " is not a variable in your data file. \n", sep = "") } fmissing <- sapply(dat, function(x) sum(is.na(x))) missingCol = namesOfData[which(fmissing != 0)] missingV = missingCol[which(is.element(missingCol, fixed) == TRUE)] if (length(missingV) != 0){ cat("There are some missing values in the variables in your model and they will be removed. \n") print(fmissing[missingV]) loc = match(missingV, namesOfData) dat = dat[complete.cases(dat[unlist(loc)]), ] } n = dim(dat)[1] p = length(fixed) if (n < p){ cat("There are ", p, " terms in your model, ", "however, you only have ", n, " data points. The model cannot be applied.", sep = "") } if (!is.null(sinkit)){ sink(sinkit) } # get all the terms this interaction is involved with: # x is a term, y is the domain of the terms to check fInteractions = function(x, y){ checkeach = sapply(unlist(strsplit(x, ":")), grepl, y) y[apply(checkeach, 1, all)] } # get all the terms this polynomial is involved with fpolynomials = function(x, y){ y[which(grepl(substr(strsplit(x, '\\^')[[1]][1], 3, nchar(x)), y))] } # get all the terms this main effect is involved with fMainInOthers = function(x,y){y[which(grepl(x,y))]} # Change the variable names: lm - cate1,cate2... # so, fit an anova first to just get cate with no subscripts # but anova deals with the names differently than lm, # so we then fit a drop1() to match the names modelNAMES = lm(as.formula(paste(response, " ~ ", paste(fixed, collapse = "+"))), data = dat) fixed = row.names(anova(modelNAMES))[1:length(fixed)] modelNAMES = lm(as.formula(paste(response, " ~ ", paste(fixed, collapse = "+"))), data = dat) fixed = row.names(drop1(modelNAMES, .~., test = "F"))[-1] main <- fixed[str_count(fixed, ":")== 0 & grepl('\\^', fixed)==FALSE] # document this fixed as fixed1 for the future # when checking if there is a categorical variable fixed1 = fixed # fix the orders in the interaction terms in "keep" keep1 = keep[which(grepl(":", keep) == FALSE)] keep2 = c() for (term in keep){ if(grepl(":", term) == TRUE){ a =fInteractions(term, fixed)[1] keep2 = unique(c(keep2, a)) } } keep = c(keep1, keep2) mainKeep <- keep[str_count(keep, ":")== 0 & grepl('\\^', keep)==FALSE] # for forward, if there is a keep, the termList will only # be constituted by fixedForward fixedForward = setdiff(fixed, keep) # function to create a interList and polyList, returns a list of lists # List of interaction terms: interList[[1]] is the twoWay vector finterPolyList = function(x){ HighestInteraction = max(str_count(x, ":")) interList = list() for (i in 1:HighestInteraction){ interList[i] = list(x[str_count(x, ":") == i]) } # List of polynomials, polyList[[2]] = quadratic degreeList = list() polyTerms = x[grepl('\\^', x)== TRUE] for (term in polyTerms){ degreeList[length(degreeList)+1] = strsplit(strsplit(term, "\\^")[[1]][2], ")")[[1]] } if (length(unlist(degreeList)) != 0){ maxDegree = strtoi(max(unlist(degreeList))) }else{ maxDegree = 0 } polyList = list() for (i in 1:maxDegree){ polyList[i] = list(polyTerms[which(i == unlist(degreeList))]) } return(list(interList, polyList, HighestInteraction, maxDegree)) } # everything including the "keep" resultList = finterPolyList(fixed) interList = resultList[[1]] polyList = resultList[[2]] HighestInteraction = resultList[[3]] maxDegree = resultList[[4]] # forward: create a list of lists of groups of terms that are eligible to be added # x = interList, y = polyList, z = main ftermList = function(x,y,z){ termList = list() # go through each term in the interList, get all its lower hierarchy terms # by going through the interMain (the sublists in interList and main) for (term in unlist(x)){ a = term termC =str_count(term, ":") interMain = c(unlist(interList[1:termC]), main) for (subterm in interMain) { if (all(is.element(unlist(strsplit(subterm, ":")), unlist(strsplit(term, ":")))) == TRUE){ a = list(a, subterm) } } termList[length(termList) +1] = list(unique(unlist(a))) } # create a list of vectors for each polynomial terms for (term in unlist(y)){ termD = strtoi(unlist(strsplit(strsplit(term, '\\^')[[1]][2], ")"))) polyMain = c(main, unlist(polyList[2:termD])) mainPart = unlist(strsplit(unlist(strsplit(term, '\\^'))[1], '\\('))[2] a = polyMain[which(grepl(mainPart, polyMain) == TRUE)] termList[length(termList) +1] = list(a) } # add the main effects to the termList for (term in z){ termList[length(termList)+1] = term } return(termList) } # create a vector that contains all the lower terms in "keep" if (!is.null(keep)){ interKeep = finterPolyList(keep)[1] polyKeep = finterPolyList(keep)[2] keepList = ftermList(interKeep, polyKeep, mainKeep) keep = unique(unlist(keepList)) } # for forward only: if (direction == "forward"){ # create a list of lists (termList) for all terms not in keep (and their lowers) # those will be the one that will be considered to be added fixednotkeep = setdiff(fixed, keep) interList1 = finterPolyList(fixednotkeep)[[1]] polyList1 = finterPolyList(fixednotkeep)[[2]] if (!is.null(keep)){ main1 = setdiff(main, unique(unlist(keepList))) }else{ main1 = main } termList = ftermList(interList1, polyList1, main1) # remove all "keep" from each sublist of termList for (i in 1:length(termList)){ termList[[i]] = setdiff(termList[[i]], keep) } } ## for backward, create a validToDrop function # x = interList, y = polyList fvalidToDrop = function(x,y){ validToDrop = c() HighestInteraction = max(str_count(fixed, ":")) # both are empty, the highets order is main if ((length(unlist(x)) == 0) & (length(unlist(y)) == 0)){ validToDrop = c(validToDrop, main) } # determine if an interaction term can be dropped if (length(unlist(x)) == 0){ validToDrop = validToDrop }else{ for (term in unlist(x[1:HighestInteraction])){ termC =str_count(term, ":") if (termC < HighestInteraction){ if (length(fInteractions(term, unlist(x[(termC):HighestInteraction]))) == 1){ validToDrop = c(validToDrop, term) } } if (termC == HighestInteraction){ validToDrop = c(validToDrop, term) } } } # determine if a polynomial term can be dropped if (length(unlist(y)) == 0){ validToDrop = validToDrop }else{ for (term in unlist(y[2:maxDegree])){ termD = strtoi(unlist(strsplit(strsplit(term, '\\^')[[1]][2], ")"))) if (termD < maxDegree){ if (length(fpolynomials(term, unlist(y[(termD+1):maxDegree]))) == 0){ validToDrop = c(validToDrop, term) } } if (termD == maxDegree){ validToDrop = c(validToDrop, term) } } } # when both interList and polyList are not empty, # determine if a main can be dropped if ((length(unlist(x)) != 0) & (length(unlist(y)) != 0)){ for (term in main){ if ((length(fMainInOthers(term, unlist(x)))==0) & (length(fMainInOthers(term, unlist(y)))==0)){ validToDrop = c(validToDrop, term) } } } # when interList is empty and polyList is not if((length(unlist(x)) != 0) & (length(unlist(y)) == 0)){ for (term in main){ if (length(fMainInOthers(term, unlist(x)))==0){ validToDrop = c(validToDrop, term) } } } # when polyList is empty and interList is not if ((length(unlist(y)) != 0) & (length(unlist(x)) == 0)) { for (term in main){ if (length(fMainInOthers(term, unlist(y)))==0){ validToDrop = c(validToDrop, term) } } } # remove "keep" from validToDrop # as long as the "keep" the user input is not dropped each time, # all its lower hierarchy terms will always be in the model as well. validToDrop = setdiff(validToDrop, keep) # to avoid repetition, unique it validToDrop = unique(validToDrop) return(validToDrop) } # get the categorical variables in the data file cate = split(names(dat), sapply(dat, function(x) paste(class(x), collapse = " ")))$factor #### FORWARD if (direction == "forward"){ ### Linear Fixed Regression Forward Model Selection if (is.null(random)){ addFrame = data.frame(vAdd = character(), method2 = integer(), stringsAsFactors = FALSE) # initial model variables = c(keep, 1) f = as.formula(paste(response, " ~ ", paste(variables, collapse = "+"))) model = lm(f, data = dat) # if the initial model is not empty (i.e. there is "keep"), get rid of the "1" # else keep the "1" there to hold the space if (!is.null(keep)){ formFull = as.formula(paste(response, " ~ ", paste(setdiff(variables,1), collapse = "+"))) formFullX = paste(response, " ~ ", paste(setdiff(variables,1), collapse = " + ")) }else{ formFull = f formFullX = paste(response, " ~ ", paste(variables, collapse = " + ")) } formFull = paste("lm(", formFullX, ")", sep = "") # initilize pValue to something really small pValue = 0.000001 while(pValue <= alpha){ print("-----------------------------------------------------------") print("-----------------------------------------------------------") cat("The model now has ", length(variables), " terms, including the intercept, of which ", length(variables) - 1, " are/is fixed effect(s). \n\n", sep = "") # make each sublist in termList a string format termListStr = list() for ( i in 1:length(termList)){ mmrow = paste(termList[[i]], collapse = ", ") termListStr[length(termListStr)+1] = mmrow } termListFrame = t(as.data.frame(termListStr)) colnames(termListFrame) <- "AddGroup" rownames(termListFrame) <- c(1:length(termList)) cat("The groups of terms that are eligible to be added are: \n\n") print(termListFrame) cat("\n") # initialize a data.frame of the terms considered and its corresponding pValue frameMM = data.frame("Terms Considered" = character(), "Method" = integer(), stringsAsFactors = FALSE) # create an "i" to get to the specific place in termList i=0 methodList = list() # document the null model - model1 model1 = model for (group in termList){ i=i+1 # add a group each time to the model variables1 = unique(c(variables, group)) f <- as.formula(paste(response, " ~ ", paste(variables1, collapse = "+"))) model2 = lm(f, data = dat) # anova to compare: model1 is the null model, model2 is the alternative model anovaTEST = anova(model1, model2) pValue = anovaTEST$`Pr(>F)`[2] methodList[length(methodList)+1] = pValue # initialize a new row to be added to frameMM frameMMRow = data.frame("Terms Considered" = character(), "Method" = integer(), stringsAsFactors = FALSE) frameMMRow = data.frame(termConsidered = paste(unlist(termList[i]), collapse = ", "), pValue = pValue) frameMM = rbind(frameMM, frameMMRow) } frameMM <- frameMM[order(unlist(methodList)), ] cat("The top five most significant group of terms are: \n\n") colnames(frameMM) = c("Terms Considered", "F-test p-Value") print(frameMM[1:5,]) cat("\n") # the location in methodList/termList of the minimum pValue vAddLoc = which.min(unlist(methodList)) vAdd = termList[[vAddLoc]] cat("The group of terms that will be added is: ", paste(vAdd, collapse = " + "), ".\n\n", sep = "") # update the model variables = c(variables, vAdd) f = as.formula(paste(response, " ~ ", paste(variables, collapse = "+"))) model = lm(f, data = dat) # to show the new model, get rid of 1 var1 = setdiff(variables, 1) form = as.formula(paste(response, " ~ ", paste(var1, collapse = "+"))) cat("The updated model is now: \n") print(form) cat("\n\n") # update the pValue pValue = methodList[[vAddLoc]] # remove the terms in vAdd from each list in termList for (i in 1:length(termList)){ termList[[i]] = setdiff(termList[[i]], vAdd) } # remove that group from termList termList = termList[-vAddLoc] # remove the terms in vAdd from fixed, main fixed = setdiff(fixed, vAdd) main = setdiff(main, vAdd) # if a sublist in the termList is empty, remove that list termList = termList[lapply(termList, length)>0] # create a data frame with all the GROUP added and its pValue newrow = data.frame(vAdd = character(), method2 = integer(), stringsAsFactors = FALSE) newrow = data.frame(TermsAdded = toString(vAdd), pValue = pValue) addFrame = rbind(addFrame, newrow) # change the colname of addFrame at the very end to avoid name match problem # when the pValue is big, meaning the LRT test shows that there is # a difference between the two models, so we drop the terms we just added if (pValue > alpha){ cat("Adding ", "'", paste(vAdd, collapse = " + "), "'", " will result in an F-test p-Value bigger than alpha, ", "so the term(s) will not be added and the process terminates. \n\n", sep = "") variables1 = setdiff(variables, vAdd) f <- as.formula(paste(response, " ~ ", paste(variables1, collapse = "+"))) finalModel = lm(f, data = dat) var1 = setdiff(variables1, 1) # in the case nothing can be added and there is no keep, var1 might be empty if (length(var1) != 0){ form = as.formula(paste(response, " ~ ", paste(var1, collapse = "+"))) formX = paste(response, " ~ ", paste(var1, collapse = " + ")) }else{ form = as.formula(paste(response, " ~ ", paste(variables1, collapse = "+"))) formX = paste(response, " ~ ", paste(variables1, collapse = " + ")) };break } if (length(termList) == 0){ variables1 = variables f <- as.formula(paste(response, " ~ ", paste(variables1, collapse = "+"))) finalModel = lm(f, data = dat) var1 = setdiff(variables1, 1) if (length(var1) != 0){ form = as.formula(paste(response, " ~ ", paste(var1, collapse = "+"))) formX = paste(response, " ~ ", paste(var1, collapse = " + ")) }else{ form = as.formula(paste(response, " ~ ", paste(variables1, collapse = "+"))) formX = paste(response, " ~ ", paste(variables1, collapse = " + ")) };break} } form = paste("lm(", formX, ")", sep = "") } ### Linear Mixed Regression Forward Model Selection if (!is.null(random)){ # initial model has the random, keep part variables = c(random, keep, 1) f <- as.formula(paste(response, " ~ ", paste(variables, collapse = "+"))) model = lmer(f, data = dat, REML = FALSE) addFrame = data.frame( method2 = integer(), stringsAsFactors = FALSE) methodFull = summary(model)$AICtab[method] formFull = paste(response, " ~ ", paste(setdiff(variables,1), collapse = " + ")) formFull = paste("lmer(", formFull, ")", sep = "") # AIC/BIC/LRT if (method == "AIC" | method == "BIC"){ method1 = 100000000 method2 = 10000 } if (method == "logLik"){ method2 = 0.00000001 method1 = alpha } # when method2 <= method1: keep adding while (method2 <= method1) { print("-----------------------------------------------------------") print("-----------------------------------------------------------") method1 = summary(model)$AICtab[method] cat("The model now has ", method, " value of ", method1, ".\n\n", sep = "") cat("The model now has ", length(variables), " terms, including the intercept, of which ", length(variables) - length(random) - 1, " are fixed effects and ", length(random), " are/is random effect(s).\n\n", sep = "") termListStr = list() for ( i in 1:length(termList)){ mmrow = paste(termList[[i]], collapse = ", ") termListStr[length(termListStr)+1] = mmrow } termListFrame = t(as.data.frame(termListStr)) colnames(termListFrame) <- "AddGroup" rownames(termListFrame) <- c(1:length(termList)) cat("The groups of terms that are eligible to be added are: \n\n") print(termListFrame) cat("\n\n") cat("R is going to fit ", length(termList), " model(s),", "each time adding one group of variable that is eligible to be added to the original model. \n\n", sep = "") tstart <- Sys.time() i=0 methodList = list() pValueList = list() model1 = model for (group in termList){ i=i+1 variables1 = c(variables, group) f <- as.formula(paste(response, " ~ ", paste(variables1, collapse = "+"))) model2 = lmer(f, data = dat, REML = FALSE) allvalues = list(t(as.data.frame(summary(model2)$AICtab[1:4]))) methodList[length(methodList)+1] = allvalues anovaTEST = anova(model1, model2) pValue = anovaTEST$`Pr(>Chisq)`[2] pValueList[length(pValueList)+1] = pValue } roundT <- Sys.time() - tstart roundT = paste(round(unclass(roundT)[1],2), units(roundT)) cat("The ", length(termList), " models were fitted in ", roundT, ".\n\n", sep = "") # make methodList/pValueList into a data.frame # since merge will mess up the sequence, we make them have the same row names frameMM = do.call(rbind, methodList) row.names(frameMM) <- termList pValueFrame = do.call(rbind, pValueList) row.names(pValueFrame) <- termList frameMM = merge(x = frameMM, y = pValueFrame, by = "row.names", all.y = TRUE) # change the column names colnames(frameMM)<- c("Group Considered", "AIC", "BIC", "logLik", "deviance", "LRT p-Value") # print frameMMStr, but get the vAdd from frameMM frameMMStr = do.call(rbind, methodList) row.names(frameMMStr) <- termListStr pValueFrameStr = do.call(rbind, pValueList) row.names(pValueFrameStr) <- termListStr frameMMStr = merge(x = frameMMStr, y = pValueFrameStr, by = "row.names", all.y = TRUE) colnames(frameMMStr)<- c("Group Considered", "AIC", "BIC", "logLik", "deviance", "LRT p-Value") # increasing order for AIC/BIC if (method == "AIC" | method == "BIC"){ frameMM1 = frameMM[order(frameMM[, method]), ] frameMM1Str = frameMMStr[order(frameMMStr[, method]), ] cat("The top five alternative models with the lowest ", method, " are: \n\n", sep = "") print(frameMM1Str[1:5, ]) cat("\n\n") } # increasing order for LRT pvalue if(method == "logLik"){ frameMM1 = frameMM[order(frameMM[, 6]), ] frameMM1Str = frameMMStr[order(frameMMStr[, 6]), ] cat("The top five alternative models with the highest LRT p-Value are: \n\n") print(frameMM1Str[1:5, ]) cat("\n\n") } # VAdd is the one with the smallest value vAdd = frameMM1[1,1] # location in the termList vAddLoc = which(termList == vAdd) # this gives you the vector format for vAdd vAdd = termList[[vAddLoc]] # update the model and get the new method value variables = c(variables, vAdd) form1 <- as.formula(paste(response, " ~ ", paste(variables, collapse = "+"))) model = lmer(form1, data = dat, REML = FALSE) method2 = summary(model)$AICtab[method] cat("The group of terms that will be added is: ", paste(vAdd, collapse = " + "), ".\n\n", sep = "") # show the model formula without the 1 var1 = setdiff(variables,1) form = as.formula(paste(response, " ~ ", paste(var1, collapse = "+"))) cat("The updated model is now: \n") print(form) cat("\n\n") cat("The updated model has ", method, " value of: ", method2, ".\n\n", sep = "") # remove the terms in vAdd from each list in termList for (i in 1:length(termList)){ termList[[i]] = setdiff(termList[[i]], vAdd) } # remove that group from termList termList = termList[-vAddLoc] # remove the terms in vAdd from fixed, main fixed = setdiff(fixed, vAdd) main = setdiff(main, vAdd) # if a sublist in the termList is empty, remove that list termList = termList[lapply(termList, length)>0] # for AIC/BIC, add the value to the dropFrame if (method == "AIC" | method == "BIC"){ newrow = data.frame(method2 = integer(), stringsAsFactors = FALSE) newrow = data.frame(method2) row.names(newrow)<- list(vAdd) addFrame = rbind(addFrame, newrow) } # for LRT, add the pValue to the drop frame. # make method2 equal to alpha, method1 the pValue # to keep the method2 < method1 the same criterion as that of AIC/BIC if(method == "logLik"){ method2 = frameMM[1, 6] cat("The LRT test's pValue to compare this alternative model with the null model is: ", method2, ".\n", sep = "") method1 = alpha newrow = data.frame(method2 = integer(), stringsAsFactors = FALSE) newrow = data.frame(method2) row.names(newrow) = list(vAdd) addFrame = rbind(addFrame, newrow) } # when AIC starts to increase, drop the term just added and break the loop # when termList is empty, break the loop # no need to worry about keeping 1 there to hold the space, # because there is always a random there if (method2 > method1){ cat("Adding ", "'", paste(vAdd, collapse = " + "), "'", " will result in an increase in ", method, " value. So ", "the term(s) will not be added to the model and the model selection process terminates. \n\n", sep = "") variables1 = setdiff(variables, vAdd) f <- as.formula(paste(response, " ~ ", paste(variables1, collapse = "+"))) finalModel = lmer(f, data = dat, REML = FALSE) var1 = setdiff(variables1, 1) form = as.formula(paste(response, " ~ ", paste(var1, collapse = "+"))); break } if (length(termList) == 0){ variables1 = variables f <- as.formula(paste(response, " ~ ", paste(variables1, collapse = "+"))) finalModel = lmer(f, data = dat, REML = FALSE) var1 = setdiff(variables1, 1) form = as.formula(paste(response, " ~ ", paste(var1, collapse = "+"))); break } } if (method == "AIC" | method == "BIC"){ colnames(addFrame) = method } if (method == "logLik"){ colnames(addFrame) = "LRT pValue" } form = paste(response, " ~ ", paste(var1, collapse = " + ")) form = paste("lmer(", form, ")", sep = "") } } #### BACKWARD if (direction == "backward"){ ### Linear Fixed Regression Backward Model Selection if (is.null(random)){ dropFrame = data.frame("Term Dropped" = character(), "p-value" = integer(), stringsAsFactors = FALSE) # initialize the model variables = c(fixed,1) f <- as.formula(paste(response, " ~ ", paste(variables, collapse = "+"))) model = lm(f, data = dat) # document the starting model, without the "1" formFull = paste(response, " ~ ", paste(fixed, collapse = " + ")) formFull = paste("lm(", formFull, ")", sep = "") # initialize the pValue to a big number pValue = 100000 ## categorical, F-test if (any(is.element(cate, fixed1)) == TRUE){ validToDrop = fvalidToDrop(interList, polyList) # when there are still terms that are eligible to be dropped and pValue big while ((pValue >= alpha) & (length(validToDrop) > 0)){ print("-----------------------------------------------------------") print("-----------------------------------------------------------") # evoke the fvalidToDrop function validToDrop = fvalidToDrop(interList, polyList) cat("There are ", length(validToDrop), " terms that are eligible to be dropped and these are:\n\n", sep = "") print(validToDrop) cat("\n") # drop1 calculates the type III SS options(contrasts = c("contr.sum", "contr.poly")) allTerms <- drop1(model, .~., test = "F") # create a data frame for the pValue, with row names the terms frameMM = as.data.frame(round(allTerms$`Pr(>F)`, 4)) row.names(frameMM)<- row.names(allTerms) # create a data frame for the terms, with row names the terms namesMM <- as.data.frame(row.names(allTerms)) row.names(namesMM)<- row.names(allTerms) # put the data frame together based on row names frameMM = merge(x = namesMM, y = frameMM, by = "row.names", all.y = TRUE) # only keep the last two columns diml = dim(frameMM)[1] frameMM = frameMM[2:diml ,2:3] # rename the columns colnames(frameMM) = c("Terms", "pValue") # decreasing order based on pValue frameMM = frameMM[order(frameMM[, 2], decreasing = TRUE), ] # drop the one with the biggest p-value vDrop <- as.character(frameMM[1,1]) # when the first one is not in the validToDrop, go down to the list while (is.element(vDrop, validToDrop) == FALSE){ vDrop = as.character(frameMM[(which(frameMM$Terms == vDrop) +1), 1]) } # get the vDrop location vDropLoc = which(frameMM$Terms == vDrop) # update the pValue pValue = frameMM[vDropLoc,2] # print the frameMM, if there are less than 5 terms, print the entire frame # else: if vDrop is not among the top5, print everything until the vDrop cat("The top least significant terms are:\n") if (dim(frameMM)[1] <=5){ print(frameMM) }else { if (vDropLoc <= 5){ print(frameMM[1:5, ]) }else{ print(frameMM[1:vDropLoc, ]) } } cat("\n") # add vDrop to the dropFrame newrow = data.frame("Term Dropped" = character(), "p-value" = integer(), stringsAsFactors = FALSE) newrow = data.frame(vDrop, pValue) dropFrame = rbind(dropFrame, newrow) cat('\n') cat("The largest pValue for the comparison of the alternative model and the null model is " , pValue, ".\n\n", sep = "") cat("The term that will be dropped is: ", vDrop, ".\n\n", sep = "") # manually update the model, and that should be new model variables1 = variables[variables != vDrop] f = as.formula(paste(response, " ~ ", paste(variables1, collapse = "+"))) model = lm(f, data = dat) var1 = setdiff(variables1, 1) if (length(var1) != 0){ form = as.formula(paste(response, " ~ ", paste(var1, collapse = "+"))) }else{ form = as.formula(paste(response, " ~ ", paste(variables1, collapse = "+"))) } cat("The updated model is now: \n") print(form) cat("\n\n") # remove the vDrop term from everywhere vDropC =str_count(vDrop, ":") vDropD = strtoi(unlist(strsplit(strsplit(vDrop, '\\^')[[1]][2], ")"))) # remove from interList if (vDropC != 0) { interList[[vDropC]] = interList[[vDropC]][interList[[vDropC]] != vDrop] } # remove from polyList if (grepl('\\^', vDrop) == TRUE){ polyList[[vDropD]] = polyList[[vDropD]][polyList[[vDropD]] != vDrop] } # remove from variables, fixed, main variables = variables[variables != vDrop] fixed = fixed[fixed != vDrop] main = main[main != vDrop] validToDrop = fvalidToDrop(interList, polyList) if ( pValue < alpha) { cat("'", vDrop, "'", " has a pValue smaller than alpha, so ", "'", vDrop, "'", " will be added back to the model and the model selection process terminates.\n\n", sep = "") variables1 = c(variables, vDrop) f <- as.formula(paste(response, " ~ ", paste(variables1, collapse = "+"))) finalModel = lm(f, data = dat) var1 = setdiff(variables1, 1) formX = paste(response, " ~ ", paste(var1, collapse = " + ")) form = as.formula(paste(response, " ~ ", paste(var1, collapse = "+")));break } if (length(validToDrop) == 0){ variables1 = variables f <- as.formula(paste(response, " ~ ", paste(variables1, collapse = "+"))) finalModel = lm(f, data = dat) var1 = setdiff(variables1, 1) if (length(var1) != 0){ formX = paste(response, " ~ ", paste(var1, collapse = " + ")) form = as.formula(paste(response, " ~ ", paste(var1, collapse = "+"))) }else{ formX = paste(response, " ~ ", paste(variables1, collapse = " + ")) form = as.formula(paste(response, " ~ ", paste(variables1, collapse = "+"))) };break } } } ## if there no categorical variable, T-test if (any(is.element(cate, fixed1)) == FALSE){ validToDrop = fvalidToDrop(interList, polyList) while ((pValue >= alpha) & (length(validToDrop) > 0)){ print("-----------------------------------------------------------") print("-----------------------------------------------------------") # evoke the fvalidToDrop function validToDrop = fvalidToDrop(interList, polyList) cat("There are ", length(validToDrop), " terms that are eligible to be dropped and these are:\n\n") print(validToDrop) cat("\n\n") # order the summary(model) based on the p-value, decreasing order ii <- order(summary(model)$coefficients[, 4], decreasing = TRUE) # create a data frame with the variable name and its p-value frameMM <-summary(model)$coefficients[ii, ] # drop the one with the biggest p-value vDrop <- row.names(summary(model)$coefficients[ii, ])[1] # when the first one is not in the validToDrop, go down to the list while (is.element(vDrop, validToDrop) == FALSE){ vDrop = row.names(summary(model)$coefficients[ii, ] )[which(vDrop == row.names(summary(model)$coefficients[ii, ])) + 1] } # get the vDrop location vDropLoc = which(vDrop == row.names(frameMM)) # print the data frame, if there is less than 5 terms, print the entire frame # else: if vDrop is not among the top5, print everything until the vDrop cat("The top least significant terms are: \n") if (dim(frameMM)[1] <=5){ print(frameMM) }else { if (vDropLoc <= 5){ print(frameMM[1:5, ]) }else{ print(frameMM[1:vDropLoc, ]) } } cat('\n\n') vDropC =str_count(vDrop, ":") vDropD = strtoi(unlist(strsplit(strsplit(vDrop, '\\^')[[1]][2], ")"))) # remove vDrop from interList if (vDropC != 0) { interList[[vDropC]] = interList[[vDropC]][interList[[vDropC]] != vDrop] } # remove vDrop from polyList if (grepl('\\^', vDrop) == TRUE){ polyList[[vDropD]] = polyList[[vDropD]][polyList[[vDropD]] != vDrop] } # remove vDrop from variables, fixed, main variables = variables[variables != vDrop] fixed = fixed[fixed != vDrop] main = main[main != vDrop] # manually update the model, and that should be new null model variables1 = variables[variables != vDrop] model = lm(as.formula(paste(response, " ~ ", paste(variables1, collapse = "+"))), data = dat) var1 = setdiff(variables1, 1) if (length(var1) != 0){ form = as.formula(paste(response, " ~ ", paste(var1, collapse = "+"))) }else{ form = as.formula(paste(response, " ~ ", paste(variables1, collapse = "+"))) } # the pValue of the insignificant term and that term is dropped by now pValue = frameMM[vDropLoc, ][4] cat("The term that will be dropped is: ", vDrop, ".\n\n", sep = "") cat("The updated model is now: \n") print(form) cat("\n\n") # create a data frame with all the terms dropped and its pValue newrow = data.frame("Term Dropped" = character(), "p-value" = integer(), stringsAsFactors = FALSE) newrow = data.frame(vDrop, pValue) dropFrame = rbind(dropFrame, newrow) validToDrop = fvalidToDrop(interList, polyList) # the pValue is that of the "insignificant" term and it is dropped # so when it becomes "significant", we need to add it back if ( pValue < alpha ){ cat("'", vDrop, "'", " has a pValue smaller than alpha, so ", "'", vDrop, "'", " will be added back to the model and the model selection process terminates.\n\n", sep = "") variables1 = c(variables, vDrop) f <- as.formula(paste(response, " ~ ", paste(variables1, collapse = "+"))) finalModel = lm(f, data = dat) var1 = setdiff(variables1, 1) formX = paste(response, " ~ ", paste(var1, collapse = " + ")) form = as.formula(paste(response, " ~ ", paste(var1, collapse = "+"))); break } if(length(validToDrop) == 0){ variables1 = variables f <- as.formula(paste(response, " ~ ", paste(variables1, collapse = "+"))) var1 = setdiff(variables1, 1) if (length(var1) != 0){ formX = paste(response, " ~ ", paste(var1, collapse = " + ")) form = as.formula(paste(response, " ~ ", paste(var1, collapse = "+"))) }else{ formX = paste(response, " ~ ", paste(variables1, collapse = " + ")) form = as.formula(paste(response, " ~ ", paste(variables1, collapse = "+"))) }; break } } } form = paste("lm(", formX,")", sep = "") } ### Linear Mixed Regression Backward Model Selection if (!is.null(random)){ variables = c(random, fixed, 1) f <- as.formula(paste(response, " ~ ", paste(variables, collapse = "+"))) model = lmer(f, data = dat, REML = FALSE) methodFull = summary(model)$AICtab[method] formFull = paste(response, " ~ ", paste(c(random, fixed), collapse = " + ")) formFull = paste("lmer(", formFull,")", sep = "") # create a drop frame dropFrame = data.frame(vDrop = character(), method2 = integer(), stringsAsFactors = FALSE) # AIC/BIC/Likelihood Ratio Test if (method == "AIC" | method == "BIC" | method == "logLik"){ method1 = 100000000 method2 = 100000 validToDrop = fvalidToDrop(interList, polyList) while ((method2 <= method1) & (length(validToDrop) > 0)){ print("-----------------------------------------------------------") print("-----------------------------------------------------------") method1 = summary(model)$AICtab[method] cat("The model now has ", method, " value of ", method1, ".\n\n", sep = "") cat("The model now has ", length(variables)-1, " terms, of which ", length(fixed), " are fixed effects and ", length(random), " are/is random effect(s).\n\n", sep = "") # evoke the fvalidToDrop function validToDrop = fvalidToDrop(interList, polyList) cat("There are ", length(validToDrop), " terms that are eligible to be dropped and these are:\n\n", sep = "") print(validToDrop) cat("\n\n") cat("R is going to fit ", length(validToDrop), " model(s), ", "each time removing one variable that is eligible to be dropped from the original model. \n\n", sep = "") tstart <- Sys.time() # document the current model for future comparison model1 = model methodList = list() pValueList = list() # fit the models; add an anova() to get the pvalue for the LRT case for (term in validToDrop){ variables1 = variables[variables != term] model2 = lmer(as.formula(paste(response, " ~ ", paste(variables1, collapse = "+"))), data = dat, REML = FALSE) allvalues = list(t(as.data.frame(summary(model2)$AICtab[1:4]))) methodList[length(methodList)+1] = allvalues anovaTEST = anova(model1, model2) pValue = anovaTEST$`Pr(>Chisq)`[2] pValueList[length(pValueList)+1] = pValue } roundT <- Sys.time() - tstart roundT = paste(round(unclass(roundT)[1],2), units(roundT)) cat("The ", length(validToDrop), " models were fitted in ", roundT, ".\n\n", sep = "") # make methodList/pValueList into a data.frame frameMM = do.call(rbind, methodList) row.names(frameMM) <- validToDrop pValueFrame = do.call(rbind, pValueList) row.names(pValueFrame) <- validToDrop frameMM = merge(x = frameMM, y = pValueFrame, by = "row.names", all.y = TRUE) # change the column names colnames(frameMM)<- c("Term Dropped", "AIC", "BIC", "logLik", "deviance", "LRT p-Value") # increasing order for AIC/BIC if (method == "AIC" | method == "BIC"){ frameMM = frameMM[order(frameMM[, method]), ] cat("The top five alternative models with the lowest ", method, " are: \n\n", sep = "") print(frameMM[1:5, ]) cat("\n\n") } # decreasing order for LRT pvalue if(method == "logLik"){ frameMM = frameMM[order(frameMM[, 6], decreasing = TRUE), ] cat("The top five alternative models with the highest LRT p-Value are: \n\n") print(frameMM[1:5, ]) cat("\n\n") } # drop the one in the firt row, ie lowest AIC/BIC, biggest LRT pValue vDrop = frameMM[1,][,1] cat("The term that will be dropped is: " ,vDrop, ".\n\n", sep = "") # remove vDrop from everywhere vDropC = str_count(vDrop, ":") vDropD = strtoi(unlist(strsplit(strsplit(vDrop, '\\^')[[1]][2], ")"))) # from interList if (vDropC != 0) { interList[[vDropC]] = interList[[vDropC]][interList[[vDropC]] != vDrop] } # from polyList if (grepl('\\^', vDrop) == TRUE){ polyList[[vDropD]] = polyList[[vDropD]][polyList[[vDropD]] != vDrop] } # from variables, fixed, main, validToDrop variables = variables[variables != vDrop] fixed = fixed[fixed != vDrop] main = main[main != vDrop] validToDrop = validToDrop[validToDrop != validToDrop] # update the new model model = lmer(as.formula(paste(response, " ~ ", paste(variables, collapse = "+"))), data = dat, REML = FALSE) # for AIC/BIC, add the value to the dropFrame if (method == "AIC" | method == "BIC"){ method2 = frameMM[1, method] cat("The updated model now has ", method, " value of ", method2, ".\n\n", sep = "") newrow = data.frame(vDrop = character(), method2 = integer(), stringsAsFactors = FALSE) newrow = data.frame(vDrop, method2) dropFrame = rbind(dropFrame, newrow) } cat("The updated model is now: \n") form = as.formula(paste(response, " ~ ", paste(variables[variables != 1], collapse = "+"))) print(form) cat("\n\n") # for LRT, add the pValue to the drop frame. # make method2 equal to alpha, method1 the pValue #to keep the method2 < method1 the same criterion as that of AIC/BIC if(method == "logLik"){ method1 = frameMM[1, 6] vlogLik = frameMM[1, method] cat("The updated model now has ", method, " value of ", vlogLik, ".\n\n", sep = "") cat("The LRT test's pValue to compare this alternative model with the null model is: ", method1, ".\n", sep = "") method2 = alpha newrow = data.frame(vDrop = character(), method2 = integer(), stringsAsFactors = FALSE) newrow = data.frame(vDrop, method1) dropFrame = rbind(dropFrame, newrow) } validToDrop = fvalidToDrop(interList, polyList) if (method2 > method1){ cat("Dropping ", "'", vDrop, "'", " will result in an increase in ", method, " value, so ", vDrop, " will be added back to the model and the model selection process terminates.\n\n", sep = "") variables1 = c(random, fixed, vDrop) var1 = setdiff(variables1, 1) formX = paste(response, " ~ ", paste(var1, collapse = " + ")) form = as.formula(paste(response, " ~ ", paste(var1, collapse = "+"))); break } if (length(validToDrop) == 0){ variables1 = variables var1 = setdiff(variables1, 1) formX = paste(response, " ~ ", paste(var1, collapse = " + ")) form = as.formula(paste(response, " ~ ", paste(var1, collapse = "+"))); break } } } # tValue if (method == "tValue"){ zValue = qnorm(1-alpha) tValue = 0.0000001 validToDrop = fvalidToDrop(interList, polyList) while ((tValue <= zValue) & length(validToDrop) > 0){ print("-------------------------------------------------") print("-------------------------------------------------") # evoke the fvalidToDrop function validToDrop = fvalidToDrop(interList, polyList) # increasing order based on the absolute value of the t-value, ii <- order(abs(summary(model)$coefficients[, 3])) frameMM <-summary(model)$coefficients[ii, ] vDrop <- row.names(summary(model)$coefficients[ii, ])[1] # when the first one is not eligible to be dropped, go down on the list while (is.element(vDrop, validToDrop) == FALSE){ vDrop = row.names(summary(model)$coefficients[ii, ] )[which(vDrop == row.names(summary(model)$coefficients[ii, ])) + 1] } vDropLoc = which(vDrop == row.names(frameMM)) cat("The top least significant terms are: \n") if (dim(frameMM)[1] <=5){ print(frameMM) }else { if (vDropLoc <= 5){ print(frameMM[1:5, ]) }else{ print(frameMM[1:vDropLoc, ]) } } cat("\n\n") # remove vDrop from everywhere vDropC =str_count(vDrop, ":") vDropD = strtoi(unlist(strsplit(strsplit(vDrop, '\\^')[[1]][2], ")"))) if (vDropC != 0) { interList[[vDropC]] = interList[[vDropC]][interList[[vDropC]] != vDrop] } if (grepl('\\^', vDrop) == TRUE){ polyList[[vDropD]] = polyList[[vDropD]][polyList[[vDropD]] != vDrop] } variables = variables[variables != vDrop] fixed = fixed[fixed != vDrop] main = main[main != vDrop] # get the new tvalue tValue = abs(frameMM[vDropLoc, ][3]) # create a drop frame newrow = data.frame(vDrop = character(), tValue = integer(), stringsAsFactors = FALSE) newrow = data.frame(vDrop, tValue) dropFrame = rbind(dropFrame, newrow) cat("The term that will be dropped is ", vDrop, ".\n\n", sep = "") cat("The updated model is now: \n") variables1 = variables[variables != vDrop] form = as.formula(paste(response, " ~ ", paste(variables1, collapse = "+"))) print(form) cat("\n\n") validToDrop = fvalidToDrop(interList, polyList) if (tValue >= zValue){ cat("'", vDrop, "'", " has a bigger tValue than zValue calculated from the alpha the user provided, so ", "'", vDrop, "'", " will be added back to the model and the model selection process terminates.\n\n", sep = "") variables1 = c(variables, vDrop) var1 = setdiff(variables1, 1) formX = paste(response, " ~ ", paste(var1, collapse = " + ")) form = as.formula(paste(response, " ~ ", paste(var1, collapse = "+"))); break } if (length(validToDrop) == 0){ variables1 = variables var1 = setdiff(variables1, 1) formX = paste(response, " ~ ", paste(var1, collapse = " + ")) form = as.formula(paste(response, " ~ ", paste(var1, collapse = "+"))); break } } } # change the column names for the dropFrame if (method == "AIC" | method == "BIC"){ colnames(dropFrame) = c("Term dropped", method) } if (method == "logLik"){ colnames(dropFrame) = c("Term dropped", "LRT p-Value") } if(method == "tValue"){ colnames(dropFrame) = c("Term dropped", "tValue") } form = paste("lmer(", formX,")", sep = "") } } runTime <- Sys.time() - overallT runT = paste(round(unclass(runTime)[1],2), units(runTime)) if(direction == "forward") { path = addFrame } if(direction == "backward"){ path = dropFrame } if(direction == "forward"){ if (is.null(random)){ returnList <- list(finalModel = form, termsKept = keep, direction = direction, path = path, alpha = alpha, runningTime = runT) } if (!is.null(random) & (method == "AIC" | method == "BIC")){ returnList <- list(finalModel = form, termsKept = keep, direction = direction, method = method, origMethodValue = as.numeric(toString(round(methodFull, 2))), path = path, runningTime = runT) } if (!is.null(random) & method == "logLik"){ returnList <- list(finalModel = form, termsKept = keep, direction = direction, method = methodLRT, path = path, alpha = alpha, runningTime = runT) } } if (direction == "backward"){ if (is.null(random)){ returnList <- list(fullModel = formFull, finalModel = form, termsKept = keep, direction = direction, path = path, alpha = alpha, runningTime = runT) } if (!is.null(random) & (method == "AIC" | method == "BIC")){ returnList <- list(fullModel = formFull, finalModel = form, termsKept = keep, direction = direction, method = method, origMethodValue = as.numeric(toString(round(methodFull, 2))), path = path, runningTime = runT) } if (!is.null(random) & method == "logLik"){ returnList <- list(fullModel = formFull, finalModel = form, termsKept = keep, direction = direction, method = method, path = path, alpha = alpha, runningTime = runT) } if (!is.null(random) & method == "tValue"){ returnList <- list(fullModel = formFull, finalModel = form, termsKept = keep, direction = direction, method = method, path = path, threshold = c(alpha = alpha, zValue = zValue), runningTime = runT) } } if (direction == "backward" & length(random) != 0 & method == "tValue"){ cat("We are using normal approximation to calculate the t cut off. \n") cat("For example, if alpha = 0.05, the cut-off is approximately 1.96. \n") } if (!is.null(sinkit)){ print("-------------------------------------------------") print("-------------------------------------------------") print("-------------------------------------------------") print(returnList) sink() } return(returnList) } modSel/README.Rmd0000644000076500000240000000135213160071767012445 0ustar00CiCistaff--- output: md_document: variant: markdown_github --- ```{r, echo = FALSE} knitr::opts_chunk$set( collapse = TRUE, comment = "#>", fig.path = "README-" ) ``` # modSel The goal of modSel is to perform backward or forward model selection with both mixed or fixed effects models based on different criterions while preserving the effects hierarchy. ## Example This is a basic example which shows you how to solve a common problem: ```{r example} # make chas a factor to use it as a random effect fxd = c("HeadWt", "Cult", "I(HeadWt^2)") modelSelection(response = "VitC", fixed = fxd, dat = cabbages, direction = "backward") ``` modSel/cran-comments.md0000644000076500000240000000073513155375346014144 0ustar00CiCistaff## Test environments * local OS X install, R 3.3.2 * ubuntu 12.04 (on travis-ci), R 3.3.2 * win-builder (devel and release) ## R CMD check results 0 errors | 0 warnings | 1 note * This is a new release. ## Reverse dependencies This is a new release, so there are no reverse dependencies. --- * I have run R CMD check on the NUMBER downstream dependencies. (Summary at ...). * FAILURE SUMMARY * All revdep maintainers were notified of the release on RELEASE DATE. modSel/example.txt0000644000076500000240000007161713211324775013250 0ustar00CiCistaff[1] "-----------------------------------------------------------" [1] "-----------------------------------------------------------" The model now has BIC value of 3446.422. The model now has 26 terms, of which 25 are fixed effects and 1 are/is random effect(s). There are 10 terms that are eligible to be dropped and these are: [1] "tax:black" "tax:medv" "indus:rm" [4] "indus:ptratio" "tax:indus:nox" "tax:rm:ptratio" [7] "I(indus^2)" "I(nox^2)" "I(ptratio^2)" [10] "I(tax^4)" R is going to fit 10 model(s), each time removing one variable that is eligible to be dropped from the original model. The 10 models were fitted in 0.82 secs. The top five alternative models with the lowest BIC are: Term Dropped AIC BIC logLik deviance 6 indus:rm 3326.123 3440.239 -1636.061 3272.123 8 tax:indus:nox 3326.147 3440.263 -1636.073 3272.147 7 tax:black 3326.212 3440.328 -1636.106 3272.212 3 I(ptratio^2) 3326.613 3440.729 -1636.306 3272.613 5 indus:ptratio 3327.127 3441.243 -1636.563 3273.127 LRT p-Value 6 0.8341859 8 0.7946782 7 0.7157352 3 0.4651557 5 0.3060152 The term that will be dropped is: indus:rm. The updated model now has BIC value of 3440.239. The updated model is now: crim ~ (1 | chas) + tax + indus + nox + rm + ptratio + black + medv + I(tax^2) + I(indus^2) + I(nox^2) + I(ptratio^2) + I(tax^3) + I(tax^4) + tax:indus + tax:nox + tax:rm + tax:ptratio + tax:black + tax:medv + indus:nox + indus:ptratio + rm:ptratio + tax:indus:nox + tax:rm:ptratio [1] "-----------------------------------------------------------" [1] "-----------------------------------------------------------" The model now has BIC value of 3440.239. The model now has 25 terms, of which 24 are fixed effects and 1 are/is random effect(s). There are 9 terms that are eligible to be dropped and these are: [1] "tax:black" "tax:medv" "indus:ptratio" [4] "tax:indus:nox" "tax:rm:ptratio" "I(indus^2)" [7] "I(nox^2)" "I(ptratio^2)" "I(tax^4)" R is going to fit 9 model(s), each time removing one variable that is eligible to be dropped from the original model. The 9 models were fitted in 0.64 secs. The top five alternative models with the lowest BIC are: Term Dropped AIC BIC logLik deviance 7 tax:indus:nox 3324.191 3434.081 -1636.096 3272.191 6 tax:black 3324.248 3434.138 -1636.124 3272.248 3 I(ptratio^2) 3324.647 3434.537 -1636.323 3272.647 5 indus:ptratio 3325.178 3435.068 -1636.589 3273.178 1 I(indus^2) 3325.903 3435.792 -1636.951 3273.903 LRT p-Value 7 0.7939316 6 0.7233357 3 0.4691850 5 0.3044141 1 0.1822032 The term that will be dropped is: tax:indus:nox. The updated model now has BIC value of 3434.081. The updated model is now: crim ~ (1 | chas) + tax + indus + nox + rm + ptratio + black + medv + I(tax^2) + I(indus^2) + I(nox^2) + I(ptratio^2) + I(tax^3) + I(tax^4) + tax:indus + tax:nox + tax:rm + tax:ptratio + tax:black + tax:medv + indus:nox + indus:ptratio + rm:ptratio + tax:rm:ptratio [1] "-----------------------------------------------------------" [1] "-----------------------------------------------------------" The model now has BIC value of 3434.081. The model now has 24 terms, of which 23 are fixed effects and 1 are/is random effect(s). There are 11 terms that are eligible to be dropped and these are: [1] "tax:indus" "tax:nox" "tax:black" [4] "tax:medv" "indus:nox" "indus:ptratio" [7] "tax:rm:ptratio" "I(indus^2)" "I(nox^2)" [10] "I(ptratio^2)" "I(tax^4)" R is going to fit 11 model(s), each time removing one variable that is eligible to be dropped from the original model. The 11 models were fitted in 0.63 secs. The top five alternative models with the lowest BIC are: Term Dropped AIC BIC logLik deviance 7 tax:black 3322.303 3427.966 -1636.151 3272.303 3 I(ptratio^2) 3322.680 3428.344 -1636.340 3272.680 6 indus:ptratio 3323.388 3429.052 -1636.694 3273.388 11 tax:rm:ptratio 3324.191 3429.854 -1637.095 3274.191 1 I(indus^2) 3324.369 3430.032 -1637.185 3274.369 LRT p-Value 7 0.7383091 3 0.4843376 6 0.2738780 11 0.1573531 1 0.1400148 The term that will be dropped is: tax:black. The updated model now has BIC value of 3427.966. The updated model is now: crim ~ (1 | chas) + tax + indus + nox + rm + ptratio + black + medv + I(tax^2) + I(indus^2) + I(nox^2) + I(ptratio^2) + I(tax^3) + I(tax^4) + tax:indus + tax:nox + tax:rm + tax:ptratio + tax:medv + indus:nox + indus:ptratio + rm:ptratio + tax:rm:ptratio [1] "-----------------------------------------------------------" [1] "-----------------------------------------------------------" The model now has BIC value of 3427.966. The model now has 23 terms, of which 22 are fixed effects and 1 are/is random effect(s). There are 11 terms that are eligible to be dropped and these are: [1] "tax:indus" "tax:nox" "tax:medv" [4] "indus:nox" "indus:ptratio" "tax:rm:ptratio" [7] "I(indus^2)" "I(nox^2)" "I(ptratio^2)" [10] "I(tax^4)" "black" R is going to fit 11 model(s), each time removing one variable that is eligible to be dropped from the original model. The 11 models were fitted in 0.67 secs. The top five alternative models with the lowest BIC are: Term Dropped AIC BIC logLik deviance 4 I(ptratio^2) 3320.756 3422.193 -1636.378 3272.756 7 indus:ptratio 3321.419 3422.856 -1636.709 3273.419 11 tax:rm:ptratio 3322.392 3423.829 -1637.196 3274.392 1 black 3322.401 3423.838 -1637.200 3274.401 2 I(indus^2) 3322.456 3423.893 -1637.228 3274.456 LRT p-Value 4 0.5007318 7 0.2908009 11 0.1483445 1 0.1475058 2 0.1422874 The term that will be dropped is: I(ptratio^2). The updated model now has BIC value of 3422.193. The updated model is now: crim ~ (1 | chas) + tax + indus + nox + rm + ptratio + black + medv + I(tax^2) + I(indus^2) + I(nox^2) + I(tax^3) + I(tax^4) + tax:indus + tax:nox + tax:rm + tax:ptratio + tax:medv + indus:nox + indus:ptratio + rm:ptratio + tax:rm:ptratio [1] "-----------------------------------------------------------" [1] "-----------------------------------------------------------" The model now has BIC value of 3422.193. The model now has 22 terms, of which 21 are fixed effects and 1 are/is random effect(s). There are 10 terms that are eligible to be dropped and these are: [1] "tax:indus" "tax:nox" "tax:medv" [4] "indus:nox" "indus:ptratio" "tax:rm:ptratio" [7] "I(indus^2)" "I(nox^2)" "I(tax^4)" [10] "black" R is going to fit 10 model(s), each time removing one variable that is eligible to be dropped from the original model. The 10 models were fitted in 0.61 secs. The top five alternative models with the lowest BIC are: Term Dropped AIC BIC logLik deviance 6 indus:ptratio 3319.783 3416.994 -1636.892 3273.783 10 tax:rm:ptratio 3320.611 3417.821 -1637.305 3274.611 2 I(indus^2) 3320.663 3417.874 -1637.332 3274.663 1 black 3320.944 3418.154 -1637.472 3274.944 7 tax:indus 3321.302 3418.512 -1637.651 3275.302 LRT p-Value 6 0.3108184 10 0.1732659 2 0.1672869 1 0.1391538 7 0.1105813 The term that will be dropped is: indus:ptratio. The updated model now has BIC value of 3416.994. The updated model is now: crim ~ (1 | chas) + tax + indus + nox + rm + ptratio + black + medv + I(tax^2) + I(indus^2) + I(nox^2) + I(tax^3) + I(tax^4) + tax:indus + tax:nox + tax:rm + tax:ptratio + tax:medv + indus:nox + rm:ptratio + tax:rm:ptratio [1] "-----------------------------------------------------------" [1] "-----------------------------------------------------------" The model now has BIC value of 3416.994. The model now has 21 terms, of which 20 are fixed effects and 1 are/is random effect(s). There are 9 terms that are eligible to be dropped and these are: [1] "tax:indus" "tax:nox" "tax:medv" [4] "indus:nox" "tax:rm:ptratio" "I(indus^2)" [7] "I(nox^2)" "I(tax^4)" "black" R is going to fit 9 model(s), each time removing one variable that is eligible to be dropped from the original model. The 9 models were fitted in 0.49 secs. The top five alternative models with the lowest BIC are: Term Dropped AIC BIC logLik deviance 9 tax:rm:ptratio 3319.495 3412.479 -1637.747 3275.495 6 tax:indus 3319.920 3412.903 -1637.960 3275.920 1 black 3320.242 3413.225 -1638.121 3276.242 2 I(indus^2) 3320.817 3413.801 -1638.408 3276.817 8 tax:nox 3321.167 3414.151 -1638.584 3277.167 LRT p-Value 9 0.19079122 6 0.14385827 1 0.11690771 2 0.08156354 8 0.06583286 The term that will be dropped is: tax:rm:ptratio. The updated model now has BIC value of 3412.479. The updated model is now: crim ~ (1 | chas) + tax + indus + nox + rm + ptratio + black + medv + I(tax^2) + I(indus^2) + I(nox^2) + I(tax^3) + I(tax^4) + tax:indus + tax:nox + tax:rm + tax:ptratio + tax:medv + indus:nox + rm:ptratio [1] "-----------------------------------------------------------" [1] "-----------------------------------------------------------" The model now has BIC value of 3412.479. The model now has 20 terms, of which 19 are fixed effects and 1 are/is random effect(s). There are 11 terms that are eligible to be dropped and these are: [1] "tax:indus" "tax:nox" "tax:rm" [4] "tax:ptratio" "tax:medv" "indus:nox" [7] "rm:ptratio" "I(indus^2)" "I(nox^2)" [10] "I(tax^4)" "black" R is going to fit 11 model(s), each time removing one variable that is eligible to be dropped from the original model. The 11 models were fitted in 0.6 secs. The top five alternative models with the lowest BIC are: Term Dropped AIC BIC logLik deviance 6 rm:ptratio 3318.053 3406.811 -1638.027 3276.053 11 tax:rm 3318.175 3406.933 -1638.088 3276.175 10 tax:ptratio 3318.527 3407.284 -1638.263 3276.527 7 tax:indus 3319.807 3408.564 -1638.904 3277.807 9 tax:nox 3319.892 3408.649 -1638.946 3277.892 LRT p-Value 6 0.4548622 11 0.4094350 10 0.3097580 7 0.1283526 9 0.1215756 The term that will be dropped is: rm:ptratio. The updated model now has BIC value of 3406.811. The updated model is now: crim ~ (1 | chas) + tax + indus + nox + rm + ptratio + black + medv + I(tax^2) + I(indus^2) + I(nox^2) + I(tax^3) + I(tax^4) + tax:indus + tax:nox + tax:rm + tax:ptratio + tax:medv + indus:nox [1] "-----------------------------------------------------------" [1] "-----------------------------------------------------------" The model now has BIC value of 3406.811. The model now has 19 terms, of which 18 are fixed effects and 1 are/is random effect(s). There are 10 terms that are eligible to be dropped and these are: [1] "tax:indus" "tax:nox" "tax:rm" [4] "tax:ptratio" "tax:medv" "indus:nox" [7] "I(indus^2)" "I(nox^2)" "I(tax^4)" [10] "black" R is going to fit 10 model(s), each time removing one variable that is eligible to be dropped from the original model. The 10 models were fitted in 0.49 secs. The top five alternative models with the lowest BIC are: Term Dropped AIC BIC logLik deviance 9 tax:ptratio 3316.885 3401.415 -1638.442 3276.885 10 tax:rm 3317.347 3401.878 -1638.674 3277.347 2 I(indus^2) 3318.219 3402.750 -1639.109 3278.219 8 tax:nox 3318.238 3402.769 -1639.119 3278.238 1 black 3318.554 3403.085 -1639.277 3278.554 LRT p-Value 9 0.3619009 10 0.2553530 2 0.1411355 8 0.1393877 1 0.1138200 The term that will be dropped is: tax:ptratio. The updated model now has BIC value of 3401.415. The updated model is now: crim ~ (1 | chas) + tax + indus + nox + rm + ptratio + black + medv + I(tax^2) + I(indus^2) + I(nox^2) + I(tax^3) + I(tax^4) + tax:indus + tax:nox + tax:rm + tax:medv + indus:nox [1] "-----------------------------------------------------------" [1] "-----------------------------------------------------------" The model now has BIC value of 3401.415. The model now has 18 terms, of which 17 are fixed effects and 1 are/is random effect(s). There are 10 terms that are eligible to be dropped and these are: [1] "tax:indus" "tax:nox" "tax:rm" "tax:medv" [5] "indus:nox" "I(indus^2)" "I(nox^2)" "I(tax^4)" [9] "ptratio" "black" R is going to fit 10 model(s), each time removing one variable that is eligible to be dropped from the original model. The 10 models were fitted in 0.6 secs. The top five alternative models with the lowest BIC are: Term Dropped AIC BIC logLik deviance 10 tax:rm 3316.205 3396.509 -1639.102 3278.205 6 ptratio 3316.909 3397.213 -1639.454 3278.909 1 black 3317.532 3397.836 -1639.766 3279.532 9 tax:nox 3318.088 3398.392 -1640.044 3280.088 7 tax:indus 3318.765 3399.070 -1640.383 3280.765 LRT p-Value 10 0.25062300 6 0.15480126 1 0.10371649 9 0.07349121 7 0.04884338 The term that will be dropped is: tax:rm. The updated model now has BIC value of 3396.509. The updated model is now: crim ~ (1 | chas) + tax + indus + nox + rm + ptratio + black + medv + I(tax^2) + I(indus^2) + I(nox^2) + I(tax^3) + I(tax^4) + tax:indus + tax:nox + tax:medv + indus:nox [1] "-----------------------------------------------------------" [1] "-----------------------------------------------------------" The model now has BIC value of 3396.509. The model now has 17 terms, of which 16 are fixed effects and 1 are/is random effect(s). There are 10 terms that are eligible to be dropped and these are: [1] "tax:indus" "tax:nox" "tax:medv" "indus:nox" [5] "I(indus^2)" "I(nox^2)" "I(tax^4)" "rm" [9] "ptratio" "black" R is going to fit 10 model(s), each time removing one variable that is eligible to be dropped from the original model. The 10 models were fitted in 0.5 secs. The top five alternative models with the lowest BIC are: Term Dropped AIC BIC logLik deviance 6 ptratio 3315.744 3391.821 -1639.872 3279.744 1 black 3316.764 3392.841 -1640.382 3280.764 10 tax:nox 3317.650 3393.727 -1640.825 3281.650 2 I(indus^2) 3317.791 3393.869 -1640.896 3281.791 8 tax:indus 3318.072 3394.150 -1641.036 3282.072 LRT p-Value 6 0.21476297 1 0.10966661 10 0.06343557 2 0.05823948 8 0.04921592 The term that will be dropped is: ptratio. The updated model now has BIC value of 3391.821. The updated model is now: crim ~ (1 | chas) + tax + indus + nox + rm + black + medv + I(tax^2) + I(indus^2) + I(nox^2) + I(tax^3) + I(tax^4) + tax:indus + tax:nox + tax:medv + indus:nox [1] "-----------------------------------------------------------" [1] "-----------------------------------------------------------" The model now has BIC value of 3391.821. The model now has 16 terms, of which 15 are fixed effects and 1 are/is random effect(s). There are 9 terms that are eligible to be dropped and these are: [1] "tax:indus" "tax:nox" "tax:medv" "indus:nox" [5] "I(indus^2)" "I(nox^2)" "I(tax^4)" "rm" [9] "black" R is going to fit 9 model(s), each time removing one variable that is eligible to be dropped from the original model. The 9 models were fitted in 0.41 secs. The top five alternative models with the lowest BIC are: Term Dropped AIC BIC logLik deviance 2 I(indus^2) 3316.438 3388.289 -1641.219 3282.438 1 black 3316.568 3388.419 -1641.284 3282.568 9 tax:nox 3317.586 3389.437 -1641.793 3283.586 7 tax:indus 3317.946 3389.797 -1641.973 3283.946 6 rm 3319.657 3391.508 -1642.829 3285.657 LRT p-Value 2 0.10068600 1 0.09282124 9 0.04997766 7 0.04037089 6 0.01502318 The term that will be dropped is: I(indus^2). The updated model now has BIC value of 3388.289. The updated model is now: crim ~ (1 | chas) + tax + indus + nox + rm + black + medv + I(tax^2) + I(nox^2) + I(tax^3) + I(tax^4) + tax:indus + tax:nox + tax:medv + indus:nox [1] "-----------------------------------------------------------" [1] "-----------------------------------------------------------" The model now has BIC value of 3388.289. The model now has 15 terms, of which 14 are fixed effects and 1 are/is random effect(s). There are 8 terms that are eligible to be dropped and these are: [1] "tax:indus" "tax:nox" "tax:medv" "indus:nox" [5] "I(nox^2)" "I(tax^4)" "rm" "black" R is going to fit 8 model(s), each time removing one variable that is eligible to be dropped from the original model. The 8 models were fitted in 0.45 secs. The top five alternative models with the lowest BIC are: Term Dropped AIC BIC logLik deviance 8 tax:nox 3316.320 3383.944 -1642.160 3284.320 1 black 3317.760 3385.384 -1642.880 3285.760 6 tax:indus 3318.184 3385.809 -1643.092 3286.184 4 indus:nox 3318.415 3386.040 -1643.208 3286.415 2 I(nox^2) 3318.418 3386.043 -1643.209 3286.418 LRT p-Value 8 0.17014263 1 0.06837285 6 0.05292698 4 0.04611805 2 0.04603698 The term that will be dropped is: tax:nox. The updated model now has BIC value of 3383.944. The updated model is now: crim ~ (1 | chas) + tax + indus + nox + rm + black + medv + I(tax^2) + I(nox^2) + I(tax^3) + I(tax^4) + tax:indus + tax:medv + indus:nox [1] "-----------------------------------------------------------" [1] "-----------------------------------------------------------" The model now has BIC value of 3383.944. The model now has 14 terms, of which 13 are fixed effects and 1 are/is random effect(s). There are 7 terms that are eligible to be dropped and these are: [1] "tax:indus" "tax:medv" "indus:nox" "I(nox^2)" [5] "I(tax^4)" "rm" "black" R is going to fit 7 model(s), each time removing one variable that is eligible to be dropped from the original model. The 7 models were fitted in 0.31 secs. The top five alternative models with the lowest BIC are: Term Dropped AIC BIC logLik deviance 4 indus:nox 3316.824 3380.222 -1643.412 3286.824 2 I(nox^2) 3317.889 3381.287 -1643.944 3287.889 1 black 3318.263 3381.661 -1644.132 3288.263 6 tax:indus 3318.379 3381.777 -1644.190 3288.379 5 rm 3321.356 3384.754 -1645.678 3291.356 LRT p-Value 4 0.113528506 2 0.058878426 1 0.047056080 6 0.043924634 5 0.007988773 The term that will be dropped is: indus:nox. The updated model now has BIC value of 3380.222. The updated model is now: crim ~ (1 | chas) + tax + indus + nox + rm + black + medv + I(tax^2) + I(nox^2) + I(tax^3) + I(tax^4) + tax:indus + tax:medv [1] "-----------------------------------------------------------" [1] "-----------------------------------------------------------" The model now has BIC value of 3380.222. The model now has 13 terms, of which 12 are fixed effects and 1 are/is random effect(s). There are 6 terms that are eligible to be dropped and these are: [1] "tax:indus" "tax:medv" "I(nox^2)" "I(tax^4)" [5] "rm" "black" R is going to fit 6 model(s), each time removing one variable that is eligible to be dropped from the original model. The 6 models were fitted in 0.28 secs. The top five alternative models with the lowest BIC are: Term Dropped AIC BIC logLik deviance 2 I(nox^2) 3315.893 3375.064 -1643.946 3287.893 5 tax:indus 3317.844 3377.015 -1644.922 3289.844 1 black 3318.802 3377.974 -1645.401 3290.802 4 rm 3321.270 3380.441 -1646.635 3293.270 3 I(tax^4) 3331.789 3390.960 -1651.894 3303.789 LRT p-Value 2 3.012761e-01 5 8.226291e-02 1 4.609652e-02 4 1.112427e-02 3 3.808419e-05 The term that will be dropped is: I(nox^2). The updated model now has BIC value of 3375.064. The updated model is now: crim ~ (1 | chas) + tax + indus + nox + rm + black + medv + I(tax^2) + I(tax^3) + I(tax^4) + tax:indus + tax:medv [1] "-----------------------------------------------------------" [1] "-----------------------------------------------------------" The model now has BIC value of 3375.064. The model now has 12 terms, of which 11 are fixed effects and 1 are/is random effect(s). There are 6 terms that are eligible to be dropped and these are: [1] "tax:indus" "tax:medv" "I(tax^4)" "nox" [5] "rm" "black" R is going to fit 6 model(s), each time removing one variable that is eligible to be dropped from the original model. The 6 models were fitted in 0.39 secs. The top five alternative models with the lowest BIC are: Term Dropped AIC BIC logLik deviance 3 nox 3313.895 3368.840 -1643.947 3287.895 5 tax:indus 3317.089 3372.034 -1645.545 3291.089 1 black 3317.673 3372.618 -1645.836 3291.673 4 rm 3320.068 3375.013 -1647.034 3294.068 2 I(tax^4) 3331.512 3386.457 -1652.756 3305.512 LRT p-Value 3 9.633613e-01 5 7.379432e-02 1 5.187570e-02 4 1.295747e-02 2 2.697881e-05 The term that will be dropped is: nox. The updated model now has BIC value of 3368.84. The updated model is now: crim ~ (1 | chas) + tax + indus + rm + black + medv + I(tax^2) + I(tax^3) + I(tax^4) + tax:indus + tax:medv [1] "-----------------------------------------------------------" [1] "-----------------------------------------------------------" The model now has BIC value of 3368.84. The model now has 11 terms, of which 10 are fixed effects and 1 are/is random effect(s). There are 5 terms that are eligible to be dropped and these are: [1] "tax:indus" "tax:medv" "I(tax^4)" "rm" [5] "black" R is going to fit 5 model(s), each time removing one variable that is eligible to be dropped from the original model. The 5 models were fitted in 0.23 secs. The top five alternative models with the lowest BIC are: Term Dropped AIC BIC logLik deviance 4 tax:indus 3315.245 3365.964 -1645.623 3291.245 1 black 3315.679 3366.398 -1645.840 3291.679 3 rm 3318.068 3368.787 -1647.034 3294.068 2 I(tax^4) 3330.544 3381.262 -1653.272 3306.544 5 tax:medv 3354.982 3405.701 -1665.491 3330.982 LRT p-Value 4 6.719743e-02 1 5.174068e-02 3 1.296900e-02 2 1.571574e-05 5 5.234800e-11 The term that will be dropped is: tax:indus. The updated model now has BIC value of 3365.964. The updated model is now: crim ~ (1 | chas) + tax + indus + rm + black + medv + I(tax^2) + I(tax^3) + I(tax^4) + tax:medv [1] "-----------------------------------------------------------" [1] "-----------------------------------------------------------" The model now has BIC value of 3365.964. The model now has 10 terms, of which 9 are fixed effects and 1 are/is random effect(s). There are 5 terms that are eligible to be dropped and these are: [1] "tax:medv" "I(tax^4)" "indus" "rm" "black" R is going to fit 5 model(s), each time removing one variable that is eligible to be dropped from the original model. The 5 models were fitted in 0.25 secs. The top five alternative models with the lowest BIC are: Term Dropped AIC BIC logLik deviance 3 indus 3313.545 3360.037 -1645.773 3291.545 1 black 3317.110 3363.602 -1647.555 3295.110 4 rm 3318.594 3365.086 -1648.297 3296.594 2 I(tax^4) 3340.540 3387.032 -1659.270 3318.540 5 tax:medv 3352.984 3399.475 -1665.492 3330.984 LRT p-Value 3 5.836679e-01 1 4.929836e-02 4 2.073488e-02 2 1.746427e-07 5 2.903598e-10 The term that will be dropped is: indus. The updated model now has BIC value of 3360.037. The updated model is now: crim ~ (1 | chas) + tax + rm + black + medv + I(tax^2) + I(tax^3) + I(tax^4) + tax:medv [1] "-----------------------------------------------------------" [1] "-----------------------------------------------------------" The model now has BIC value of 3360.037. The model now has 9 terms, of which 8 are fixed effects and 1 are/is random effect(s). There are 4 terms that are eligible to be dropped and these are: [1] "tax:medv" "I(tax^4)" "rm" "black" R is going to fit 4 model(s), each time removing one variable that is eligible to be dropped from the original model. The 4 models were fitted in 0.18 secs. The top five alternative models with the lowest BIC are: Term Dropped AIC BIC logLik deviance 1 black 3315.592 3357.858 -1647.796 3295.592 3 rm 3316.999 3359.265 -1648.500 3296.999 2 I(tax^4) 3340.163 3382.429 -1660.082 3320.163 4 tax:medv 3351.437 3393.702 -1665.718 3331.437 NA NA NA NA NA LRT p-Value 1 4.425196e-02 3 1.952550e-02 2 8.816185e-08 4 2.684644e-10 NA NA The term that will be dropped is: black. The updated model now has BIC value of 3357.858. The updated model is now: crim ~ (1 | chas) + tax + rm + medv + I(tax^2) + I(tax^3) + I(tax^4) + tax:medv [1] "-----------------------------------------------------------" [1] "-----------------------------------------------------------" The model now has BIC value of 3357.858. The model now has 8 terms, of which 7 are fixed effects and 1 are/is random effect(s). There are 3 terms that are eligible to be dropped and these are: [1] "tax:medv" "I(tax^4)" "rm" R is going to fit 3 model(s), each time removing one variable that is eligible to be dropped from the original model. The 3 models were fitted in 0.13 secs. The top five alternative models with the lowest BIC are: Term Dropped AIC BIC logLik deviance 2 rm 3318.375 3356.414 -1650.188 3300.375 1 I(tax^4) 3343.808 3381.847 -1662.904 3325.808 3 tax:medv 3356.812 3394.850 -1669.406 3338.812 NA NA NA NA NA NA.1 NA NA NA NA LRT p-Value 2 2.874630e-02 1 3.866282e-08 3 4.893769e-11 NA NA NA.1 NA The term that will be dropped is: rm. The updated model now has BIC value of 3356.414. The updated model is now: crim ~ (1 | chas) + tax + medv + I(tax^2) + I(tax^3) + I(tax^4) + tax:medv [1] "-----------------------------------------------------------" [1] "-----------------------------------------------------------" The model now has BIC value of 3356.414. The model now has 7 terms, of which 6 are fixed effects and 1 are/is random effect(s). There are 2 terms that are eligible to be dropped and these are: [1] "tax:medv" "I(tax^4)" R is going to fit 2 model(s), each time removing one variable that is eligible to be dropped from the original model. The 2 models were fitted in 0.09 secs. The top five alternative models with the lowest BIC are: Term Dropped AIC BIC logLik deviance 1 I(tax^4) 3343.863 3377.676 -1663.932 3327.863 2 tax:medv 3355.454 3389.267 -1669.727 3339.454 NA NA NA NA NA NA.1 NA NA NA NA NA.2 NA NA NA NA LRT p-Value 1 1.580417e-07 2 4.069508e-10 NA NA NA.1 NA NA.2 NA The term that will be dropped is: I(tax^4). The updated model now has BIC value of 3377.676. The updated model is now: crim ~ (1 | chas) + tax + medv + I(tax^2) + I(tax^3) + tax:medv Dropping 'I(tax^4)' will result in an increase in BIC value, so I(tax^4) will be added back to the model and the model selection process terminates. [1] "-------------------------------------------------" [1] "-------------------------------------------------" [1] "-------------------------------------------------" $fullModel [1] "lmer(crim ~ (1 | chas) + tax + indus + nox + rm + ptratio + black + medv + I(tax^2) + I(indus^2) + I(nox^2) + I(ptratio^2) + I(tax^3) + I(tax^4) + tax:indus + tax:nox + tax:rm + tax:ptratio + tax:black + tax:medv + indus:nox + indus:rm + indus:ptratio + rm:ptratio + tax:indus:nox + tax:rm:ptratio)" $finalModel [1] "lmer(crim ~ (1 | chas)+tax+medv+I(tax^2)+I(tax^3)+tax:medv+I(tax^4))" $termsKept NULL $direction [1] "backward" $method [1] "BIC" $origMethodValue [1] 3446.42 $path Term dropped BIC 1 indus:rm 3440.239 2 tax:indus:nox 3434.081 3 tax:black 3427.966 4 I(ptratio^2) 3422.193 5 indus:ptratio 3416.994 6 tax:rm:ptratio 3412.479 7 rm:ptratio 3406.811 8 tax:ptratio 3401.415 9 tax:rm 3396.509 10 ptratio 3391.821 11 I(indus^2) 3388.289 12 tax:nox 3383.944 13 indus:nox 3380.222 14 I(nox^2) 3375.064 15 nox 3368.840 16 tax:indus 3365.964 17 indus 3360.037 18 black 3357.858 19 rm 3356.414 20 I(tax^4) 3377.676 $runningTime [1] "10.42 secs" modSel/example2.txt0000644000076500000240000003067513211325001013310 0ustar00CiCistaff[1] "-----------------------------------------------------------" [1] "-----------------------------------------------------------" The model now has logLik value of -1793.901. The model now has 3 terms, including the intercept, of which 1 are fixed effects and 1 are/is random effect(s). The groups of terms that are eligible to be added are: AddGroup 1 "tax:indus, tax, indus" 2 "tax:nox, tax, nox" 3 "tax:rm, tax" 4 "tax:ptratio, tax, ptratio" 5 "tax:black, tax, black" 6 "tax:medv, tax, medv" 7 "indus:nox, indus, nox" 8 "indus:rm, indus" 9 "indus:ptratio, indus, ptratio" 10 "rm:ptratio, ptratio" 11 "tax:indus:nox, tax:indus, tax:nox, indus:nox, tax, indus, nox" 12 "tax:rm:ptratio, tax:rm, tax:ptratio, rm:ptratio, tax, ptratio" 13 "tax, I(tax^2)" 14 "indus, I(indus^2)" 15 "nox, I(nox^2)" 16 "ptratio, I(ptratio^2)" 17 "tax, I(tax^2), I(tax^3)" 18 "tax, I(tax^2), I(tax^3), I(tax^4)" 19 "tax" 20 "indus" 21 "nox" 22 "ptratio" 23 "black" 24 "medv" R is going to fit 24 model(s),each time adding one group of variable that is eligible to be added to the original model. The 24 models were fitted in 1.08 secs. The top five alternative models with the highest LRT p-Value are: Group Considered AIC BIC 20 tax:medv, tax, medv 3347.916 3377.502 16 tax, I(tax^2), I(tax^3), I(tax^4) 3369.302 3403.115 14 tax, I(tax^2) 3389.833 3415.192 15 tax, I(tax^2), I(tax^3) 3390.517 3420.103 17 tax:black, tax, black 3393.018 3422.604 logLik deviance LRT p-Value 20 -1666.958 3333.916 9.444013e-55 16 -1676.651 3353.302 1.417982e-49 14 -1688.916 3377.833 2.544397e-46 15 -1688.258 3376.517 1.535359e-45 17 -1689.509 3379.018 5.331820e-45 The group of terms that will be added is: tax:medv + tax + medv. The updated model is now: crim ~ (1 | chas) + rm + tax:medv + tax + medv The updated model has logLik value of: -1666.958. The LRT test's pValue to compare this alternative model with the null model is: 7.692058e-18. [1] "-----------------------------------------------------------" [1] "-----------------------------------------------------------" The model now has logLik value of -1666.958. The model now has 6 terms, including the intercept, of which 4 are fixed effects and 1 are/is random effect(s). The groups of terms that are eligible to be added are: AddGroup 1 "tax:indus, indus" 2 "tax:nox, nox" 3 "tax:rm" 4 "tax:ptratio, ptratio" 5 "tax:black, black" 6 "indus:nox, indus, nox" 7 "indus:rm, indus" 8 "indus:ptratio, indus, ptratio" 9 "rm:ptratio, ptratio" 10 "tax:indus:nox, tax:indus, tax:nox, indus:nox, indus, nox" 11 "tax:rm:ptratio, tax:rm, tax:ptratio, rm:ptratio, ptratio" 12 "I(tax^2)" 13 "indus, I(indus^2)" 14 "nox, I(nox^2)" 15 "ptratio, I(ptratio^2)" 16 "I(tax^2), I(tax^3)" 17 "I(tax^2), I(tax^3), I(tax^4)" 18 "indus" 19 "nox" 20 "ptratio" 21 "black" R is going to fit 21 model(s),each time adding one group of variable that is eligible to be added to the original model. The 21 models were fitted in 0.95 secs. The top five alternative models with the highest LRT p-Value are: Group Considered 4 I(tax^2), I(tax^3), I(tax^4) 17 tax:indus:nox, tax:indus, tax:nox, indus:nox, indus, nox 2 I(tax^2) 6 indus, I(indus^2) 1 black AIC BIC logLik deviance LRT p-Value 4 3315.592 3357.858 -1647.796 3295.592 2.413662e-08 17 3334.352 3389.297 -1654.176 3308.352 2.683382e-04 2 3343.020 3376.832 -1663.510 3327.020 8.638649e-03 6 3342.512 3380.551 -1662.256 3324.512 9.074664e-03 1 3343.775 3377.587 -1663.887 3327.775 1.320418e-02 The group of terms that will be added is: I(tax^2) + I(tax^3) + I(tax^4). The updated model is now: crim ~ (1 | chas) + rm + tax:medv + tax + medv + I(tax^2) + I(tax^3) + I(tax^4) The updated model has logLik value of: -1647.796. The LRT test's pValue to compare this alternative model with the null model is: 0.01320418. [1] "-----------------------------------------------------------" [1] "-----------------------------------------------------------" The model now has logLik value of -1647.796. The model now has 9 terms, including the intercept, of which 7 are fixed effects and 1 are/is random effect(s). The groups of terms that are eligible to be added are: AddGroup 1 "tax:indus, indus" 2 "tax:nox, nox" 3 "tax:rm" 4 "tax:ptratio, ptratio" 5 "tax:black, black" 6 "indus:nox, indus, nox" 7 "indus:rm, indus" 8 "indus:ptratio, indus, ptratio" 9 "rm:ptratio, ptratio" 10 "tax:indus:nox, tax:indus, tax:nox, indus:nox, indus, nox" 11 "tax:rm:ptratio, tax:rm, tax:ptratio, rm:ptratio, ptratio" 12 "indus, I(indus^2)" 13 "nox, I(nox^2)" 14 "ptratio, I(ptratio^2)" 15 "indus" 16 "nox" 17 "ptratio" 18 "black" R is going to fit 18 model(s),each time adding one group of variable that is eligible to be added to the original model. The 18 models were fitted in 0.89 secs. The top five alternative models with the highest LRT p-Value are: Group Considered 1 black 18 tax:rm:ptratio, tax:rm, tax:ptratio, rm:ptratio, ptratio 16 tax:ptratio, ptratio 12 tax:black, black 13 tax:indus, indus AIC BIC logLik deviance LRT p-Value 1 3313.545 3360.037 -1645.773 3291.545 0.04425196 18 3315.835 3379.233 -1642.918 3285.835 0.08241529 16 3314.811 3365.530 -1645.406 3290.811 0.09158049 12 3315.542 3366.260 -1645.771 3291.542 0.13193724 13 3315.679 3366.398 -1645.840 3291.679 0.14132996 The group of terms that will be added is: black. The updated model is now: crim ~ (1 | chas) + rm + tax:medv + tax + medv + I(tax^2) + I(tax^3) + I(tax^4) + black The updated model has logLik value of: -1645.773. The LRT test's pValue to compare this alternative model with the null model is: 0.04425196. [1] "-----------------------------------------------------------" [1] "-----------------------------------------------------------" The model now has logLik value of -1645.773. The model now has 10 terms, including the intercept, of which 8 are fixed effects and 1 are/is random effect(s). The groups of terms that are eligible to be added are: AddGroup 1 "tax:indus, indus" 2 "tax:nox, nox" 3 "tax:rm" 4 "tax:ptratio, ptratio" 5 "tax:black" 6 "indus:nox, indus, nox" 7 "indus:rm, indus" 8 "indus:ptratio, indus, ptratio" 9 "rm:ptratio, ptratio" 10 "tax:indus:nox, tax:indus, tax:nox, indus:nox, indus, nox" 11 "tax:rm:ptratio, tax:rm, tax:ptratio, rm:ptratio, ptratio" 12 "indus, I(indus^2)" 13 "nox, I(nox^2)" 14 "ptratio, I(ptratio^2)" 15 "indus" 16 "nox" 17 "ptratio" R is going to fit 17 model(s),each time adding one group of variable that is eligible to be added to the original model. The 17 models were fitted in 0.8 secs. The top five alternative models with the highest LRT p-Value are: Group Considered 17 tax:rm:ptratio, tax:rm, tax:ptratio, rm:ptratio, ptratio 15 tax:ptratio, ptratio 12 tax:indus, indus 16 tax:rm 14 tax:nox, nox AIC BIC logLik deviance LRT p-Value 17 3314.500 3382.125 -1641.250 3282.500 0.1072705 15 3313.768 3368.713 -1643.884 3287.768 0.1512361 12 3313.895 3368.840 -1643.947 3287.895 0.1611751 16 3314.531 3365.249 -1645.265 3290.531 0.3137753 14 3315.653 3370.598 -1644.826 3289.653 0.3881221 The group of terms that will be added is: tax:rm:ptratio + tax:rm + tax:ptratio + rm:ptratio + ptratio. The updated model is now: crim ~ (1 | chas) + rm + tax:medv + tax + medv + I(tax^2) + I(tax^3) + I(tax^4) + black + tax:rm:ptratio + tax:rm + tax:ptratio + rm:ptratio + ptratio The updated model has logLik value of: -1641.25. The LRT test's pValue to compare this alternative model with the null model is: 0.9060073. Adding 'tax:rm:ptratio + tax:rm + tax:ptratio + rm:ptratio + ptratio' will result in an increase in logLik value. So the term(s) will not be added to the model and the model selection process terminates. [1] "-------------------------------------------------" [1] "-------------------------------------------------" [1] "-------------------------------------------------" $finalModel [1] "lmer(crim ~ (1 | chas) + rm + tax:medv + tax + medv + I(tax^2) + I(tax^3) + I(tax^4) + black)" $termsKept [1] "rm" $direction [1] "forward" $method [1] "LRT" $path LRT pValue c("tax:medv", "tax", "medv") 7.692058e-18 c("I(tax^2)", "I(tax^3)", "I(tax^4)") 1.320418e-02 black 4.425196e-02 c("tax:rm:ptratio", "tax:rm", "tax:ptratio", "rm:ptratio", "ptratio") 9.060073e-01 $alpha [1] 0.05 $runningTime [1] "4.22 secs" modSel/example3.txt0000644000076500000240000001676513211325005013321 0ustar00CiCistaff[1] "-----------------------------------------------------------" [1] "-----------------------------------------------------------" The model now has 2 terms, including the intercept, of which 1 are/is fixed effect(s). The groups of terms that are eligible to be added are: AddGroup 1 "tax:indus, tax, indus" 2 "tax:nox, tax, nox" 3 "tax:rm, tax" 4 "tax:ptratio, tax, ptratio" 5 "tax:black, tax, black" 6 "tax:medv, tax, medv" 7 "indus:nox, indus, nox" 8 "indus:rm, indus" 9 "indus:ptratio, indus, ptratio" 10 "rm:ptratio, ptratio" 11 "tax:indus:nox, tax:indus, tax:nox, indus:nox, tax, indus, nox" 12 "tax:rm:ptratio, tax:rm, tax:ptratio, rm:ptratio, tax, ptratio" 13 "tax, I(tax^2)" 14 "indus, I(indus^2)" 15 "nox, I(nox^2)" 16 "ptratio, I(ptratio^2)" 17 "tax, I(tax^2), I(tax^3)" 18 "tax, I(tax^2), I(tax^3), I(tax^4)" 19 "tax" 20 "indus" 21 "nox" 22 "ptratio" 23 "black" 24 "medv" The top five most significant group of terms are: Terms Considered F-test p-Value 6 tax:medv, tax, medv 2.923040e-54 18 tax, I(tax^2), I(tax^3), I(tax^4) 4.513501e-49 13 tax, I(tax^2) 5.834653e-46 17 tax, I(tax^2), I(tax^3) 3.925719e-45 5 tax:black, tax, black 1.348082e-44 The group of terms that will be added is: tax:medv + tax + medv. The updated model is now: crim ~ rm + tax:medv + tax + medv [1] "-----------------------------------------------------------" [1] "-----------------------------------------------------------" The model now has 5 terms, including the intercept, of which 4 are/is fixed effect(s). The groups of terms that are eligible to be added are: AddGroup 1 "tax:indus, indus" 2 "tax:nox, nox" 3 "tax:rm" 4 "tax:ptratio, ptratio" 5 "tax:black, black" 6 "indus:nox, indus, nox" 7 "indus:rm, indus" 8 "indus:ptratio, indus, ptratio" 9 "rm:ptratio, ptratio" 10 "tax:indus:nox, tax:indus, tax:nox, indus:nox, indus, nox" 11 "tax:rm:ptratio, tax:rm, tax:ptratio, rm:ptratio, ptratio" 12 "I(tax^2)" 13 "indus, I(indus^2)" 14 "nox, I(nox^2)" 15 "ptratio, I(ptratio^2)" 16 "I(tax^2), I(tax^3)" 17 "I(tax^2), I(tax^3), I(tax^4)" 18 "indus" 19 "nox" 20 "ptratio" 21 "black" The top five most significant group of terms are: Terms Considered 17 I(tax^2), I(tax^3), I(tax^4) 10 tax:indus:nox, tax:indus, tax:nox, indus:nox, indus, nox 12 I(tax^2) 13 indus, I(indus^2) 21 black F-test p-Value 17 3.184184e-08 10 3.259510e-04 12 9.077710e-03 13 9.684604e-03 21 1.380694e-02 The group of terms that will be added is: I(tax^2) + I(tax^3) + I(tax^4). The updated model is now: crim ~ rm + tax:medv + tax + medv + I(tax^2) + I(tax^3) + I(tax^4) [1] "-----------------------------------------------------------" [1] "-----------------------------------------------------------" The model now has 8 terms, including the intercept, of which 7 are/is fixed effect(s). The groups of terms that are eligible to be added are: AddGroup 1 "tax:indus, indus" 2 "tax:nox, nox" 3 "tax:rm" 4 "tax:ptratio, ptratio" 5 "tax:black, black" 6 "indus:nox, indus, nox" 7 "indus:rm, indus" 8 "indus:ptratio, indus, ptratio" 9 "rm:ptratio, ptratio" 10 "tax:indus:nox, tax:indus, tax:nox, indus:nox, indus, nox" 11 "tax:rm:ptratio, tax:rm, tax:ptratio, rm:ptratio, ptratio" 12 "indus, I(indus^2)" 13 "nox, I(nox^2)" 14 "ptratio, I(ptratio^2)" 15 "indus" 16 "nox" 17 "ptratio" 18 "black" The top five most significant group of terms are: Terms Considered 18 black 11 tax:rm:ptratio, tax:rm, tax:ptratio, rm:ptratio, ptratio 4 tax:ptratio, ptratio 5 tax:black, black 1 tax:indus, indus F-test p-Value 18 0.04629174 11 0.08952398 4 0.09601093 5 0.13732558 1 0.14690211 The group of terms that will be added is: black. The updated model is now: crim ~ rm + tax:medv + tax + medv + I(tax^2) + I(tax^3) + I(tax^4) + black Adding 'black' will result in an F-test p-Value bigger than alpha, so the term(s) will not be added and the process terminates. [1] "-------------------------------------------------" [1] "-------------------------------------------------" [1] "-------------------------------------------------" $finalModel [1] "lm(crim ~ rm + tax:medv + tax + medv + I(tax^2) + I(tax^3) + I(tax^4))" $termsKept [1] "rm" $direction [1] "forward" $path TermsAdded pValue 1 tax:medv, tax, medv 2.923040e-54 2 I(tax^2), I(tax^3), I(tax^4) 3.184184e-08 3 black 4.629174e-02 $alpha [1] 0.025 $runningTime [1] "0.45 secs" modSel/example4.txt0000644000076500000240000005163213211325005013312 0ustar00CiCistaff[1] "-----------------------------------------------------------" [1] "-----------------------------------------------------------" There are 10 terms that are eligible to be dropped and these are: [1] "tax:black" "tax:medv" "indus:rm" [4] "indus:ptratio" "tax:indus:nox" "tax:rm:ptratio" [7] "I(indus^2)" "I(nox^2)" "I(ptratio^2)" [10] "I(tax^4)" The top least significant terms are: Estimate Std. Error t value tax:indus 0.0001674884 5.193852e-03 0.03224744 black 0.0030431818 2.322422e-02 0.13103483 tax:nox -0.0268779707 1.754916e-01 -0.15315820 (Intercept) 31.2375001451 1.604815e+02 0.19464857 indus:rm -0.0310971115 1.525180e-01 -0.20389137 Pr(>|t|) tax:indus 0.9742881 black 0.8958027 tax:nox 0.8783379 (Intercept) 0.8457504 indus:rm 0.8385248 The term that will be dropped is: indus:rm. The updated model is now: crim ~ tax + indus + nox + rm + ptratio + black + medv + I(tax^2) + I(indus^2) + I(nox^2) + I(ptratio^2) + I(tax^3) + I(tax^4) + tax:indus + tax:nox + tax:rm + tax:ptratio + tax:black + tax:medv + indus:nox + indus:ptratio + rm:ptratio + tax:indus:nox + tax:rm:ptratio [1] "-----------------------------------------------------------" [1] "-----------------------------------------------------------" There are 9 terms that are eligible to be dropped and these are: [1] "tax:black" "tax:medv" "indus:ptratio" [4] "tax:indus:nox" "tax:rm:ptratio" "I(indus^2)" [7] "I(nox^2)" "I(ptratio^2)" "I(tax^4)" The top least significant terms are: Estimate Std. Error t value tax:indus 0.0001380703 5.186672e-03 0.02662021 black 0.0027824850 2.316588e-02 0.12011133 (Intercept) 20.2597060499 1.510294e+02 0.13414411 tax:nox -0.0276289973 1.752780e-01 -0.15762958 tax:indus:nox -0.0025138931 9.870687e-03 -0.25468269 Pr(>|t|) tax:indus 0.9787737 black 0.9044451 (Intercept) 0.8933447 tax:nox 0.8748148 tax:indus:nox 0.7990770 The term that will be dropped is: tax:indus:nox. The updated model is now: crim ~ tax + indus + nox + rm + ptratio + black + medv + I(tax^2) + I(indus^2) + I(nox^2) + I(ptratio^2) + I(tax^3) + I(tax^4) + tax:indus + tax:nox + tax:rm + tax:ptratio + tax:black + tax:medv + indus:nox + indus:ptratio + rm:ptratio + tax:rm:ptratio [1] "-----------------------------------------------------------" [1] "-----------------------------------------------------------" There are 11 terms that are eligible to be dropped and these are: [1] "tax:indus" "tax:nox" "tax:black" [4] "tax:medv" "indus:nox" "indus:ptratio" [7] "tax:rm:ptratio" "I(indus^2)" "I(nox^2)" [10] "I(ptratio^2)" "I(tax^4)" The top least significant terms are: Estimate Std. Error t value (Intercept) 6.482811e+00 1.408727e+02 0.04601894 black 2.269076e-03 2.305561e-02 0.09841752 tax:black -1.185031e-05 3.634040e-05 -0.32609196 indus -4.429237e-01 9.145335e-01 -0.48431657 I(ptratio^2) 6.254322e-02 9.160869e-02 0.68272145 Pr(>|t|) (Intercept) 0.9633142 black 0.9216417 tax:black 0.7444963 indus 0.6283812 I(ptratio^2) 0.4951109 The term that will be dropped is: tax:black. The updated model is now: crim ~ tax + indus + nox + rm + ptratio + black + medv + I(tax^2) + I(indus^2) + I(nox^2) + I(ptratio^2) + I(tax^3) + I(tax^4) + tax:indus + tax:nox + tax:rm + tax:ptratio + tax:medv + indus:nox + indus:ptratio + rm:ptratio + tax:rm:ptratio [1] "-----------------------------------------------------------" [1] "-----------------------------------------------------------" There are 11 terms that are eligible to be dropped and these are: [1] "tax:indus" "tax:nox" "tax:medv" [4] "indus:nox" "indus:ptratio" "tax:rm:ptratio" [7] "I(indus^2)" "I(nox^2)" "I(ptratio^2)" [10] "I(tax^4)" "black" The top least significant terms are: Estimate Std. Error t value (Intercept) 11.14187139 140.01654939 0.07957539 indus -0.45972967 0.91223504 -0.50395967 I(ptratio^2) 0.06000549 0.09119306 0.65800501 indus:ptratio -0.03715131 0.03597674 -1.03264807 rm -18.78294960 16.65694754 -1.12763455 Pr(>|t|) (Intercept) 0.9366079 indus 0.6145195 I(ptratio^2) 0.5108484 indus:ptratio 0.3022854 rm 0.2600341 The term that will be dropped is: I(ptratio^2). The updated model is now: crim ~ tax + indus + nox + rm + ptratio + black + medv + I(tax^2) + I(indus^2) + I(nox^2) + I(tax^3) + I(tax^4) + tax:indus + tax:nox + tax:rm + tax:ptratio + tax:medv + indus:nox + indus:ptratio + rm:ptratio + tax:rm:ptratio [1] "-----------------------------------------------------------" [1] "-----------------------------------------------------------" There are 10 terms that are eligible to be dropped and these are: [1] "tax:indus" "tax:nox" "tax:medv" [4] "indus:nox" "indus:ptratio" "tax:rm:ptratio" [7] "I(indus^2)" "I(nox^2)" "I(tax^4)" [10] "black" The top least significant terms are: Estimate Std. Error t value (Intercept) -31.47713785 124.06217536 -0.2537207 indus -0.54243101 0.90300588 -0.6006949 ptratio -5.52237336 5.72888714 -0.9639522 rm -15.72024001 15.98403904 -0.9834961 indus:ptratio -0.03557971 0.03587634 -0.9917320 Pr(>|t|) (Intercept) 0.7998191 indus 0.5483244 ptratio 0.3355513 rm 0.3258547 indus:ptratio 0.3218238 The term that will be dropped is: indus:ptratio. The updated model is now: crim ~ tax + indus + nox + rm + ptratio + black + medv + I(tax^2) + I(indus^2) + I(nox^2) + I(tax^3) + I(tax^4) + tax:indus + tax:nox + tax:rm + tax:ptratio + tax:medv + indus:nox + rm:ptratio + tax:rm:ptratio [1] "-----------------------------------------------------------" [1] "-----------------------------------------------------------" There are 9 terms that are eligible to be dropped and these are: [1] "tax:indus" "tax:nox" "tax:medv" [4] "indus:nox" "tax:rm:ptratio" "I(indus^2)" [7] "I(nox^2)" "I(tax^4)" "black" The top least significant terms are: Estimate Std. Error t value (Intercept) -39.158251377 1.238181e+02 -0.3162563 ptratio -5.058599421 5.709672e+00 -0.8859702 rm -15.882638141 1.598293e+01 -0.9937251 rm:ptratio 0.827158377 8.182136e-01 1.0109321 tax:ptratio 0.017781843 1.641860e-02 1.0830301 tax:rm 0.056664249 4.695070e-02 1.2068882 tax:rm:ptratio -0.003023644 2.358738e-03 -1.2818909 Pr(>|t|) (Intercept) 0.7519440 ptratio 0.3760726 rm 0.3208522 rm:ptratio 0.3125530 tax:ptratio 0.2793332 tax:rm 0.2280635 tax:rm:ptratio 0.2004934 The term that will be dropped is: tax:rm:ptratio. The updated model is now: crim ~ tax + indus + nox + rm + ptratio + black + medv + I(tax^2) + I(indus^2) + I(nox^2) + I(tax^3) + I(tax^4) + tax:indus + tax:nox + tax:rm + tax:ptratio + tax:medv + indus:nox + rm:ptratio [1] "-----------------------------------------------------------" [1] "-----------------------------------------------------------" There are 11 terms that are eligible to be dropped and these are: [1] "tax:indus" "tax:nox" "tax:rm" [4] "tax:ptratio" "tax:medv" "indus:nox" [7] "rm:ptratio" "I(indus^2)" "I(nox^2)" [10] "I(tax^4)" "black" The top least significant terms are: Estimate Std. Error t value Pr(>|t|) rm:ptratio -0.175698578 0.239823070 -0.7326175 0.4641449 tax:rm -0.003294708 0.004074111 -0.8086937 0.4190870 rm 3.777324887 4.501860812 0.8390586 0.4018490 ptratio 1.819859065 1.952650590 0.9319942 0.3518023 tax:ptratio -0.002925831 0.002937695 -0.9959614 0.3197645 The term that will be dropped is: rm:ptratio. The updated model is now: crim ~ tax + indus + nox + rm + ptratio + black + medv + I(tax^2) + I(indus^2) + I(nox^2) + I(tax^3) + I(tax^4) + tax:indus + tax:nox + tax:rm + tax:ptratio + tax:medv + indus:nox [1] "-----------------------------------------------------------" [1] "-----------------------------------------------------------" There are 10 terms that are eligible to be dropped and these are: [1] "tax:indus" "tax:nox" "tax:rm" [4] "tax:ptratio" "tax:medv" "indus:nox" [7] "I(indus^2)" "I(nox^2)" "I(tax^4)" [10] "black" The top least significant terms are: Estimate Std. Error t value Pr(>|t|) rm 0.920465011 2.248476309 0.4093728 0.6824461 ptratio 0.593300730 1.004436061 0.5906804 0.5550086 tax:ptratio -0.002596571 0.002901732 -0.8948348 0.3713174 tax:rm -0.004287876 0.003840127 -1.1165976 0.2647170 I(indus^2) -0.024266442 0.016790727 -1.4452288 0.1490368 The term that will be dropped is: tax:ptratio. The updated model is now: crim ~ tax + indus + nox + rm + ptratio + black + medv + I(tax^2) + I(indus^2) + I(nox^2) + I(tax^3) + I(tax^4) + tax:indus + tax:nox + tax:rm + tax:medv + indus:nox [1] "-----------------------------------------------------------" [1] "-----------------------------------------------------------" There are 10 terms that are eligible to be dropped and these are: [1] "tax:indus" "tax:nox" "tax:rm" "tax:medv" [5] "indus:nox" "I(indus^2)" "I(nox^2)" "I(tax^4)" [9] "ptratio" "black" The top least significant terms are: Estimate Std. Error t value Pr(>|t|) rm 0.994610237 2.246490252 0.4427396 0.65815026 tax:rm -0.004334052 0.003838996 -1.1289547 0.25947162 ptratio -0.286593964 0.204909232 -1.3986386 0.16255657 black -0.005780153 0.003612625 -1.5999869 0.11024865 tax:nox -0.071070374 0.040371000 -1.7604313 0.07896108 The term that will be dropped is: tax:rm. The updated model is now: crim ~ tax + indus + nox + rm + ptratio + black + medv + I(tax^2) + I(indus^2) + I(nox^2) + I(tax^3) + I(tax^4) + tax:indus + tax:nox + tax:medv + indus:nox [1] "-----------------------------------------------------------" [1] "-----------------------------------------------------------" There are 9 terms that are eligible to be dropped and these are: [1] "tax:indus" "tax:nox" "tax:medv" "indus:nox" [5] "I(indus^2)" "I(nox^2)" "I(tax^4)" "ptratio" [9] "black" The top least significant terms are: Estimate Std. Error t value Pr(>|t|) ptratio -0.246342957 0.201840263 -1.220485 0.22286939 black -0.005688535 0.003612727 -1.574582 0.11599971 tax:nox -0.073688544 0.040315647 -1.827790 0.06819021 I(indus^2) -0.028120487 0.015077127 -1.865109 0.06276483 indus -1.257720458 0.670354020 -1.876203 0.06122276 The term that will be dropped is: ptratio. The updated model is now: crim ~ tax + indus + nox + rm + black + medv + I(tax^2) + I(indus^2) + I(nox^2) + I(tax^3) + I(tax^4) + tax:indus + tax:nox + tax:medv + indus:nox [1] "-----------------------------------------------------------" [1] "-----------------------------------------------------------" There are 8 terms that are eligible to be dropped and these are: [1] "tax:indus" "tax:nox" "tax:medv" "indus:nox" [5] "I(indus^2)" "I(nox^2)" "I(tax^4)" "black" The top least significant terms are: Estimate Std. Error t value Pr(>|t|) I(indus^2) -0.023678186 0.0146384984 -1.617528 0.10640780 black -0.005973925 0.0036069524 -1.656225 0.09831646 indus -1.135383279 0.6631486253 -1.712110 0.08750914 tax:nox -0.077693596 0.0402019424 -1.932583 0.05386354 tax:indus -0.001301136 0.0006436617 -2.021459 0.04377539 The term that will be dropped is: I(indus^2). The updated model is now: crim ~ tax + indus + nox + rm + black + medv + I(tax^2) + I(nox^2) + I(tax^3) + I(tax^4) + tax:indus + tax:nox + tax:medv + indus:nox [1] "-----------------------------------------------------------" [1] "-----------------------------------------------------------" There are 7 terms that are eligible to be dropped and these are: [1] "tax:indus" "tax:nox" "tax:medv" "indus:nox" [5] "I(nox^2)" "I(tax^4)" "black" The top least significant terms are: Estimate Std. Error t value Pr(>|t|) indus -0.795925041 0.6301005116 -1.263172 0.20712702 tax:nox -0.048785830 0.0360703814 -1.352518 0.17683230 black -0.006473133 0.0035996351 -1.798275 0.07274759 tax:indus -0.001228510 0.0006431498 -1.910146 0.05669667 indus:nox 2.246246490 1.1411559548 1.968396 0.04958467 The term that will be dropped is: tax:nox. The updated model is now: crim ~ tax + indus + nox + rm + black + medv + I(tax^2) + I(nox^2) + I(tax^3) + I(tax^4) + tax:indus + tax:medv + indus:nox [1] "-----------------------------------------------------------" [1] "-----------------------------------------------------------" There are 6 terms that are eligible to be dropped and these are: [1] "tax:indus" "tax:medv" "indus:nox" "I(nox^2)" [5] "I(tax^4)" "black" The top least significant terms are: Estimate Std. Error t value Pr(>|t|) indus -0.441675190 0.573566709 -0.7700503 0.44163966 indus:nox 1.642055489 1.050971257 1.5624171 0.11883272 nox 63.989615915 35.213693516 1.8171799 0.06979778 I(nox^2) -69.671795769 37.336066468 -1.8660722 0.06262604 black -0.007023031 0.003579614 -1.9619522 0.05033138 The term that will be dropped is: indus:nox. The updated model is now: crim ~ tax + indus + nox + rm + black + medv + I(tax^2) + I(nox^2) + I(tax^3) + I(tax^4) + tax:indus + tax:medv [1] "-----------------------------------------------------------" [1] "-----------------------------------------------------------" There are 5 terms that are eligible to be dropped and these are: [1] "tax:indus" "tax:medv" "I(nox^2)" "I(tax^4)" [5] "black" The top least significant terms are: Estimate Std. Error t value Pr(>|t|) nox 24.558218951 2.459348e+01 0.9985664 0.31849467 I(nox^2) -19.785485009 1.938092e+01 -1.0208745 0.30781457 tax:indus -0.001084462 6.313087e-04 -1.7177990 0.08646121 indus 0.384671173 2.222575e-01 1.7307453 0.08412291 black -0.007071183 3.584709e-03 -1.9725963 0.04909991 The term that will be dropped is: I(nox^2). The updated model is now: crim ~ tax + indus + nox + rm + black + medv + I(tax^2) + I(tax^3) + I(tax^4) + tax:indus + tax:medv [1] "-----------------------------------------------------------" [1] "-----------------------------------------------------------" There are 5 terms that are eligible to be dropped and these are: [1] "tax:indus" "tax:medv" "I(tax^4)" "nox" [5] "black" The top least significant terms are: Estimate Std. Error t value Pr(>|t|) nox -0.188416560 4.1512367467 -0.04538805 0.96381634 tax:indus -0.001115739 0.0006305917 -1.76935268 0.07745194 indus 0.416228967 0.2201067095 1.89103262 0.05920482 black -0.006890925 0.0035805104 -1.92456509 0.05485779 rm -1.549378075 0.6291115634 -2.46280336 0.01412563 The term that will be dropped is: nox. The updated model is now: crim ~ tax + indus + rm + black + medv + I(tax^2) + I(tax^3) + I(tax^4) + tax:indus + tax:medv [1] "-----------------------------------------------------------" [1] "-----------------------------------------------------------" There are 4 terms that are eligible to be dropped and these are: [1] "tax:indus" "tax:medv" "I(tax^4)" "black" The top least significant terms are: Estimate Std. Error t value tax:indus -1.121216e-03 6.183131e-04 -1.813347 indus 4.160774e-01 2.198594e-01 1.892470 black -6.880370e-03 3.569347e-03 -1.927627 rm -1.548585e+00 6.282346e-01 -2.464979 (Intercept) -1.153536e+02 2.931091e+01 -3.935519 Pr(>|t|) tax:indus 7.038373e-02 indus 5.901156e-02 black 5.447332e-02 rm 1.404053e-02 (Intercept) 9.489859e-05 The term that will be dropped is: tax:indus. The updated model is now: crim ~ tax + indus + rm + black + medv + I(tax^2) + I(tax^3) + I(tax^4) + tax:medv [1] "-----------------------------------------------------------" [1] "-----------------------------------------------------------" There are 4 terms that are eligible to be dropped and these are: [1] "tax:medv" "I(tax^4)" "indus" "black" The top least significant terms are: Estimate Std. Error t value indus 3.745330e-02 0.069016323 0.542673 black -6.976218e-03 0.003577178 -1.950201 rm -1.438962e+00 0.626759976 -2.295875 (Intercept) -1.212043e+02 29.199919282 -4.150842 medv 5.250663e-01 0.117575449 4.465782 Pr(>|t|) indus 5.875987e-01 black 5.171506e-02 rm 2.209928e-02 (Intercept) 3.898900e-05 medv 9.887323e-06 The term that will be dropped is: indus. The updated model is now: crim ~ tax + rm + black + medv + I(tax^2) + I(tax^3) + I(tax^4) + tax:medv [1] "-----------------------------------------------------------" [1] "-----------------------------------------------------------" There are 3 terms that are eligible to be dropped and these are: [1] "tax:medv" "I(tax^4)" "black" The top least significant terms are: Estimate Std. Error t value black -7.121182e-03 0.003564657 -1.997718 rm -1.452378e+00 0.625827585 -2.320732 (Intercept) -1.141821e+02 26.157589839 -4.365161 medv 5.110063e-01 0.114603880 4.458892 tax 1.363977e+00 0.282345806 4.830875 Pr(>|t|) black 4.629174e-02 rm 2.070515e-02 (Intercept) 1.546206e-05 medv 1.019353e-05 tax 1.813477e-06 The term that will be dropped is: black. The updated model is now: crim ~ tax + rm + medv + I(tax^2) + I(tax^3) + I(tax^4) + tax:medv [1] "-----------------------------------------------------------" [1] "-----------------------------------------------------------" There are 2 terms that are eligible to be dropped and these are: [1] "tax:medv" "I(tax^4)" The top least significant terms are: Estimate Std. Error t value rm -1.361472e+00 6.260426e-01 -2.174728 medv 5.162777e-01 1.149170e-01 4.492613 (Intercept) -1.200799e+02 2.606838e+01 -4.606344 tax 1.395734e+00 2.827432e-01 4.936404 I(tax^2) -5.675687e-03 1.102523e-03 -5.147909 I(tax^3) 9.952142e-06 1.841835e-06 5.403383 I(tax^4) -6.117475e-09 1.105104e-09 -5.535657 Pr(>|t|) rm 3.012042e-02 medv 8.754914e-06 (Intercept) 5.211236e-06 tax 1.087378e-06 I(tax^2) 3.798461e-07 I(tax^3) 1.015402e-07 I(tax^4) 5.022742e-08 The term that will be dropped is: I(tax^4). The updated model is now: crim ~ tax + rm + medv + I(tax^2) + I(tax^3) + tax:medv 'I(tax^4)' has a pValue smaller than alpha, so 'I(tax^4)' will be added back to the model and the model selection process terminates. [1] "-------------------------------------------------" [1] "-------------------------------------------------" [1] "-------------------------------------------------" $fullModel [1] "lm(crim ~ tax + indus + nox + rm + ptratio + black + medv + I(tax^2) + I(indus^2) + I(nox^2) + I(ptratio^2) + I(tax^3) + I(tax^4) + tax:indus + tax:nox + tax:rm + tax:ptratio + tax:black + tax:medv + indus:nox + indus:rm + indus:ptratio + rm:ptratio + tax:indus:nox + tax:rm:ptratio)" $finalModel [1] "lm(crim ~ tax+rm+medv+I(tax^2)+I(tax^3)+tax:medv+I(tax^4))" $termsKept [1] "rm" $direction [1] "backward" $path vDrop pValue Pr(>|t|) indus:rm 8.385248e-01 Pr(>|t|)1 tax:indus:nox 7.990770e-01 Pr(>|t|)2 tax:black 7.444963e-01 Pr(>|t|)3 I(ptratio^2) 5.108484e-01 Pr(>|t|)4 indus:ptratio 3.218238e-01 Pr(>|t|)5 tax:rm:ptratio 2.004934e-01 Pr(>|t|)6 rm:ptratio 4.641449e-01 Pr(>|t|)7 tax:ptratio 3.713174e-01 Pr(>|t|)8 tax:rm 2.594716e-01 Pr(>|t|)9 ptratio 2.228694e-01 Pr(>|t|)10 I(indus^2) 1.064078e-01 Pr(>|t|)11 tax:nox 1.768323e-01 Pr(>|t|)12 indus:nox 1.188327e-01 Pr(>|t|)13 I(nox^2) 3.078146e-01 Pr(>|t|)14 nox 9.638163e-01 Pr(>|t|)15 tax:indus 7.038373e-02 Pr(>|t|)16 indus 5.875987e-01 Pr(>|t|)17 black 4.629174e-02 Pr(>|t|)18 I(tax^4) 5.022742e-08 $alpha [1] 0.025 $runningTime [1] "0.28 secs" modSel/example6.txt0000644000076500000240000005061313211325004013311 0ustar00CiCistaff[1] "-------------------------------------------------" [1] "-------------------------------------------------" The top least significant terms are: Estimate Std. Error t value tax:indus 0.0001674884 5.058653e-03 0.03310929 black 0.0030431819 2.261968e-02 0.13453690 tax:nox -0.0268779706 1.709234e-01 -0.15725154 (Intercept) 31.2374984544 1.563041e+02 0.19985078 indus:rm -0.0310971102 1.485479e-01 -0.20934060 The term that will be dropped is indus:rm. The updated model is now: crim ~ (1 | chas) + tax + indus + nox + rm + ptratio + black + medv + I(tax^2) + I(indus^2) + I(nox^2) + I(ptratio^2) + I(tax^3) + I(tax^4) + tax:indus + tax:nox + tax:rm + tax:ptratio + tax:black + tax:medv + indus:nox + indus:ptratio + rm:ptratio + tax:indus:nox + tax:rm:ptratio + 1 [1] "-------------------------------------------------" [1] "-------------------------------------------------" The top least significant terms are: Estimate Std. Error t value tax:indus 0.0001674884 5.058653e-03 0.03310929 black 0.0030431819 2.261968e-02 0.13453690 tax:nox -0.0268779706 1.709234e-01 -0.15725154 (Intercept) 31.2374984544 1.563041e+02 0.19985078 indus:rm -0.0310971102 1.485479e-01 -0.20934060 tax:indus:nox -0.0025044943 9.623445e-03 -0.26024925 The term that will be dropped is tax:indus:nox. The updated model is now: crim ~ (1 | chas) + tax + indus + nox + rm + ptratio + black + medv + I(tax^2) + I(indus^2) + I(nox^2) + I(ptratio^2) + I(tax^3) + I(tax^4) + tax:indus + tax:nox + tax:rm + tax:ptratio + tax:black + tax:medv + indus:nox + indus:ptratio + rm:ptratio + tax:rm:ptratio + 1 [1] "-------------------------------------------------" [1] "-------------------------------------------------" The top least significant terms are: Estimate Std. Error t value tax:indus 0.0001674884 5.058653e-03 0.03310929 black 0.0030431819 2.261968e-02 0.13453690 tax:nox -0.0268779706 1.709234e-01 -0.15725154 (Intercept) 31.2374984544 1.563041e+02 0.19985078 indus:rm -0.0310971102 1.485479e-01 -0.20934060 The term that will be dropped is tax:indus. The updated model is now: crim ~ (1 | chas) + tax + indus + nox + rm + ptratio + black + medv + I(tax^2) + I(indus^2) + I(nox^2) + I(ptratio^2) + I(tax^3) + I(tax^4) + tax:nox + tax:rm + tax:ptratio + tax:black + tax:medv + indus:nox + indus:ptratio + rm:ptratio + tax:rm:ptratio + 1 [1] "-------------------------------------------------" [1] "-------------------------------------------------" The top least significant terms are: Estimate Std. Error t value tax:indus 0.0001674884 5.058653e-03 0.03310929 black 0.0030431819 2.261968e-02 0.13453690 tax:nox -0.0268779706 1.709234e-01 -0.15725154 (Intercept) 31.2374984544 1.563041e+02 0.19985078 indus:rm -0.0310971102 1.485479e-01 -0.20934060 The term that will be dropped is tax:nox. The updated model is now: crim ~ (1 | chas) + tax + indus + nox + rm + ptratio + black + medv + I(tax^2) + I(indus^2) + I(nox^2) + I(ptratio^2) + I(tax^3) + I(tax^4) + tax:rm + tax:ptratio + tax:black + tax:medv + indus:nox + indus:ptratio + rm:ptratio + tax:rm:ptratio + 1 [1] "-------------------------------------------------" [1] "-------------------------------------------------" The top least significant terms are: Estimate Std. Error t value tax:indus 1.674884e-04 5.058653e-03 0.03310929 black 3.043182e-03 2.261968e-02 0.13453690 tax:nox -2.687797e-02 1.709234e-01 -0.15725154 (Intercept) 3.123750e+01 1.563041e+02 0.19985078 indus:rm -3.109711e-02 1.485479e-01 -0.20934060 tax:indus:nox -2.504494e-03 9.623445e-03 -0.26024925 indus -5.843739e-01 2.191973e+00 -0.26659723 tax:black -1.297411e-05 3.562471e-05 -0.36418834 The term that will be dropped is tax:black. The updated model is now: crim ~ (1 | chas) + tax + indus + nox + rm + ptratio + black + medv + I(tax^2) + I(indus^2) + I(nox^2) + I(ptratio^2) + I(tax^3) + I(tax^4) + tax:rm + tax:ptratio + tax:medv + indus:nox + indus:ptratio + rm:ptratio + tax:rm:ptratio + 1 [1] "-------------------------------------------------" [1] "-------------------------------------------------" The top least significant terms are: Estimate Std. Error t value tax:indus 0.0001674884 5.058653e-03 0.03310929 black 0.0030431819 2.261968e-02 0.13453690 tax:nox -0.0268779706 1.709234e-01 -0.15725154 (Intercept) 31.2374984544 1.563041e+02 0.19985078 indus:rm -0.0310971102 1.485479e-01 -0.20934060 The term that will be dropped is black. The updated model is now: crim ~ (1 | chas) + tax + indus + nox + rm + ptratio + medv + I(tax^2) + I(indus^2) + I(nox^2) + I(ptratio^2) + I(tax^3) + I(tax^4) + tax:rm + tax:ptratio + tax:medv + indus:nox + indus:ptratio + rm:ptratio + tax:rm:ptratio + 1 [1] "-------------------------------------------------" [1] "-------------------------------------------------" The top least significant terms are: Estimate Std. Error t value tax:indus 1.674884e-04 5.058653e-03 0.03310929 black 3.043182e-03 2.261968e-02 0.13453690 tax:nox -2.687797e-02 1.709234e-01 -0.15725154 (Intercept) 3.123750e+01 1.563041e+02 0.19985078 indus:rm -3.109711e-02 1.485479e-01 -0.20934060 tax:indus:nox -2.504494e-03 9.623445e-03 -0.26024925 indus -5.843739e-01 2.191973e+00 -0.26659723 tax:black -1.297411e-05 3.562471e-05 -0.36418834 I(ptratio^2) 6.574626e-02 8.999233e-02 0.73057626 The term that will be dropped is I(ptratio^2). The updated model is now: crim ~ (1 | chas) + tax + indus + nox + rm + ptratio + medv + I(tax^2) + I(indus^2) + I(nox^2) + I(tax^3) + I(tax^4) + tax:rm + tax:ptratio + tax:medv + indus:nox + indus:ptratio + rm:ptratio + tax:rm:ptratio + 1 [1] "-------------------------------------------------" [1] "-------------------------------------------------" The top least significant terms are: Estimate Std. Error t value tax:indus 1.674884e-04 5.058653e-03 0.03310929 black 3.043182e-03 2.261968e-02 0.13453690 tax:nox -2.687797e-02 1.709234e-01 -0.15725154 (Intercept) 3.123750e+01 1.563041e+02 0.19985078 indus:rm -3.109711e-02 1.485479e-01 -0.20934060 tax:indus:nox -2.504494e-03 9.623445e-03 -0.26024925 indus -5.843739e-01 2.191973e+00 -0.26659723 tax:black -1.297411e-05 3.562471e-05 -0.36418834 I(ptratio^2) 6.574626e-02 8.999233e-02 0.73057626 tax 6.326939e-01 7.031410e-01 0.89981087 indus:ptratio -4.051528e-02 3.955993e-02 -1.02414938 The term that will be dropped is indus:ptratio. The updated model is now: crim ~ (1 | chas) + tax + indus + nox + rm + ptratio + medv + I(tax^2) + I(indus^2) + I(nox^2) + I(tax^3) + I(tax^4) + tax:rm + tax:ptratio + tax:medv + indus:nox + rm:ptratio + tax:rm:ptratio + 1 [1] "-------------------------------------------------" [1] "-------------------------------------------------" The top least significant terms are: Estimate Std. Error t value tax:indus 1.674884e-04 5.058653e-03 0.03310929 black 3.043182e-03 2.261968e-02 0.13453690 tax:nox -2.687797e-02 1.709234e-01 -0.15725154 (Intercept) 3.123750e+01 1.563041e+02 0.19985078 indus:rm -3.109711e-02 1.485479e-01 -0.20934060 tax:indus:nox -2.504494e-03 9.623445e-03 -0.26024925 indus -5.843739e-01 2.191973e+00 -0.26659723 tax:black -1.297411e-05 3.562471e-05 -0.36418834 I(ptratio^2) 6.574626e-02 8.999233e-02 0.73057626 tax 6.326939e-01 7.031410e-01 0.89981087 indus:ptratio -4.051528e-02 3.955993e-02 -1.02414938 rm -2.052793e+01 1.817706e+01 -1.12933178 rm:ptratio 1.083527e+00 9.260817e-01 1.17001269 ptratio -9.388455e+00 7.793890e+00 -1.20459166 nox 9.511671e+01 7.714564e+01 1.23295003 tax:rm 7.040135e-02 5.601379e-02 1.25685742 tax:ptratio 2.430652e-02 1.891884e-02 1.28477860 I(indus^2) -2.558391e-02 1.893027e-02 -1.35148107 The term that will be dropped is I(indus^2). The updated model is now: crim ~ (1 | chas) + tax + indus + nox + rm + ptratio + medv + I(tax^2) + I(nox^2) + I(tax^3) + I(tax^4) + tax:rm + tax:ptratio + tax:medv + indus:nox + rm:ptratio + tax:rm:ptratio + 1 [1] "-------------------------------------------------" [1] "-------------------------------------------------" The top least significant terms are: Estimate Std. Error t value tax:indus 1.674884e-04 5.058653e-03 0.03310929 black 3.043182e-03 2.261968e-02 0.13453690 tax:nox -2.687797e-02 1.709234e-01 -0.15725154 (Intercept) 3.123750e+01 1.563041e+02 0.19985078 indus:rm -3.109711e-02 1.485479e-01 -0.20934060 tax:indus:nox -2.504494e-03 9.623445e-03 -0.26024925 indus -5.843739e-01 2.191973e+00 -0.26659723 tax:black -1.297411e-05 3.562471e-05 -0.36418834 I(ptratio^2) 6.574626e-02 8.999233e-02 0.73057626 tax 6.326939e-01 7.031410e-01 0.89981087 indus:ptratio -4.051528e-02 3.955993e-02 -1.02414938 rm -2.052793e+01 1.817706e+01 -1.12933178 rm:ptratio 1.083527e+00 9.260817e-01 1.17001269 ptratio -9.388455e+00 7.793890e+00 -1.20459166 nox 9.511671e+01 7.714564e+01 1.23295003 tax:rm 7.040135e-02 5.601379e-02 1.25685742 tax:ptratio 2.430652e-02 1.891884e-02 1.28477860 I(indus^2) -2.558391e-02 1.893027e-02 -1.35148107 tax:rm:ptratio -3.701304e-03 2.699979e-03 -1.37086379 The term that will be dropped is tax:rm:ptratio. The updated model is now: crim ~ (1 | chas) + tax + indus + nox + rm + ptratio + medv + I(tax^2) + I(nox^2) + I(tax^3) + I(tax^4) + tax:rm + tax:ptratio + tax:medv + indus:nox + rm:ptratio + 1 [1] "-------------------------------------------------" [1] "-------------------------------------------------" The top least significant terms are: Estimate Std. Error t value tax:indus 1.674884e-04 5.058653e-03 0.03310929 black 3.043182e-03 2.261968e-02 0.13453690 tax:nox -2.687797e-02 1.709234e-01 -0.15725154 (Intercept) 3.123750e+01 1.563041e+02 0.19985078 indus:rm -3.109711e-02 1.485479e-01 -0.20934060 tax:indus:nox -2.504494e-03 9.623445e-03 -0.26024925 indus -5.843739e-01 2.191973e+00 -0.26659723 tax:black -1.297411e-05 3.562471e-05 -0.36418834 I(ptratio^2) 6.574626e-02 8.999233e-02 0.73057626 tax 6.326939e-01 7.031410e-01 0.89981087 indus:ptratio -4.051528e-02 3.955993e-02 -1.02414938 rm -2.052793e+01 1.817706e+01 -1.12933178 rm:ptratio 1.083527e+00 9.260817e-01 1.17001269 The term that will be dropped is rm:ptratio. The updated model is now: crim ~ (1 | chas) + tax + indus + nox + rm + ptratio + medv + I(tax^2) + I(nox^2) + I(tax^3) + I(tax^4) + tax:rm + tax:ptratio + tax:medv + indus:nox + 1 [1] "-------------------------------------------------" [1] "-------------------------------------------------" The top least significant terms are: Estimate Std. Error t value tax:indus 1.674884e-04 5.058653e-03 0.03310929 black 3.043182e-03 2.261968e-02 0.13453690 tax:nox -2.687797e-02 1.709234e-01 -0.15725154 (Intercept) 3.123750e+01 1.563041e+02 0.19985078 indus:rm -3.109711e-02 1.485479e-01 -0.20934060 tax:indus:nox -2.504494e-03 9.623445e-03 -0.26024925 indus -5.843739e-01 2.191973e+00 -0.26659723 tax:black -1.297411e-05 3.562471e-05 -0.36418834 I(ptratio^2) 6.574626e-02 8.999233e-02 0.73057626 tax 6.326939e-01 7.031410e-01 0.89981087 indus:ptratio -4.051528e-02 3.955993e-02 -1.02414938 rm -2.052793e+01 1.817706e+01 -1.12933178 rm:ptratio 1.083527e+00 9.260817e-01 1.17001269 ptratio -9.388455e+00 7.793890e+00 -1.20459166 nox 9.511671e+01 7.714564e+01 1.23295003 tax:rm 7.040135e-02 5.601379e-02 1.25685742 The term that will be dropped is tax:rm. The updated model is now: crim ~ (1 | chas) + tax + indus + nox + rm + ptratio + medv + I(tax^2) + I(nox^2) + I(tax^3) + I(tax^4) + tax:ptratio + tax:medv + indus:nox + 1 [1] "-------------------------------------------------" [1] "-------------------------------------------------" The top least significant terms are: Estimate Std. Error t value tax:indus 1.674884e-04 5.058653e-03 0.03310929 black 3.043182e-03 2.261968e-02 0.13453690 tax:nox -2.687797e-02 1.709234e-01 -0.15725154 (Intercept) 3.123750e+01 1.563041e+02 0.19985078 indus:rm -3.109711e-02 1.485479e-01 -0.20934060 tax:indus:nox -2.504494e-03 9.623445e-03 -0.26024925 indus -5.843739e-01 2.191973e+00 -0.26659723 tax:black -1.297411e-05 3.562471e-05 -0.36418834 I(ptratio^2) 6.574626e-02 8.999233e-02 0.73057626 tax 6.326939e-01 7.031410e-01 0.89981087 indus:ptratio -4.051528e-02 3.955993e-02 -1.02414938 rm -2.052793e+01 1.817706e+01 -1.12933178 rm:ptratio 1.083527e+00 9.260817e-01 1.17001269 ptratio -9.388455e+00 7.793890e+00 -1.20459166 nox 9.511671e+01 7.714564e+01 1.23295003 tax:rm 7.040135e-02 5.601379e-02 1.25685742 tax:ptratio 2.430652e-02 1.891884e-02 1.28477860 The term that will be dropped is tax:ptratio. The updated model is now: crim ~ (1 | chas) + tax + indus + nox + rm + ptratio + medv + I(tax^2) + I(nox^2) + I(tax^3) + I(tax^4) + tax:medv + indus:nox + 1 [1] "-------------------------------------------------" [1] "-------------------------------------------------" The top least significant terms are: Estimate Std. Error t value tax:indus 1.674884e-04 5.058653e-03 0.03310929 black 3.043182e-03 2.261968e-02 0.13453690 tax:nox -2.687797e-02 1.709234e-01 -0.15725154 (Intercept) 3.123750e+01 1.563041e+02 0.19985078 indus:rm -3.109711e-02 1.485479e-01 -0.20934060 tax:indus:nox -2.504494e-03 9.623445e-03 -0.26024925 indus -5.843739e-01 2.191973e+00 -0.26659723 tax:black -1.297411e-05 3.562471e-05 -0.36418834 I(ptratio^2) 6.574626e-02 8.999233e-02 0.73057626 tax 6.326939e-01 7.031410e-01 0.89981087 indus:ptratio -4.051528e-02 3.955993e-02 -1.02414938 rm -2.052793e+01 1.817706e+01 -1.12933178 rm:ptratio 1.083527e+00 9.260817e-01 1.17001269 ptratio -9.388455e+00 7.793890e+00 -1.20459166 The term that will be dropped is ptratio. The updated model is now: crim ~ (1 | chas) + tax + indus + nox + rm + medv + I(tax^2) + I(nox^2) + I(tax^3) + I(tax^4) + tax:medv + indus:nox + 1 [1] "-------------------------------------------------" [1] "-------------------------------------------------" The top least significant terms are: Estimate Std. Error t value tax:indus 1.674884e-04 5.058653e-03 0.03310929 black 3.043182e-03 2.261968e-02 0.13453690 tax:nox -2.687797e-02 1.709234e-01 -0.15725154 (Intercept) 3.123750e+01 1.563041e+02 0.19985078 indus:rm -3.109711e-02 1.485479e-01 -0.20934060 tax:indus:nox -2.504494e-03 9.623445e-03 -0.26024925 indus -5.843739e-01 2.191973e+00 -0.26659723 tax:black -1.297411e-05 3.562471e-05 -0.36418834 I(ptratio^2) 6.574626e-02 8.999233e-02 0.73057626 tax 6.326939e-01 7.031410e-01 0.89981087 indus:ptratio -4.051528e-02 3.955993e-02 -1.02414938 rm -2.052793e+01 1.817706e+01 -1.12933178 rm:ptratio 1.083527e+00 9.260817e-01 1.17001269 ptratio -9.388455e+00 7.793890e+00 -1.20459166 nox 9.511671e+01 7.714564e+01 1.23295003 tax:rm 7.040135e-02 5.601379e-02 1.25685742 tax:ptratio 2.430652e-02 1.891884e-02 1.28477860 I(indus^2) -2.558391e-02 1.893027e-02 -1.35148107 tax:rm:ptratio -3.701304e-03 2.699979e-03 -1.37086379 indus:nox 4.655374e+00 3.038110e+00 1.53232540 The term that will be dropped is indus:nox. The updated model is now: crim ~ (1 | chas) + tax + indus + nox + rm + medv + I(tax^2) + I(nox^2) + I(tax^3) + I(tax^4) + tax:medv + 1 [1] "-------------------------------------------------" [1] "-------------------------------------------------" The top least significant terms are: Estimate Std. Error t value tax:indus 0.0001674884 5.058653e-03 0.03310929 black 0.0030431819 2.261968e-02 0.13453690 tax:nox -0.0268779706 1.709234e-01 -0.15725154 (Intercept) 31.2374984544 1.563041e+02 0.19985078 indus:rm -0.0310971102 1.485479e-01 -0.20934060 tax:indus:nox -0.0025044943 9.623445e-03 -0.26024925 indus -0.5843739448 2.191973e+00 -0.26659723 The term that will be dropped is indus. The updated model is now: crim ~ (1 | chas) + tax + nox + rm + medv + I(tax^2) + I(nox^2) + I(tax^3) + I(tax^4) + tax:medv + 1 [1] "-------------------------------------------------" [1] "-------------------------------------------------" The top least significant terms are: Estimate Std. Error t value tax:indus 1.674884e-04 5.058653e-03 0.03310929 black 3.043182e-03 2.261968e-02 0.13453690 tax:nox -2.687797e-02 1.709234e-01 -0.15725154 (Intercept) 3.123750e+01 1.563041e+02 0.19985078 indus:rm -3.109711e-02 1.485479e-01 -0.20934060 tax:indus:nox -2.504494e-03 9.623445e-03 -0.26024925 indus -5.843739e-01 2.191973e+00 -0.26659723 tax:black -1.297411e-05 3.562471e-05 -0.36418834 I(ptratio^2) 6.574626e-02 8.999233e-02 0.73057626 tax 6.326939e-01 7.031410e-01 0.89981087 indus:ptratio -4.051528e-02 3.955993e-02 -1.02414938 rm -2.052793e+01 1.817706e+01 -1.12933178 rm:ptratio 1.083527e+00 9.260817e-01 1.17001269 ptratio -9.388455e+00 7.793890e+00 -1.20459166 nox 9.511671e+01 7.714564e+01 1.23295003 tax:rm 7.040135e-02 5.601379e-02 1.25685742 tax:ptratio 2.430652e-02 1.891884e-02 1.28477860 I(indus^2) -2.558391e-02 1.893027e-02 -1.35148107 tax:rm:ptratio -3.701304e-03 2.699979e-03 -1.37086379 indus:nox 4.655374e+00 3.038110e+00 1.53232540 medv 4.050491e-01 2.085061e-01 1.94262503 I(nox^2) -1.086419e+02 5.043113e+01 -2.15426230 The term that will be dropped is I(nox^2). The updated model is now: crim ~ (1 | chas) + tax + nox + rm + medv + I(tax^2) + I(tax^3) + I(tax^4) + tax:medv + 1 'I(nox^2)' has a bigger tValue than zValue calculated from the alpha the user provided, so 'I(nox^2)' will be added back to the model and the model selection process terminates. We are using normal approximation to calculate the t cut off. For example, if alpha = 0.05, the cut-off is approximately 1.96. [1] "-------------------------------------------------" [1] "-------------------------------------------------" [1] "-------------------------------------------------" $fullModel [1] "lmer(crim ~ (1 | chas) + tax + indus + nox + rm + ptratio + black + medv + I(tax^2) + I(indus^2) + I(nox^2) + I(ptratio^2) + I(tax^3) + I(tax^4) + tax:indus + tax:nox + tax:rm + tax:ptratio + tax:black + tax:medv + indus:nox + indus:rm + indus:ptratio + rm:ptratio + tax:indus:nox + tax:rm:ptratio)" $finalModel [1] "lmer(crim ~ (1 | chas)+tax+nox+rm+medv+I(tax^2)+I(tax^3)+I(tax^4)+tax:medv+I(nox^2))" $termsKept [1] "rm" $direction [1] "backward" $method [1] "tValue" $path Term dropped tValue t value indus:rm 0.20934060 t value1 tax:indus:nox 0.26024925 t value2 tax:indus 0.03310929 t value3 tax:nox 0.15725154 t value4 tax:black 0.36418834 t value5 black 0.13453690 t value6 I(ptratio^2) 0.73057626 t value7 indus:ptratio 1.02414938 t value8 I(indus^2) 1.35148107 t value9 tax:rm:ptratio 1.37086379 t value10 rm:ptratio 1.17001269 t value11 tax:rm 1.25685742 t value12 tax:ptratio 1.28477860 t value13 ptratio 1.20459166 t value14 indus:nox 1.53232540 t value15 indus 0.26659723 t value16 I(nox^2) 2.15426230 $threshold alpha zValue 0.050000 1.644854 $runningTime [1] "3.32 secs" modSel/man/0000755000076500000240000000000013211325404011601 5ustar00CiCistaffmodSel/man/modelSelectionF.Rd0000644000076500000240000001402013167027214015151 0ustar00CiCistaff\name{modelSelection} \alias{modelSelection} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Model Selection } \description{ It is important to preserve the fixed effects hierarchy. The function \emph{modelSelection} performs backward or forward model selection with both mixed or fixed effects models based on different criterions while preserving the effects hierarchy. } \usage{ modelSelection(dat, response, random = NULL, fixed, keep = NULL, direction, method = "AIC", alpha = 0.05, sinkit = NULL) } \arguments{ \item{dat}{ A data frame containing the variables named in inputs fixed, random, and response. } \item{response}{ A string containing the name of the dependent variable. } \item{random}{ A vector containing all the random effects. Optional. } \item{fixed}{ A vector containing all the fixed effetcs, including main effects, polynomial terms of any degree, and interections among any number of main effects. } \item{keep}{ A vector containing any term(s) you want to keep in the model. Optional. } \item{direction}{ A string representing the direction: \emph{"forward"} or \emph{"backward"}. } \item{method}{ A string representing the method for choosing the model at each step for mixed model selection. Options are: \emph{"AIC"}, \emph{"BIC"}, \emph{"LRT"} (Likelihood Ratio Test) and \emph{"tValue"}. Default is "AIC". } \item{alpha}{ A numeric value representing the significance level for fixed model selection and LRT and tValue options for mixed model selection. Default is 0.05. } \item{sinkit}{ A string designating a file name that will be created in the working directory and that will contain the model selection process steps. If the file name is not provided, all the intermediate steps will be printed to R console. } } \details{ For backward model selection, the program will start with a full model with all the terms the user provided. If there is no random effect and all terms are continuous, a t-test will be used to drop the least significant term; if there is at least one categorical variable, an F-test will be used. If there is a random effect, the user has the option of using \emph{AIC}, \emph{BIC}, \emph{LRT} and \emph{tValue} (tValue only if all variables are continuous) to evaluate competing models. For forward model selection, the program will start with the random effects (if any were provided) and will sequentially add fixed effects while preserving effect hierarchy. If certain terms are designated to be in the model via input \emph{keep}, those will be entered first. If there is no random effect, an F-test will be used to evaluate competing models. If there is a random effect, the user has the option of using \emph{AIC}, \emph{BIC}, and \emph{LRT} to evaluate competing models. The random effects are always kept in the model regardless of the type of selection. Mixed effect models are fitted using \emph{lmer} function from \emph{lme4} package. } \value{ \itemize{ The function will return a list of the following items: \item \emph{fullModel:} the original model if applicable. \item \emph{finalModel:} the final model. \item \emph{termKept:} the terms that are kept in the model selection process at every stage if applicable. \item \emph{direction:} the selection direction: \emph{forward} or \emph{backward}. \item \emph{method:} the method used to evaluate the models. \item \emph{origMethodValue:} the \emph{AIC}, \emph{BIC} or \emph{log likelihood} value for the original model if applicable. \item \emph{path:} the sequence of variables added or dropped at each stage. \item \emph{alpha:} the significance level if applicable. For backward mixed model selection using the \emph{tValue} option, the z-value approximation is also provided. \item \emph{runningTime:} the time it took to run the program. } } \author{ ChengCheng Zhai , Danel Draguljić } %%\note{ %%No automated process can replace human thinking. There is no guarantee that %%\emph{modSel()} will give you the best model. %%} %% ~Make other sections like Warning with \section{Warning }{....} ~ \seealso{ /uploads/files/522947443609815338-exampleforwebsite.pdf } \reference{ Kutner, M. H., Nachtsheim, C., Neter, J., & Li, W. (2005). Applied linear statistical models. } \examples{ \dontshow{ library(MASS) fxd = c("tax", "indus", "nox", "rm", "I(tax^2)", "I(indus^2)", "I(tax^3)", "tax:indus", "tax:nox", "tax:rm") # fixed model/backward/alpha = 0.025 modelSelection(response = "crim", fixed = fxd, direction = "backward", dat = Boston, alpha = 0.025) } \donttest{ # Use the Boston data set from MASS library(MASS) # make chas a factor to use it as a random effect Boston$chas = as.factor(Boston$chas) fxd = c("tax", "indus", "nox", "rm", "ptratio", "black", "medv", "I(tax^2)", "I(indus^2)", "I(nox^2)", "I(ptratio^2)", "I(tax^3)", "I(tax^4)", "tax:indus", "tax:nox", "tax:rm", "tax:ptratio", "tax:black", "tax:medv", "indus:nox", "indus:rm", "indus:ptratio", "rm:ptratio", "tax:nox:indus", "tax:rm:ptratio") rndm = c("(1 | chas)") keep = c("rm") # mixed model/backward/BIC modelSelection(response = "crim", random = rndm, fixed = fxd, dat = Boston, direction = "backward", method = "BIC", sinkit = "example.txt") # mixed model/forward/LRT/alpha default modelSelection(response = "crim", random = rndm, fixed = fxd, direction = "forward", keep = keep, method = "LRT", dat = Boston) # fixed model/forward/alpha = 0.025 modelSelection(response = "crim", fixed = fxd, direction = "forward", keep = keep, dat = Boston, alpha = 0.025) } } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{effect hierarchy} \keyword{model selection} \keyword{mixed model}