# All of the data I am working with has already been combined, so I am creating a new folder of random files for this lab.
##################################################
# function: file_creator
# purpose: create a set of random files for regression
# input: file_n = number of files to create
# : file_folder = name of folder for random files; it is pasted directly
#   in front of the file name, so it should end in "/" (created if missing)
# : file_size = c(min,max) number of rows in file
# : file_na = number on average of NA values per column
# output: set of random files (ranFile001.csv, ranFile002.csv, ...);
#   returns NULL invisibly
#-------------------------------------------------
file_creator <- function(file_n = 15,
                         file_folder = "NewFiles/",
                         file_size = c(60, 1000),
                         file_na = 14) {
  # make sure the target folder exists; stays silent if it is already there
  if (nzchar(file_folder)) {
    dir.create(file_folder, showWarnings = FALSE, recursive = TRUE)
  }

  # candidate row counts; indexing with sample.int() avoids the sample()
  # pitfall where sample(5:5, 1) would draw from 1:5 instead of returning 5
  row_counts <- file_size[1]:file_size[2]

  for (i in seq_len(file_n)) {
    file_length <- row_counts[sample.int(length(row_counts), size = 1)]
    var_x <- runif(file_length) # create random x
    var_y <- runif(file_length) # create random y
    df <- data.frame(var_x, var_y) # bind into a data frame

    # number of NA values, capped so sample() cannot ask for more rows
    # than the data frame has
    bad_vals <- min(rpois(n = 1, lambda = file_na), nrow(df))
    df[sample(nrow(df), size = bad_vals), 1] <- NA # random NA in var_x
    df[sample(nrow(df), size = bad_vals), 2] <- NA # random NA in var_y

    # create label for file name with padded zeroes
    file_label <- paste0(file_folder, "ranFile", sprintf("%03d", i), ".csv")

    # write the metadata header and the data through one open connection:
    # nothing is echoed to the console and no "appending column names"
    # warning is raised
    con <- file(file_label, open = "wt")
    tryCatch(
      {
        cat("# Simulated random data file for batch processing", "\n",
          "# timestamp: ", as.character(Sys.time()), "\n",
          "# GED", "\n",
          "# ------------------------", "\n",
          "\n",
          file = con,
          sep = ""
        )
        write.table(
          x = df,
          file = con,
          sep = ",",
          row.names = FALSE
        )
      },
      finally = close(con)
    )
  }
  invisible(NULL)
}
##################################################
# function: reg_stats
# fits linear model, extracts statistics
# input: 2-column data frame (x and y); if NULL, 10 random uniform
#   x/y pairs are used
# output: list with slope, std_error (standard error of the slope), and r2;
#   all three are NA when no slope can be estimated (no complete rows,
#   a single row, or constant x)
#-------------------------------------------------
reg_stats <- function(d = NULL) {
  if (is.null(d)) {
    x_var <- runif(10)
    y_var <- runif(10)
    d <- data.frame(x_var, y_var)
  }

  na_stats <- list(slope = NA_real_, std_error = NA_real_, r2 = NA_real_)

  # lm() stops with an error when there are no usable rows at all
  if (!any(complete.cases(d[, 1:2, drop = FALSE]))) {
    return(na_stats)
  }

  # column 2 is the response, column 1 the predictor
  fit_summary <- summary(lm(d[, 2] ~ d[, 1], data = d))
  coefs <- fit_summary$coefficients

  # summary() drops coefficients that could not be estimated, so the slope
  # row is missing when x carries no information
  if (nrow(coefs) < 2L) {
    return(na_stats)
  }

  list(
    slope = coefs[2, 1],
    std_error = coefs[2, 2],
    r2 = fit_summary$r.squared
  )
}
##################################################
# function: log_stats
# fits a generalized linear model with glm(), extracts statistics
# NOTE(review): the name suggests logistic regression, but glm() is called
#   with its default (gaussian) family -- confirm the intended family
# input: 2-column data frame (x and y); if NULL, 10 random uniform
#   x/y pairs are used
# output: list with slope, std_error (standard error of the slope), and r2
#   (1 - deviance / null deviance); all three are NA when no slope can be
#   estimated
#-------------------------------------------------
log_stats <- function(d = NULL) {
  if (is.null(d)) {
    x_var <- runif(10)
    y_var <- runif(10)
    d <- data.frame(x_var, y_var)
  }

  na_stats <- list(slope = NA_real_, std_error = NA_real_, r2 = NA_real_)

  # nothing to fit when there are no complete rows
  if (!any(complete.cases(d[, 1:2, drop = FALSE]))) {
    return(na_stats)
  }

  # column 2 is the response, column 1 the predictor
  fit <- glm(d[, 2] ~ d[, 1], data = d)
  coefs <- summary(fit)$coefficients

  # the slope row is absent when the coefficient could not be estimated
  if (nrow(coefs) < 2L) {
    return(na_stats)
  }

  # summary.glm() has no r.squared component (it was always NULL before);
  # use the deviance-based equivalent instead
  list(
    slope = coefs[2, 1],
    std_error = coefs[2, 2],
    r2 = 1 - fit$deviance / fit$null.deviance
  )
}
#--------------------------------------------
# Global variables
file_folder <- "NewFiles/"
n_files <- 120
file_out <- "StatsSummary.csv"
#--------------------------------------------
# Create 120 random data sets
# showWarnings = FALSE keeps re-runs quiet when the folder already exists
dir.create(file_folder, showWarnings = FALSE)
# pass the folder explicitly so the files are written to the same place
# the batch step reads from, instead of relying on the function default
file_creator(file_n = n_files, file_folder = file_folder)
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
file_names <- list.files(path = file_folder)
# Create data frame to hold file summary statistics
# every column is sized from the files actually found, so leftover or
# missing files in the folder cannot cause a row-count mismatch
ID <- seq_along(file_names)
file_name <- file_names
slope <- rep(NA_real_, length(file_names))
std_error <- rep(NA_real_, length(file_names))
r2 <- rep(NA_real_, length(file_names))
stats_out <- data.frame(ID, file_name, slope, std_error, r2)
# batch process by looping through individual files
for (i in seq_along(file_names)) {
  # read in next data file
  raw_data <- read.table(
    file = paste0(file_folder, file_names[i]),
    sep = ",",
    header = TRUE
  )
  d_clean <- raw_data[complete.cases(raw_data), ] # get clean cases
  file_stats <- reg_stats(d_clean) # pull regression stats from clean file
  # unlist, copy into the three statistics columns
  stats_out[i, c("slope", "std_error", "r2")] <- unlist(file_stats)
}
# set up output file and incorporate time stamp and minimal metadata
# header and data go through one open connection, so nothing is echoed to
# the console and no "appending column names" warning is raised
summary_con <- file(file_out, open = "wt")
tryCatch(
  {
    cat("# Summary stats for ",
      "batch processing of regression models", "\n",
      "# timestamp: ", as.character(Sys.time()), "\n",
      "# GED", "\n",
      "# ------------------------", "\n",
      "\n",
      file = summary_con,
      sep = ""
    )
    # now add the data frame
    write.table(
      x = stats_out,
      file = summary_con,
      row.names = FALSE,
      col.names = TRUE,
      sep = ","
    )
  },
  finally = close(summary_con)
)