--- title: "Test preparation 14 Sept" author: "Jonathan Herrewijnen" date: "September 14, 2018" output: html_document --- ```{r setup, include=FALSE} knitr::opts_chunk$set(echo = TRUE) ``` ## R Markdown This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see . When you click the **Knit** button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this: ```{r cars} summary(cars) ``` ## Including Plots You can also embed plots, for example: ```{r pressure, echo=FALSE} plot(pressure) ``` Note that the `echo = FALSE` parameter was added to the code chunk to prevent printing of the R code that generated the plot. ```{r} #Library etc require(reshape2) iris$id <- rownames(iris) iris_long <- melt(iris) ## Using Species, id as id variables iris_wide <- dcast(formula = Species + id ~ variable, data = iris_long ) head(iris_wide) #Calculate the average of each measurement for long/wide head(iris_wide) aggregate(formula = .~ Species, data = iris_wide[, -2], FUN = mean) #Offff aggregate(cbind(Sepal.Length, Sepal.Width) ~Species, data=iris_wide, FUN =mean) aggregate(formula = value ~ Species + variable, data = iris_long, FUN = mean) #Create list from data iris_list <- split(iris, iris$Species) head(iris_list) summary_list = alist() for( i in seq_along(iris_list)){ summary_list[[i]] <- summary(iris_list[[i]]) } summary_list #Create list using lapply() lapply(iris_list, summary) #Cars library head(mtcars) wt_hp_perCyl <- aggregate(cbind(wt, hp) ~ cyl, data = mtcars, FUN = mean) wt_hp_perCyl #merging merged_result <- merge(mtcars, wt_hp_perCyl, by = "cyl", suffixes = c("", "_mean")) head(merged_result) #Selecting sel_mtcars <- mtcars[ (mtcars$cyl == 6 | mtcars$cyl == 8) & mtcars$gear <= 3 & mtcars$mpg > 19 ,] sel_mtcars ``` //PLOTS ETC ```{r} plot(x = iris$Sepal.Length, y = iris$Sepal.Width) abline(a = 3.5, b = -0.1, col = 'red', lwd = 3) myFun <- function(input_vector, a ,b) { y <- a + b*input_vector return(y) } lines(iris$Sepal.Length,myFun(input_vector = iris$Sepal.Length, a = 4, b = -0.15), col = 'blue', cex = 3 ) par(mfrow = c(2, 2)) # for plotting 4 graphs in a single figure. with(iris, hist(Sepal.Length)) with(iris, hist(Sepal.Width)) with(iris, hist(Petal.Length)) with(iris, plot(iris$Sepal.Length, Petal.Width)) require(ggplot2) summary(midwest) #SELEcTING SOMETHING FROM A GRAPH diamonds[ diamonds$y == 58.9, ] ``` //ACTUAL TEST HERE ```{r} a <- 7 myFun <- function(x) { a <- 2 y <- x + a return(y) a } summary(a) plot(Sepal.Length~Sepal.Width,data = iris) plot(Sepal.Length~Species,data = iris) summary(iris) my_data <- iris_wide iris_wide my_data[[3]]$Variable[8] iris$Species[51] arg1 <- 1 arg2 <-2 input <- 3 myFun <- function(arg1, arg2, x) { if( is.numeric(c(arg1,arg2))){ result <- cor(arg1, arg2) } else{ print("Input should be numeric") } } y <- myFun(1, 2, x) y ``` ```{r} myFun <- function(arg1, arg2, input) { if( is.numeric(c(arg1,arg2))){ result <- cor(arg1, arg2) } else{ print("Input should be numeric") } } y <- myFun(1, 2, 5) y+1 cor(1, 2) a<-1 b<-2 myFun <- function(a, b) { result1 <- a * b result2 <- a^b result3 <- seq(from = a, to = b, length.out = 10) my_list <- list() my_list[[1]] <- result1 my_list[[2]] <- result2 my_list[[3]] <- result3 return(my_list) } myFun(10, 20) summary(iris) iris_slct <- iris[(iris$Species == "setosa" & iris$Sepal.Length<4.5) | (iris$Species =="versicolor" & iris$Sepal.Width>3.5), ] iris_slct2 <- iris[(iris$Species =="virginica" & iris$Sepal.Width>3.5), ] iris_slct iris_slct2 library(reshape2) require(reshape2) ?sapply ?ggplot ?aes ?aggregate ?by ?split ?reshape2 ?cars split(iris) ``` //THE EXERCISE dat is the input dataframe cat_colnames is the column name to summarize by. sel_colname is the column what will be filtered/ selected over sel_value is the value we filter/select over The function should select all rows smaller than the sel_value within the sel_colname column. The function should calculate the mean of all columns of your selected data per level of your cat_colname column. The result of the function must be a list containing the result of the filtering and the result of the summarization. In your R- script also run the function you created, you can use the iris dataset to test. Save the result of your function to disk using the save() function Write the function in a R script. Save the file with the same name as the function and with the .R extension. Upload the .R file and the upload the result of your function to BB. ```{r} dat <-iris #To get you started here are some hints: my_fun <- function(dat, cat_colname, sel_colname, sel_value) { #to filter/ select using the variable argument use: dat[dat[ ,sel_colname]