From 091c544cb52b93cac1f447f7cb5ba6f8275b99b0 Mon Sep 17 00:00:00 2001 From: Jonathan Herrewijnen Date: Sun, 4 Dec 2022 20:38:24 +0000 Subject: [PATCH] Adding old r scripts --- BOO/BOORstudioScriptV2_1.Rmd | 140 +++++++ Other/06 Sept CBR.Rmd | 144 +++++++ Other/14 Sept Test and Prep.Rmd | 311 +++++++++++++++ Other/ICT Sleep Data Case Session.Rmd | 173 +++++++++ Other/ICT_Sleep_Data_Case_Session.html | 360 ++++++++++++++++++ Other/Jonathan_Herrewijnen_s1830899_Anova.Rmd | 216 +++++++++++ Other/Test ICT 7 Sept.Rmd | 180 +++++++++ .../create_IFP_datasets_v2_dense.py | 0 readme.md | 3 + 9 files changed, 1527 insertions(+) create mode 100644 BOO/BOORstudioScriptV2_1.Rmd create mode 100644 Other/06 Sept CBR.Rmd create mode 100644 Other/14 Sept Test and Prep.Rmd create mode 100644 Other/ICT Sleep Data Case Session.Rmd create mode 100644 Other/ICT_Sleep_Data_Case_Session.html create mode 100644 Other/Jonathan_Herrewijnen_s1830899_Anova.Rmd create mode 100644 Other/Test ICT 7 Sept.Rmd rename create_IFP_datasets_v2_dense.py => RP1/create_IFP_datasets_v2_dense.py (100%) create mode 100644 readme.md diff --git a/BOO/BOORstudioScriptV2_1.Rmd b/BOO/BOORstudioScriptV2_1.Rmd new file mode 100644 index 0000000..32fb951 --- /dev/null +++ b/BOO/BOORstudioScriptV2_1.Rmd @@ -0,0 +1,140 @@ +--- +title: "BOOScript" +output: html_document +--- + +```{r setup, include=FALSE} +knitr::opts_chunk$set(echo = TRUE) +require(tidyverse) +require(MotilityLab) +require(magrittr) +require(tidyr) +require(stats) + +``` + +Basic peace of code to show the amount of cells over time (MCS) in the model. The second graph takes the root of these cells, the third one the cubic root. We believe it is most comparable to take the square root and not the cubic root (initially because volume is cubic, as is our data. + +We calculated this by getting the r out of the Volume (4/3*pi*r(1/3): volume of a sphere), and then using that r to calculate the surface area of a square area (pi*r^2: surface area) + +```{r} +#PLOT AMOUNT OF CELLS/CTL +setwd("D:/BFW/BOO/TestRuns/BOOModel_/") +Logger<- read.csv(file ="logger.txt", sep="") +plot(Logger$time, Logger$celltype.cells.size, main="Amount of cells in model", xlab="Time in MCS", ylab="Amount of cells in model", col = "blue") +plot(Logger$time, Logger$celltype.cells.size, main="Amount of cells in model (log)", xlab="Time in MCS (Logarithmic)", ylab="Amount of cells in model", log = "x", col = "blue") +plot(Logger$time, Logger$celltype.CTL.size, main="Amount of CTL in model", xlab="Time in MCS", ylab="Amount of cells in model") + +setwd("D:/BFW/BOO/TestRuns/BOOModel_12/") +Logger2<- read.csv(file ="logger.txt", sep="") +plot(Logger2$time, Logger2$celltype.cells.size, main="Amount of cells in model", xlab="Time in MCS", ylab="Amount of cells in model", col = "red") +plot(Logger2$time, Logger2$celltype.cells.size, main="Amount of cells in model (log)", xlab="Time in MCS (Logarithmic)", ylab="Amount of cells in model", log = "x", col = "red") + +Logger <- as.data.frame(Logger) +Logger$SurfaceSize <- Logger$celltype.cells.size + +Logger2 <- as.data.frame(Logger2) +Logger2$SurfaceSize <- Logger2$celltype.cells.size + +#PLOT CELLS IN MM2 (PLUS LOG) +Logger$SurfaceSize <- sqrt(Logger$SurfaceSize) +plot(Logger$time, Logger$SurfaceSize, main="Amount of cell area (mm2)", xlab="Time in MCS", ylab="sqrt of n cells", col = "blue") +plot(Logger$time, Logger$SurfaceSize, main="Amount of cell area (mm2)(log)", xlab="Time in MCS (Logarithmic)", ylab="sqrt of n cells", log = "x", col = "blue") + +Logger2$SurfaceSize <- sqrt(Logger2$SurfaceSize) +plot(Logger2$time, Logger2$SurfaceSize, main="Amount of cell area (mm2)(log)", xlab="Time in MCS", ylab="sqrt of n cells", col = "red") +plot(Logger2$time, Logger2$SurfaceSize, main="Amount of cell area (mm2)", xlab="Time in MCS (Logarithmic)", ylab="sqrt of n cells", log = "x", col = "red") + +#PLOT CELLS IN MM3 +Logger$VolumeSize <- Logger$celltype.cells.size +Logger$VolumeSize <- Logger$VolumeSize^(1/3) +plot(Logger$time, Logger$VolumeSize, main="Approximate amount of cell area (cubic, mm3)", xlab="Time in MCS", ylab="^(1/3) of n cells") +``` + +NEXT UP +Calculation and showing of CTL tracks, and thus migration speed and MSD, this can than be compared to the graphs we have from Weigelin et al. + +```{r} +#CELL TRACKS, not sure how it is presented however, just for the sake of it. +data_tracks <- as.tracks.data.frame(Logger, id.column="cell.id", time.column = "time", pos.columns = c("cell.center.x", "cell.center.y")) +#plot(data_tracks, main ="cell tracks density 100") +#plot(data_tracks) + +data_tracks2 <- as.tracks.data.frame(Logger2, id.column="cell.id", time.column = "time", pos.columns = c("cell.center.x", "cell.center.y")) +#plot(data_tracks, main ="cell tracks density 100") +#plot(data_tracks) + +#celltracks <- as.tracks.data.frame(Logger, id.column = "cell.id", time.column = "time", pos.columns = c("cell.center.x", "cell.center.y")) +#plot(celltracks) + +#MIGRATION SPEED +speed_CTL <- list() +for(i in seq_along(data_tracks)){ + speed_CTL[[i]] <- speed(data_tracks[[i]]) +} + +speed_CTL2 <- list() +for(i in seq_along(data_tracks2)){ + speed_CTL2[[i]] <- speed(data_tracks2[[i]]) +} + +#HERE THE ID's OF THE CTL CELLS (Define amount of tumor first, then calculate the amount of CTL's) +speed_cells_frame <- as.data.frame(speed_CTL[51:70]) +speed_cells_frame_long <- gather(speed_cells_frame) +speed_cells_frame_long<- speed_cells_frame_long$value + +boxplot(speed_cells_frame_long, main = "speed density 100") +?plot + +speed_cells_frame2 <- as.data.frame(speed_CTL2[51:70]) +speed_cells_frame_long2 <- gather(speed_cells_frame2) +speed_cells_frame_long2<- speed_cells_frame_long2$value + +boxplot(speed_cells_frame_long2, main = "speed density 100") +?plot + +#MEAN SQUARE DISPLACEMENT with CI 95% +# "mean.ci.95" Outputs the mean and upper and lower bound of a parametric 95 percent confidence intervall. +CTL_msqrd <- aggregate(data_tracks, squareDisplacement, FUN="mean.ci.95") +CTL_msqrd2 <- aggregate(data_tracks2, squareDisplacement, FUN="mean.ci.95") + +# data set with ONLY i and the Mean Sqr displacement +CTL_msqrd_1 <- subset(CTL_msqrd, select = -c(lower, upper)) +CTL_msqrd_2 <- subset(CTL_msqrd2, select = -c(lower, upper)) + +plot(CTL_msqrd_1, xlab="time step", + ylab="mean square displacement", main="Mean Square Displacement", type="l", col="blue") +lines(CTL_msqrd_2, xlab="time step", + ylab="mean square displacement", main="Mean Square Displacement", type="l", col="red") + +plot(CTL_msqrd_2, xlab="time step", + ylab="mean square displacement", main="Mean Square Displacement", type="l", col="red") +lines(CTL_msqrd_1, xlab="time step", + ylab="mean square displacement", main="Mean Square Displacement", type="l", col="blue") +legend(1, 60000, legend=c("No treatment, control", "CD137 treatment"), + col=c("red", "blue"), lty=1:2, cex=0.8) +``` + +```{r} +#AMOUNT OF CELLS KILLED +#The way this works is, we take the amount of cells proliferated, add the amount of cells spawned in with them. That is the total amount of cells we then COULD have in the model. But since CTL kill the cells, we take the total amount of cells in the model and calculate the difference: (All TC proliferation + initial TC amount) - Amount of cells in model +setwd("D:/BFW/BOO/MorpheusRuns/BOOModel_565/") +celldivision <- read.delim("cell_division_cells.txt") +AmountDivisions <- (nrow(celldivision)-1) +AmountDivisions + +#MANUAL INPUT TWICE +TotalPossibleCells <- 50 + AmountDivisions +TotalCellsInModel <- tail(Logger$celltype.cells.size, n=1) +TotalCellsKilled <- TotalPossibleCells - TotalCellsInModel + +TotalPossibleCells +TotalCellsInModel +TotalCellsKilled +celldivision + +plot(Logger$time, Logger$celltype.cells.size) + +mc.cores +``` + diff --git a/Other/06 Sept CBR.Rmd b/Other/06 Sept CBR.Rmd new file mode 100644 index 0000000..c69b260 --- /dev/null +++ b/Other/06 Sept CBR.Rmd @@ -0,0 +1,144 @@ +--- +title: "BCR 06 Sept" +author: "Jonathan Herrewijnen" +date: "September 6, 2018" +output: html_document +--- + +```{r setup, include=FALSE} +knitr::opts_chunk$set(echo = TRUE) +``` + +## R Markdown + +This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see . + +When you click the **Knit** button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this: + +```{r cars} +summary(cars) +``` + +## Including Plots + +You can also embed plots, for example: + +```{r pressure, echo=FALSE} +plot(pressure) +``` + +Note that the `echo = FALSE` parameter was added to the code chunk to prevent printing of the R code that generated the plot. + +//CBR Two dimensional phase portraits, equilibria and stability + +```{r} +#install.packages("phaseR") +library("phaseR") + +# define 2D ODE model +model = function(timepoint, state, parameters) { +with(as.list(c(state, parameters)), { +dx = -2 * x - 3 * y +dy = -x - 2 * y +list(c(dx, dy)) +}) +} +# for setting the axis limits +xlim = c(-10, 10) +ylim = c(-10, 10) +# no parameters in the ODE, so we set pars as an empty vector +pars = vector() + +# draw vector field +model.flowField = flowField(xlab = "x", ylab = "y", model, xlim = xlim, ylim = ylim, +parameters = pars, points = 19, add = FALSE, state.names = c("x", "y")) + +# draw nullclines +model.nullclines = nullclines(model, xlim = xlim, ylim = ylim, parameters = pars, +points = 500, state.names = c("x", "y")) + +# draw trajectory +model.trajectory = trajectory(model, y0 = c(-5, -5), tlim = c(0, 10), parameters = pars, +state.names = c("x", "y")) + +``` + +//Numerical Solution + +```{r} +# perform numerical simulation and plot end result +model.numericalSolution = numericalSolution(model, y0 = c(-5, -5), tlim = c(0, +20), type = "one", parameters = pars, col = c("green", "orange"), ylim = ylim, +add.legend = F) + +# make legend for plot +legend("topright", col = c("green", "orange"), legend = c("x", "y"), lty = 1) + +# after the next command, click mouse to find values of a point within the +# phase portrait (e.g., to approximate the value of an equilibrium) + +eq = locator(n = 1) +eq +``` + +//QUESTION 1 + +The 0, 0 location was determined from the first phase portrait! +Not from the second click portrait! + +```{r} +#?stability + +stability(model, ystar = c(0, 0)) +#stability(deriv, ystar = NULL, parameters = NULL, system = "two.dim", h = 1e-07, summary = TRUE, state.names = c("x", "y")) + + +``` + +//Question 2 +Meaning of T and Delta. + +T=(I THINK!) the value of the equilibrium at dx/dt=0 -> So for the equilibrium, in our case the equilibrium is stable. +Delta="In the two dimensional system case, value of the Jacobians determinant at y.star. ->So the Jacobian matrix + +//Question 3: PhasePlaneAnalysis + +```{r} +# perform phase plane analysis +phasePlaneAnalysis(model, xlim = xlim, ylim = ylim, parameters = pars) +``` + +//QUESTION 4-8 +```{r} +library("phaseR") +# define 2D ODE model +model = function(timepoint, state, parameters) { +with(as.list(c(state, parameters)), { + +#Question 4 +dx = -x + (2 * y) +dy = (-5 * x) - y + +#Question 5 +#dx = -3*x -2*y +#dy = -2*x -y + +list(c(dx, dy)) +}) +} + +# for setting the axis limits +xlim = c(-10, 10) +ylim = c(-10, 10) + +# no parameters in the ODE, so we set pars as an empty vector +pars = vector() + +#Determine Stability +stability(model, ystar = c(0, 0)) + +# perform phase plane analysis +phasePlaneAnalysis(model, xlim = xlim, ylim = ylim, parameters = pars) +``` + + diff --git a/Other/14 Sept Test and Prep.Rmd b/Other/14 Sept Test and Prep.Rmd new file mode 100644 index 0000000..fa87bb5 --- /dev/null +++ b/Other/14 Sept Test and Prep.Rmd @@ -0,0 +1,311 @@ +--- +title: "Test preparation 14 Sept" +author: "Jonathan Herrewijnen" +date: "September 14, 2018" +output: html_document +--- + +```{r setup, include=FALSE} +knitr::opts_chunk$set(echo = TRUE) +``` + +## R Markdown + +This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see . + +When you click the **Knit** button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this: + +```{r cars} +summary(cars) +``` + +## Including Plots + +You can also embed plots, for example: + +```{r pressure, echo=FALSE} +plot(pressure) +``` + +Note that the `echo = FALSE` parameter was added to the code chunk to prevent printing of the R code that generated the plot. + +```{r} +#Library etc +require(reshape2) + +iris$id <- rownames(iris) +iris_long <- melt(iris) + +## Using Species, id as id variables +iris_wide <- dcast(formula = Species + id ~ variable, data = iris_long ) +head(iris_wide) + +#Calculate the average of each measurement for long/wide +head(iris_wide) +aggregate(formula = .~ Species, data = iris_wide[, -2], FUN = mean) + +#Offff +aggregate(cbind(Sepal.Length, Sepal.Width) ~Species, data=iris_wide, FUN =mean) + +aggregate(formula = value ~ Species + variable, data = iris_long, FUN = mean) + +#Create list from data +iris_list <- split(iris, iris$Species) +head(iris_list) + +summary_list = alist() +for( i in seq_along(iris_list)){ + summary_list[[i]] <- summary(iris_list[[i]]) +} +summary_list + +#Create list using lapply() +lapply(iris_list, summary) + +#Cars library +head(mtcars) + +wt_hp_perCyl <- aggregate(cbind(wt, hp) ~ cyl, data = mtcars, FUN = mean) +wt_hp_perCyl + +#merging +merged_result <- merge(mtcars, wt_hp_perCyl, by = "cyl", suffixes = c("", "_mean")) +head(merged_result) + +#Selecting +sel_mtcars <- mtcars[ (mtcars$cyl == 6 | mtcars$cyl == 8) & mtcars$gear <= 3 & mtcars$mpg > 19 ,] +sel_mtcars +``` + +//PLOTS ETC +```{r} +plot(x = iris$Sepal.Length, y = iris$Sepal.Width) +abline(a = 3.5, b = -0.1, col = 'red', lwd = 3) + +myFun <- function(input_vector, a ,b) { +y <- a + b*input_vector +return(y) +} + +lines(iris$Sepal.Length,myFun(input_vector = iris$Sepal.Length, a = 4, b = -0.15), col = 'blue', cex = 3 ) + +par(mfrow = c(2, 2)) # for plotting 4 graphs in a single figure. +with(iris, hist(Sepal.Length)) +with(iris, hist(Sepal.Width)) +with(iris, hist(Petal.Length)) +with(iris, plot(iris$Sepal.Length, Petal.Width)) + +require(ggplot2) +summary(midwest) + +#SELEcTING SOMETHING FROM A GRAPH +diamonds[ diamonds$y == 58.9, ] + +``` + +//ACTUAL TEST HERE +```{r} +a <- 7 + +myFun <- function(x) { +a <- 2 +y <- x + a +return(y) +a +} + +summary(a) + +plot(Sepal.Length~Sepal.Width,data = iris) +plot(Sepal.Length~Species,data = iris) + +summary(iris) + +my_data <- iris_wide +iris_wide +my_data[[3]]$Variable[8] +iris$Species[51] + +arg1 <- 1 +arg2 <-2 +input <- 3 + +myFun <- function(arg1, arg2, x) { + + if( is.numeric(c(arg1,arg2))){ + + result <- cor(arg1, arg2) + + } else{ + + print("Input should be numeric") } + +} + +y <- myFun(1, 2, x) +y +``` + +```{r} +myFun <- function(arg1, arg2, input) { + + if( is.numeric(c(arg1,arg2))){ + + result <- cor(arg1, arg2) + + } else{ + + print("Input should be numeric") } + +} + +y <- myFun(1, 2, 5) +y+1 +cor(1, 2) + +a<-1 +b<-2 + +myFun <- function(a, b) { + result1 <- a * b + result2 <- a^b + result3 <- seq(from = a, to = b, length.out = 10) + my_list <- list() + my_list[[1]] <- result1 + my_list[[2]] <- result2 + my_list[[3]] <- result3 + return(my_list) +} + +myFun(10, 20) + +summary(iris) +iris_slct <- iris[(iris$Species == "setosa" & iris$Sepal.Length<4.5) | (iris$Species =="versicolor" & iris$Sepal.Width>3.5), ] +iris_slct2 <- iris[(iris$Species =="virginica" & iris$Sepal.Width>3.5), ] +iris_slct +iris_slct2 + +library(reshape2) +require(reshape2) + +?sapply +?ggplot +?aes + +?aggregate +?by +?split +?reshape2 +?cars + +split(iris) +``` + +//THE EXERCISE + +dat is the input dataframe +cat_colnames is the column name to summarize by. +sel_colname is the column what will be filtered/ selected over +sel_value is the value we filter/select over +The function should select all rows smaller than the sel_value within the sel_colname column. +The function should calculate the mean of all columns of your selected data per level of your cat_colname column. +The result of the function must be a list containing the result of the filtering and the result of the summarization. +In your R- script also run the function you created, you can use the iris dataset to test. +Save the result of your function to disk using the save() function +Write the function in a R script. Save the file with the same name as the function and with the .R extension. +Upload the .R file and the upload the result of your function to BB. + +```{r} +dat <-iris + +#To get you started here are some hints: +my_fun <- function(dat, cat_colname, sel_colname, sel_value) { + +#to filter/ select using the variable argument use: +dat[dat[ ,sel_colname]. + +When you click the **Knit** button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this: + +```{r cars} +summary(cars) +``` + +## Including Plots + +You can also embed plots, for example: + +```{r pressure, echo=FALSE} +plot(pressure) + +#For knitting deactivated +#install.packages("data") +data(sleep) +head(sleep) +sleep +``` + +Note that the `echo = FALSE` parameter was added to the code chunk to prevent printing of the R code that generated the plot. + +We found two groups in our data, but we are not sure whether the second group is our control group or a second medicine. This is not clarified clearly in the data description. So we are assuming that the second group is the control group, but we are not certain. + +//OPDRACHTEN - EXERCISES +Question 1) +Describe the dataset by explaining the variables. If any what are the replicates and/or controls? + +```{r} +summary(sleep) +sleep +?sleep +``` + +See the code above, the table "sleep" contains 3 variables, namely: extra, group, ID. +-"extra" stands for the extra hours of sleep a student who took a certain drug +-"group" stands for the drug a student was given, with 2 different types of drugs +-"ID" stands for the patient, one ID per patient/student + +20 observations and 3 variables are contained within the table. Second group is the control group. + + +Question 2) +Describe the performed experiment/ analysis. What was of interest/ what was the research question? + +Students were given a drug, two seperate groups were created, in which the first group got a sleep prolongation drug, and second group was used as control group. The researchers were interested in the amount of prolonged sleep in hours. + +Using ?sleep. + +Question 3) +Summarize the data statistically. + +See the summary and t.test below in the next code snippet box + +Question 4) +Are there any clear outliers or missing values? If so, which ones? + +The boxplot is not showing any outliers, so no outliers are visible. + +```{r} +require(stats) + +#QUESTION THREE +summary(sleep) + +#Simple t.test +with(sleep, t.test(extra[group == 1], extra[group == 2], paired = TRUE)) + +?sleep +?boxplot + +#QUESTION FOUR +#Individual plots +boxplot(with(sleep, extra[group==1])) +boxplot(with(sleep, extra[group==2]), add=TRUE) + +boxplot(with(sleep, extra[group==1], extra[group==2]), paired=TRUE) +#plot(with(sleep, extra[group==2]), add = TRUE) + +boxplot(sleep$extra ~ sleep$group, xlab="Groups", ylab="Sleep prolongation (h)") +points(sleep$extra ~ sleep$group, pch = 1) +``` + +Question 5) +Summarize the data graphically using only 1 figure. Choose a figure that summarizes the data adequately. Use the graphical package ggplot2. Explain your choice. + +We have two groups and only one variable. So a boxplot is a logical choice for comparing both datasets. + +```{r} +library(ggplot2) + +ggplot(sleep, aes(x=group, y=extra)) + geom_boxplot() +``` + +Question 6) +Explore the data, include check for distribution/ normality range of variables mention number and type of variables and observations + +```{r} +#Checking for distribution using a QQplot +par(mfrow=c(2, 1)) #plots 2 graphs +with(sleep, hist(extra[group==1])) +with(sleep, hist(extra[group==2])) +``` + +No distribution seems to be visible (if so, then a bell curve would be expected) + +Question 7) +Calculate the mean sleep prolongation of the two drugs using the aggregate function on the sleep data frame. + +```{r} +#Calculate mean sleep +aggregate(formula = extra~group, data = sleep, FUN = mean) +``` + +See function above, results are 0.75 hours and 2.33 hours extra sleep for drug one and two respectively + +Question 8) +Calculate the difference in sleep prolongation based on drug type for each patient. +Store this new dataset in a new data frame including patient number. Do this by writing a function with as input argument the sleep data frame and as output the new data frame. + +```{r} +my_dataframe <- data.frame(with(sleep, extra[group==1]-extra[group==2])) +my_dataframe +``` + +See above. + +Question 9) +Is the sleep prolongation difference between the drugs (more) normaly distributed? + +```{r} +shapiro.test(sleep$extra) +hist(sleep$extra) +``` + +So the p-value here is 0.3114, meaning that it differs signifcantly from a normal distribution. We can not assume that the data is normaly distributed. + + +Question 10) +Was there a significant difference in sleep prolongation between the drugs? Mention the effect size, is this of a relevant magnitude? Explain why. + +Also explain why you chose this specific test. + +```{r} +ThisData <- data.frame(with(sleep, mean(extra[group==1])), with(sleep, mean(extra[group==2]))) +ThisData + +with(sleep, t.test(extra[group == 1], extra[group == 2], paired = TRUE)) +``` + +The mean between both drugs is very different, 0.75 to 2.33, implying a difference. + +The t.test shows a difference of -1.58 between the test with a reliability of 0.002833. + + + + + diff --git a/Other/ICT_Sleep_Data_Case_Session.html b/Other/ICT_Sleep_Data_Case_Session.html new file mode 100644 index 0000000..b1de147 --- /dev/null +++ b/Other/ICT_Sleep_Data_Case_Session.html @@ -0,0 +1,360 @@ + + + + + + + + + + + + + + + +Case Study Sleep + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + + + + + + + + + + + +
+

R Markdown

+

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

+

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

+
summary(cars)
+
##      speed           dist       
+##  Min.   : 4.0   Min.   :  2.00  
+##  1st Qu.:12.0   1st Qu.: 26.00  
+##  Median :15.0   Median : 36.00  
+##  Mean   :15.4   Mean   : 42.98  
+##  3rd Qu.:19.0   3rd Qu.: 56.00  
+##  Max.   :25.0   Max.   :120.00
+
+
+

Including Plots

+

You can also embed plots, for example:

+

+
##   extra group ID
+## 1   0.7     1  1
+## 2  -1.6     1  2
+## 3  -0.2     1  3
+## 4  -1.2     1  4
+## 5  -0.1     1  5
+## 6   3.4     1  6
+
##    extra group ID
+## 1    0.7     1  1
+## 2   -1.6     1  2
+## 3   -0.2     1  3
+## 4   -1.2     1  4
+## 5   -0.1     1  5
+## 6    3.4     1  6
+## 7    3.7     1  7
+## 8    0.8     1  8
+## 9    0.0     1  9
+## 10   2.0     1 10
+## 11   1.9     2  1
+## 12   0.8     2  2
+## 13   1.1     2  3
+## 14   0.1     2  4
+## 15  -0.1     2  5
+## 16   4.4     2  6
+## 17   5.5     2  7
+## 18   1.6     2  8
+## 19   4.6     2  9
+## 20   3.4     2 10
+

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.

+

We found two groups in our data, but we are not sure whether the second group is our control group or a second medicine. This is not clarified clearly in the data description. So we are assuming that the second group is the control group, but we are not certain.

+

//OPDRACHTEN - EXERCISES Question 1) Describe the dataset by explaining the variables. If any what are the replicates and/or controls?

+
summary(sleep)
+
##      extra        group        ID   
+##  Min.   :-1.600   1:10   1      :2  
+##  1st Qu.:-0.025   2:10   2      :2  
+##  Median : 0.950          3      :2  
+##  Mean   : 1.540          4      :2  
+##  3rd Qu.: 3.400          5      :2  
+##  Max.   : 5.500          6      :2  
+##                          (Other):8
+
sleep
+
##    extra group ID
+## 1    0.7     1  1
+## 2   -1.6     1  2
+## 3   -0.2     1  3
+## 4   -1.2     1  4
+## 5   -0.1     1  5
+## 6    3.4     1  6
+## 7    3.7     1  7
+## 8    0.8     1  8
+## 9    0.0     1  9
+## 10   2.0     1 10
+## 11   1.9     2  1
+## 12   0.8     2  2
+## 13   1.1     2  3
+## 14   0.1     2  4
+## 15  -0.1     2  5
+## 16   4.4     2  6
+## 17   5.5     2  7
+## 18   1.6     2  8
+## 19   4.6     2  9
+## 20   3.4     2 10
+
?sleep
+
## starting httpd help server ... done
+

See the code above, the table “sleep” contains 3 variables, namely: extra, group, ID. -“extra” stands for the extra hours of sleep a student who took a certain drug -“group” stands for the drug a student was given, with 2 different types of drugs -“ID” stands for the patient, one ID per patient/student

+

20 observations and 3 variables are contained within the table. Second group is the control group.

+

Question 2) Describe the performed experiment/ analysis. What was of interest/ what was the research question?

+

Students were given a drug, two seperate groups were created, in which the first group got a sleep prolongation drug, and second group was used as control group. The researchers were interested in the amount of prolonged sleep in hours.

+

Using ?sleep.

+

Question 3) Summarize the data statistically.

+

See the summary and t.test below in the next code snippet box

+

Question 4) Are there any clear outliers or missing values? If so, which ones?

+

The boxplot is not showing any outliers, so no outliers are visible.

+
require(stats)
+
+#QUESTION THREE
+summary(sleep)
+
##      extra        group        ID   
+##  Min.   :-1.600   1:10   1      :2  
+##  1st Qu.:-0.025   2:10   2      :2  
+##  Median : 0.950          3      :2  
+##  Mean   : 1.540          4      :2  
+##  3rd Qu.: 3.400          5      :2  
+##  Max.   : 5.500          6      :2  
+##                          (Other):8
+
#Simple t.test
+with(sleep, t.test(extra[group == 1], extra[group == 2], paired = TRUE))
+
## 
+##  Paired t-test
+## 
+## data:  extra[group == 1] and extra[group == 2]
+## t = -4.0621, df = 9, p-value = 0.002833
+## alternative hypothesis: true difference in means is not equal to 0
+## 95 percent confidence interval:
+##  -2.4598858 -0.7001142
+## sample estimates:
+## mean of the differences 
+##                   -1.58
+
?sleep
+?boxplot
+
+#QUESTION FOUR
+#Individual plots
+boxplot(with(sleep, extra[group==1]))
+boxplot(with(sleep, extra[group==2]), add=TRUE)
+

+
boxplot(with(sleep, extra[group==1], extra[group==2]), paired=TRUE)
+

+
#plot(with(sleep, extra[group==2]), add = TRUE)
+
+boxplot(sleep$extra ~ sleep$group, xlab="Groups", ylab="Sleep prolongation (h)")
+points(sleep$extra ~ sleep$group, pch = 1)
+

+

Question 5) Summarize the data graphically using only 1 figure. Choose a figure that summarizes the data adequately. Use the graphical package ggplot2. Explain your choice.

+

We have two groups and only one variable. So a boxplot is a logical choice for comparing both datasets.

+
library(ggplot2)
+
+ggplot(sleep, aes(x=group, y=extra)) + geom_boxplot()
+

+

Question 6) Explore the data, include check for distribution/ normality range of variables mention number and type of variables and observations

+
#Checking for distribution using a QQplot
+par(mfrow=c(2, 1)) #plots 2 graphs
+with(sleep, hist(extra[group==1]))
+with(sleep, hist(extra[group==2]))
+

+

No distribution seems to be visible (if so, then a bell curve would be expected)

+

Question 7) Calculate the mean sleep prolongation of the two drugs using the aggregate function on the sleep data frame.

+
#Calculate mean sleep
+aggregate(formula = extra~group, data = sleep, FUN = mean)
+
##   group extra
+## 1     1  0.75
+## 2     2  2.33
+

See function above, results are 0.75 hours and 2.33 hours extra sleep for drug one and two respectively

+

Question 8) Calculate the difference in sleep prolongation based on drug type for each patient. Store this new dataset in a new data frame including patient number. Do this by writing a function with as input argument the sleep data frame and as output the new data frame.

+
my_dataframe <- data.frame(with(sleep, extra[group==1]-extra[group==2]))
+my_dataframe
+
##    with.sleep..extra.group....1....extra.group....2..
+## 1                                                -1.2
+## 2                                                -2.4
+## 3                                                -1.3
+## 4                                                -1.3
+## 5                                                 0.0
+## 6                                                -1.0
+## 7                                                -1.8
+## 8                                                -0.8
+## 9                                                -4.6
+## 10                                               -1.4
+

See above.

+

Question 9) Is the sleep prolongation difference between the drugs (more) normaly distributed?

+
shapiro.test(sleep$extra)
+
## 
+##  Shapiro-Wilk normality test
+## 
+## data:  sleep$extra
+## W = 0.94607, p-value = 0.3114
+
hist(sleep$extra)
+

+

So the p-value here is 0.3114, meaning that it differs signifcantly from a normal distribution. We can not assume that the data is normaly distributed.

+

Question 10) Was there a significant difference in sleep prolongation between the drugs? Mention the effect size, is this of a relevant magnitude? Explain why.

+

Also explain why you chose this specific test.

+
ThisData <- data.frame(with(sleep, mean(extra[group==1])), with(sleep, mean(extra[group==2])))
+ThisData
+
##   with.sleep..mean.extra.group....1...
+## 1                                 0.75
+##   with.sleep..mean.extra.group....2...
+## 1                                 2.33
+
with(sleep, t.test(extra[group == 1], extra[group == 2], paired = TRUE))
+
## 
+##  Paired t-test
+## 
+## data:  extra[group == 1] and extra[group == 2]
+## t = -4.0621, df = 9, p-value = 0.002833
+## alternative hypothesis: true difference in means is not equal to 0
+## 95 percent confidence interval:
+##  -2.4598858 -0.7001142
+## sample estimates:
+## mean of the differences 
+##                   -1.58
+

The mean between both drugs is very different, 0.75 to 2.33, implying a difference.

+

The t.test shows a difference of -1.58 between the test with a reliability of 0.002833.

+
+ + + + +
+ + + + + + + + diff --git a/Other/Jonathan_Herrewijnen_s1830899_Anova.Rmd b/Other/Jonathan_Herrewijnen_s1830899_Anova.Rmd new file mode 100644 index 0000000..6e73021 --- /dev/null +++ b/Other/Jonathan_Herrewijnen_s1830899_Anova.Rmd @@ -0,0 +1,216 @@ +--- +title: "Assignment ANOVA" +author: "Jonathan Herrewijnen" +output: + html_document: + theme: cerulean + toc: yes +--- + +Gebruik de tutorial om deze opdrachten te maken. + +## Question 1: Coagulation + +### Beschrijving data + +Blood coagulation times by diet + +The example dataset we will use is a set of 24 blood coagulation times. 24 animals were randomly assigned +to four different diets and the samples were taken in a random order. This data comes from Box, Hunter, +and Hunter (1978). + +This data frame contains the following columns + +* coag is the coagulation time in seconds +* diet are the diet type, here A,B,C or D + + +### Vragen + +Q1A. Wat voor type data zijn coag en diet ? +Coag=nominaal; Diet=ordinaal + + +Q1B. Wat voor type statistische model(len) kunnen we gebruiken voor de analyse van deze data? +Anova + + +Q1C. Welke variabele is de response en welke de verklarende variabele? +Respons(y-as)=coag; Verklarende(x-as)=diet. + + +Q1D. Wat is de null-hypothese +H0: Er is geen effect van de diten op de coagulatietijd. + + +### Inlezen van de data. + +In deze vraag gebruiken we data uit het faraway pakket. Dit pakket moet je eerst downloaden als dat nog niet eerder is gedaan. Je kan controleren of het faraway pakket al is geinstaleerd via de `Package` tab in het venster rechtonder en type faraway in het zoekveld. Als deze niet is geinstalleerd kan je de install knop aanklikken in hetzelfde venster. + + +```{r} +library(faraway) +data(coagulation) +attach(coagulation) +``` + +Analyseer de data door gebruik te maken van de statistische werkvolgorde, zoals weergegevn in de ANOVA tutorial, en beantwoordt de volgende vragen. + + +Q1D. Zijn de aannamens voor het uitvoeren van een ANOVA correct? +Ja, alle gemiddeldes liggen rond de 0. Anova, analyse van varianties + +```{r} +model<-aov(coag~diet) +str(model) +model$residuals +plot(model$residuals~diet) +``` + + +Q1E. Mogen we de null-hypothese verwerpen? + +```{r} +summary(model) + +library(car) +qqPlot(model$residuals) +``` + +Ja, de waarde valt onder de 0,05. + + +Q1F. Welke waarde heeft de test-statistiek? +13,57 volgens de summary. + + +Q1G. Welke diet-afhankelijke coagulatie-tijden verschillen statistisch significant van elkaar? +Volgens mij code schrijven die de uiterste waarden weergeeft, en die met mekaar vergelijken. + + +## Question 2 Rabbit data + +### Beschrijving van de data + +A nutritionist studied the effects of six diets, on weight gain of domestic rabbits. From past experience with sizes of litters, it was felt that only 3 uniform rabbits could be selected from each available litter. There were ten litters available forming blocks of size three. In this analysis we only investigate the effect of diets on weight gained. + +* treat is the diet +* gain is the weight gain + + +### Vragen. + +Q2A. Wat voor type data zijn Treat en Weight? +Treat=dieet=6 verschillende=ordinaal +Weith=Waarde=Numeriek + + +Q2B. Wat voor een type model(len) kan je gebruiken om deze data te analyseren? Verklaar. +Alleen Anova, 6 vergelijkingen + + +Q2C. Wat zijn de de response variabele en verklarende variabele in deze dataset ? +Response=y-as=Weight +Verklarende=x-as=dieet + + +De data komen weer uit het faraway pakket. + +```{r} +data(rabbit) +attach(rabbit) +``` + +Analyseer de data door gebruik te maken van de tutorial en beantwoord de volgende vragen. + + +Q1D. Zijn de aannamens voor het uitvoeren van een ANOVA correct? Verklaar. + + +Q1E. Mogen we de null-hypothese verwerpen? Verklaar. + + +Q1F. Welke niveaus of de diet factor verschillen statistisch significant van elkaar? + +## Q3 + +A drug company tested three formulations of a pain relief medicine for +migraine headache sufferers. For the experiment 27 volunteers were selected +and 9 were randomly assigned to one of three drug formulations. The subjects +were instructed to take the drug during their next migraine headache episode and +to report their pain on a scale of 1 to 10 (10 being most pain). + + +### Inlezen data + +we voeren de data nu met de hand in. + + +```{r} +Pain = c(4, 5, 4, 3, 2, 4, 3, 4, 4, 6, 8, 4, 5, 4, 6, 5, 8, 6, 6, 7, 6, 6, 7, 5, 6, 5, 5) +Drug = gl(3,9,labels=c("A", "B", "C")) +DATA<-data.frame(Pain, Drug) +``` + +Gebuik de tutorial om de data te analyseren in de context van de volgende vraag: Is er een verschil in pijn bestrijding door de verschillende medicijnen bij migraine? + + + + +## Q4: Typical exam questions + +De tabel hieronder is gegeven maar er missen wat data (?). + +``` + Df Sum Sq Mean Sq F value Pr(>F) +VerklarendeF 3 ?1 2.199 ?3 0.0945 . +Residuals 36 34.53 ?2 +``` + +Q4.A Bereken de waarden die op de plaatsen van de vraagtekens moeten staan (?). + + + +Q4.B Welke eigenschappen van de t-statistiek en F-statistiek komen overeen? + + + + +## Simulatie ANOVA experiment. + + + +Eerst maken we de factor. We hebben twee versies nodig van de factor, een numerieke en een categorische. De numerieke is alleen een lijst van de niveaus gemiddelde. + +```{r} +VerklarendeN<-c(rep(2,10),rep(2.2,10),rep(2.8,10),rep(3.4,10) ) +``` + +dan de categorische versie wat een naam aan de gemiddelde koppeld. + +```{r} +VerklarendeF<-gl(4,10, labels=c("hokus", "Pokus", "Pilatus", "Pas")) +``` + + +We maken nu een ANOVA model alleen met de gemiddelde en plotten het resultaat + +```{r} +Response<-VerklarendeN +plot(Response~VerklarendeF) +``` + +We zien alleen de verschillende factor gemiddelde liggen, de between-group variance. + +Nu voegen we de rest term toe ofwel we geven de within-group varianec een waarde. We voegen daarom een stochatische variabele toe met een gemiddelde van 0 en een bepaalde standaard deviatie (sd) hier 1.2. + +```{r} +ResponseR<-VerklarendeN+rnorm(40,0,1.2) +Dat2<-data.frame(ResponseR,VerklarendeF) +summary(Dat2) +plot(ResponseR~ VerklarendeF) +points(ResponseR~ VerklarendeF) +``` + +Voer een analyse uit op deze data en pas de gemiddelde zo aan dat de test net significant is. Hiervoor moeten ze de VerklarendeF +aanpassen. Letop ik gebruik geen set.seed. diff --git a/Other/Test ICT 7 Sept.Rmd b/Other/Test ICT 7 Sept.Rmd new file mode 100644 index 0000000..1f09471 --- /dev/null +++ b/Other/Test ICT 7 Sept.Rmd @@ -0,0 +1,180 @@ +--- +title: "Test 7 Sept" +author: "Jonathan Herrewijnen" +date: "September 7, 2018" +output: html_document +--- + +```{r setup, include=FALSE} +knitr::opts_chunk$set(echo = TRUE) +``` + +## R Markdown + +This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see . + +When you click the **Knit** button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this: + +```{r cars} +summary(cars) +``` + +## Including Plots + +You can also embed plots, for example: + +```{r pressure, echo=FALSE} +plot(pressure) +``` + +Note that the `echo = FALSE` parameter was added to the code chunk to prevent printing of the R code that generated the plot. + +//Question 1 +```{r} +library(Deriv) + +t=5.49116 +Bp1=exp(3.2)+((5.7*t)/(t+13.0))*exp(-0.026*t) +Bp1 + +#T VERVANGEN VOOR X +f1=function(x) {exp(3.2)+((5.7*x)/(x+13.0))*exp(-0.026*x)} + +#Question 2: use uniroot, or should I? t=5.49116 +?uniroot + +root=uniroot(function(x), f1(x), c(-1, 1), x=26)$root +root$root + +#Werkt niet echt: formule ombouwen +#Bp1={exp(3.2)+((5.7*x)/(x+13.0))*exp(-0.026*x)}=26 + +#Question 3: Create derivative and put to 0 +x=0 +df.f1 = Deriv(f1, "x") +df.f1 + +#FOR QUESTION 3: put in x=0! Run df.f1 for exact formula -> t=0.4384615 +function (x) +{ + .e1 <- 13 + 0 + (5.7 - 0 * (0.1482 + 5.7/.e1)) * exp(-(0.026 * 0))/.e1 +} + +#Question 4: IF PREVIOUS ANSWER IS CORRECT +#t=100000000 #for Q5 + +Bp1=exp(3.2)+((5.7*t)/(t+13.0))*exp(-0.026*t) +Bp1 #[1] 24.7164 ?? + +#Quick Curve +curve(f1(x), -10, 40000, col="red", ylab="f1(x)") + +##QUESTION 3+4 LATER? + +#Question 5: Just put in a huge number... + +#Question 6: FUNCTION + CHANGE TO X +Bp1=exp(3.2)+((5.7*t)/(t+13.0))*exp(-0.026*t) +f1=function(x) {exp(3.2)+((5.7*x)/(x+13.0))*exp(-0.026*x)} + +Bp2=exp(3.2)+((6.3*t)/(t+5.0))*exp(-0.06*t) +f2=function(x) {exp(3.2)+((6.3*x)/(x+5.0))*exp(-0.06*x)} + +uniroot(function(x) f1(x)-f2(x), c(12, 24))$root +#WAAROM ANDERS ALS c(12, 14)? + +x <- exp(10) + +#Create curve +curve(f1(x), 13.50909, 13.5091, col="red", ylab="biomarker concentration", xlab="time", ylim=c(26.576903, 26.57695)) +curve(f2(x), 13.50909, 13.5091, col="blue", add=TRUE) + +#Question 7: Intersect of both lines... +#Done manually need to use uniroot + +#Question 8: See new line! + +#Question 3: Again! +Bp1=exp(3.2)+((5.7*t)/(t+13.0))*exp(-0.026*t) +f1=function(x) {exp(3.2)+((5.7*x)/(x+13.0))*exp(-0.026*x)} + +Bp2=exp(3.2)+((6.3*t)/(t+5.0))*exp(-0.06*t) +f2=function(x) {exp(3.2)+((6.3*x)/(x+5.0))*exp(-0.06*x)} +f1(0) + +plot((x), f1, f2) + +Deriv(f1, "x") +function (x) +{ + .e1 <- 13 + 0 + (5.7 - 0 * (0.1482 + 5.7/.e1)) * exp(-(0.026 * 0))/.e1 +} + +f=expression(exp(3.2)+((5.7*x)/(x+13.0))*exp(-0.026*x)) +D(f, 'x') + +ThisIs=(5.7/(x + 13) - (5.7 * x)/(x + 13)^2) * exp(-0.026 * x) - ((5.7 * + x)/(x + 13)) * (exp(-0.026 * x) * 0.026) +ThisIs + +plot((x),ThisIs) + +``` + +//QUESTION 8+ +```{r} +#install.packages("deSolve") +library("deSolve") + +#Question 8: new equasion +k1=0.3 +k2=0.1 +k3=0.9 +b=2.0 +S=10.0 +R=3.0 + +MR=k1*S*(R-b)+(k2*(R-b)^2)-(k3*(R-b)^3) + +#USING ODE +RThisProtein = function(timepoint, state, parameters) { +with(as.list(c(state, parameters)), { +dR = k1*S*(R-b)+(k2*(R-b)^2)-(k3*(R-b)^3) +list(c(dR)) +}) +} + +pars = c(k1=0.3, k2=0.1, k3=0.9, b=2.0, S=30, 30, 30, 30, 30, seq(30, 5, by=1.66667)) +inistate = c(R=3.0) +inistate.1 = c(R = 0) +inistate.2 = c(R = 2) +inistate.3 = c(R = 4) +inistate.4 = c(R = 6) +finish = seq(0, 2, by = 0.1) + +library("deSolve") +out.1 = ode(y = inistate.1, times = finish, func = RThisProtein, parms = pars) +out.2 = ode(y = inistate.2, times = finish, func = RThisProtein, parms = pars) +out.3 = ode(y = inistate.3, times = finish, func = RThisProtein, parms = pars) +out.4 = ode(y = inistate.4, times = finish, func = RThisProtein, parms = pars) + +plot(out.1, main = "time", ylab = "Synthesis of R", col="red", ylim=c(0, 6)) +lines(out.2, col="Green") +lines(out.3, col="blue") +lines(out.4, col="yellow") + +library("phaseR") +#Question 10: time 0-5 + +phaseport = phasePortrait(RThisProtein, parameters = pars, ylim = c(0, 20), points = 10, +xlab = "Time", ylab = "dR/dt", state.names = c("R"), main="Q13") +grid() # the function grid adds a grid to the plot + +?title + + +``` + + diff --git a/create_IFP_datasets_v2_dense.py b/RP1/create_IFP_datasets_v2_dense.py similarity index 100% rename from create_IFP_datasets_v2_dense.py rename to RP1/create_IFP_datasets_v2_dense.py diff --git a/readme.md b/readme.md new file mode 100644 index 0000000..6ef3117 --- /dev/null +++ b/readme.md @@ -0,0 +1,3 @@ +Collection of scripts from the Leiden University. RP1 and RP2 are scripts from my first and second research project (Leiden University & Galapagos). BOO concerns the 'Bachelors Onderzoek Opdracht' - a bachelor thesis. + +Other scripts are either made or obtained during courses, but have not specific project attached to them. \ No newline at end of file