Adding old r scripts

This commit is contained in:
Jonathan Herrewijnen 2022-12-04 20:38:24 +00:00
parent 731466dc9a
commit 091c544cb5
9 changed files with 1527 additions and 0 deletions

View File

@ -0,0 +1,140 @@
---
title: "BOOScript"
output: html_document
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
require(tidyverse)
require(MotilityLab)
require(magrittr)
require(tidyr)
require(stats)
```
Basic peace of code to show the amount of cells over time (MCS) in the model. The second graph takes the root of these cells, the third one the cubic root. We believe it is most comparable to take the square root and not the cubic root (initially because volume is cubic, as is our data.
We calculated this by getting the r out of the Volume (4/3*pi*r(1/3): volume of a sphere), and then using that r to calculate the surface area of a square area (pi*r^2: surface area)
```{r}
#PLOT AMOUNT OF CELLS/CTL
setwd("D:/BFW/BOO/TestRuns/BOOModel_/")
Logger<- read.csv(file ="logger.txt", sep="")
plot(Logger$time, Logger$celltype.cells.size, main="Amount of cells in model", xlab="Time in MCS", ylab="Amount of cells in model", col = "blue")
plot(Logger$time, Logger$celltype.cells.size, main="Amount of cells in model (log)", xlab="Time in MCS (Logarithmic)", ylab="Amount of cells in model", log = "x", col = "blue")
plot(Logger$time, Logger$celltype.CTL.size, main="Amount of CTL in model", xlab="Time in MCS", ylab="Amount of cells in model")
setwd("D:/BFW/BOO/TestRuns/BOOModel_12/")
Logger2<- read.csv(file ="logger.txt", sep="")
plot(Logger2$time, Logger2$celltype.cells.size, main="Amount of cells in model", xlab="Time in MCS", ylab="Amount of cells in model", col = "red")
plot(Logger2$time, Logger2$celltype.cells.size, main="Amount of cells in model (log)", xlab="Time in MCS (Logarithmic)", ylab="Amount of cells in model", log = "x", col = "red")
Logger <- as.data.frame(Logger)
Logger$SurfaceSize <- Logger$celltype.cells.size
Logger2 <- as.data.frame(Logger2)
Logger2$SurfaceSize <- Logger2$celltype.cells.size
#PLOT CELLS IN MM2 (PLUS LOG)
Logger$SurfaceSize <- sqrt(Logger$SurfaceSize)
plot(Logger$time, Logger$SurfaceSize, main="Amount of cell area (mm2)", xlab="Time in MCS", ylab="sqrt of n cells", col = "blue")
plot(Logger$time, Logger$SurfaceSize, main="Amount of cell area (mm2)(log)", xlab="Time in MCS (Logarithmic)", ylab="sqrt of n cells", log = "x", col = "blue")
Logger2$SurfaceSize <- sqrt(Logger2$SurfaceSize)
plot(Logger2$time, Logger2$SurfaceSize, main="Amount of cell area (mm2)(log)", xlab="Time in MCS", ylab="sqrt of n cells", col = "red")
plot(Logger2$time, Logger2$SurfaceSize, main="Amount of cell area (mm2)", xlab="Time in MCS (Logarithmic)", ylab="sqrt of n cells", log = "x", col = "red")
#PLOT CELLS IN MM3
Logger$VolumeSize <- Logger$celltype.cells.size
Logger$VolumeSize <- Logger$VolumeSize^(1/3)
plot(Logger$time, Logger$VolumeSize, main="Approximate amount of cell area (cubic, mm3)", xlab="Time in MCS", ylab="^(1/3) of n cells")
```
NEXT UP
Calculation and showing of CTL tracks, and thus migration speed and MSD, this can than be compared to the graphs we have from Weigelin et al.
```{r}
#CELL TRACKS, not sure how it is presented however, just for the sake of it.
data_tracks <- as.tracks.data.frame(Logger, id.column="cell.id", time.column = "time", pos.columns = c("cell.center.x", "cell.center.y"))
#plot(data_tracks, main ="cell tracks density 100")
#plot(data_tracks)
data_tracks2 <- as.tracks.data.frame(Logger2, id.column="cell.id", time.column = "time", pos.columns = c("cell.center.x", "cell.center.y"))
#plot(data_tracks, main ="cell tracks density 100")
#plot(data_tracks)
#celltracks <- as.tracks.data.frame(Logger, id.column = "cell.id", time.column = "time", pos.columns = c("cell.center.x", "cell.center.y"))
#plot(celltracks)
#MIGRATION SPEED
speed_CTL <- list()
for(i in seq_along(data_tracks)){
speed_CTL[[i]] <- speed(data_tracks[[i]])
}
speed_CTL2 <- list()
for(i in seq_along(data_tracks2)){
speed_CTL2[[i]] <- speed(data_tracks2[[i]])
}
#HERE THE ID's OF THE CTL CELLS (Define amount of tumor first, then calculate the amount of CTL's)
speed_cells_frame <- as.data.frame(speed_CTL[51:70])
speed_cells_frame_long <- gather(speed_cells_frame)
speed_cells_frame_long<- speed_cells_frame_long$value
boxplot(speed_cells_frame_long, main = "speed density 100")
?plot
speed_cells_frame2 <- as.data.frame(speed_CTL2[51:70])
speed_cells_frame_long2 <- gather(speed_cells_frame2)
speed_cells_frame_long2<- speed_cells_frame_long2$value
boxplot(speed_cells_frame_long2, main = "speed density 100")
?plot
#MEAN SQUARE DISPLACEMENT with CI 95%
# "mean.ci.95" Outputs the mean and upper and lower bound of a parametric 95 percent confidence intervall.
CTL_msqrd <- aggregate(data_tracks, squareDisplacement, FUN="mean.ci.95")
CTL_msqrd2 <- aggregate(data_tracks2, squareDisplacement, FUN="mean.ci.95")
# data set with ONLY i and the Mean Sqr displacement
CTL_msqrd_1 <- subset(CTL_msqrd, select = -c(lower, upper))
CTL_msqrd_2 <- subset(CTL_msqrd2, select = -c(lower, upper))
plot(CTL_msqrd_1, xlab="time step",
ylab="mean square displacement", main="Mean Square Displacement", type="l", col="blue")
lines(CTL_msqrd_2, xlab="time step",
ylab="mean square displacement", main="Mean Square Displacement", type="l", col="red")
plot(CTL_msqrd_2, xlab="time step",
ylab="mean square displacement", main="Mean Square Displacement", type="l", col="red")
lines(CTL_msqrd_1, xlab="time step",
ylab="mean square displacement", main="Mean Square Displacement", type="l", col="blue")
legend(1, 60000, legend=c("No treatment, control", "CD137 treatment"),
col=c("red", "blue"), lty=1:2, cex=0.8)
```
```{r}
#AMOUNT OF CELLS KILLED
#The way this works is, we take the amount of cells proliferated, add the amount of cells spawned in with them. That is the total amount of cells we then COULD have in the model. But since CTL kill the cells, we take the total amount of cells in the model and calculate the difference: (All TC proliferation + initial TC amount) - Amount of cells in model
setwd("D:/BFW/BOO/MorpheusRuns/BOOModel_565/")
celldivision <- read.delim("cell_division_cells.txt")
AmountDivisions <- (nrow(celldivision)-1)
AmountDivisions
#MANUAL INPUT TWICE
TotalPossibleCells <- 50 + AmountDivisions
TotalCellsInModel <- tail(Logger$celltype.cells.size, n=1)
TotalCellsKilled <- TotalPossibleCells - TotalCellsInModel
TotalPossibleCells
TotalCellsInModel
TotalCellsKilled
celldivision
plot(Logger$time, Logger$celltype.cells.size)
mc.cores
```

144
Other/06 Sept CBR.Rmd Normal file
View File

@ -0,0 +1,144 @@
---
title: "BCR 06 Sept"
author: "Jonathan Herrewijnen"
date: "September 6, 2018"
output: html_document
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```
## R Markdown
This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see <http://rmarkdown.rstudio.com>.
When you click the **Knit** button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
```{r cars}
summary(cars)
```
## Including Plots
You can also embed plots, for example:
```{r pressure, echo=FALSE}
plot(pressure)
```
Note that the `echo = FALSE` parameter was added to the code chunk to prevent printing of the R code that generated the plot.
//CBR Two dimensional phase portraits, equilibria and stability
```{r}
#install.packages("phaseR")
library("phaseR")
# define 2D ODE model
model = function(timepoint, state, parameters) {
with(as.list(c(state, parameters)), {
dx = -2 * x - 3 * y
dy = -x - 2 * y
list(c(dx, dy))
})
}
# for setting the axis limits
xlim = c(-10, 10)
ylim = c(-10, 10)
# no parameters in the ODE, so we set pars as an empty vector
pars = vector()
# draw vector field
model.flowField = flowField(xlab = "x", ylab = "y", model, xlim = xlim, ylim = ylim,
parameters = pars, points = 19, add = FALSE, state.names = c("x", "y"))
# draw nullclines
model.nullclines = nullclines(model, xlim = xlim, ylim = ylim, parameters = pars,
points = 500, state.names = c("x", "y"))
# draw trajectory
model.trajectory = trajectory(model, y0 = c(-5, -5), tlim = c(0, 10), parameters = pars,
state.names = c("x", "y"))
```
//Numerical Solution
```{r}
# perform numerical simulation and plot end result
model.numericalSolution = numericalSolution(model, y0 = c(-5, -5), tlim = c(0,
20), type = "one", parameters = pars, col = c("green", "orange"), ylim = ylim,
add.legend = F)
# make legend for plot
legend("topright", col = c("green", "orange"), legend = c("x", "y"), lty = 1)
# after the next command, click mouse to find values of a point within the
# phase portrait (e.g., to approximate the value of an equilibrium)
eq = locator(n = 1)
eq
```
//QUESTION 1
The 0, 0 location was determined from the first phase portrait!
Not from the second click portrait!
```{r}
#?stability
stability(model, ystar = c(0, 0))
#stability(deriv, ystar = NULL, parameters = NULL, system = "two.dim", h = 1e-07, summary = TRUE, state.names = c("x", "y"))
```
//Question 2
Meaning of T and Delta.
T=(I THINK!) the value of the equilibrium at dx/dt=0 -> So for the equilibrium, in our case the equilibrium is stable.
Delta="In the two dimensional system case, value of the Jacobians determinant at y.star. ->So the Jacobian matrix
//Question 3: PhasePlaneAnalysis
```{r}
# perform phase plane analysis
phasePlaneAnalysis(model, xlim = xlim, ylim = ylim, parameters = pars)
```
//QUESTION 4-8
```{r}
library("phaseR")
# define 2D ODE model
model = function(timepoint, state, parameters) {
with(as.list(c(state, parameters)), {
#Question 4
dx = -x + (2 * y)
dy = (-5 * x) - y
#Question 5
#dx = -3*x -2*y
#dy = -2*x -y
list(c(dx, dy))
})
}
# for setting the axis limits
xlim = c(-10, 10)
ylim = c(-10, 10)
# no parameters in the ODE, so we set pars as an empty vector
pars = vector()
#Determine Stability
stability(model, ystar = c(0, 0))
# perform phase plane analysis
phasePlaneAnalysis(model, xlim = xlim, ylim = ylim, parameters = pars)
```

View File

@ -0,0 +1,311 @@
---
title: "Test preparation 14 Sept"
author: "Jonathan Herrewijnen"
date: "September 14, 2018"
output: html_document
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```
## R Markdown
This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see <http://rmarkdown.rstudio.com>.
When you click the **Knit** button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
```{r cars}
summary(cars)
```
## Including Plots
You can also embed plots, for example:
```{r pressure, echo=FALSE}
plot(pressure)
```
Note that the `echo = FALSE` parameter was added to the code chunk to prevent printing of the R code that generated the plot.
```{r}
#Library etc
require(reshape2)
iris$id <- rownames(iris)
iris_long <- melt(iris)
## Using Species, id as id variables
iris_wide <- dcast(formula = Species + id ~ variable, data = iris_long )
head(iris_wide)
#Calculate the average of each measurement for long/wide
head(iris_wide)
aggregate(formula = .~ Species, data = iris_wide[, -2], FUN = mean)
#Offff
aggregate(cbind(Sepal.Length, Sepal.Width) ~Species, data=iris_wide, FUN =mean)
aggregate(formula = value ~ Species + variable, data = iris_long, FUN = mean)
#Create list from data
iris_list <- split(iris, iris$Species)
head(iris_list)
summary_list = alist()
for( i in seq_along(iris_list)){
summary_list[[i]] <- summary(iris_list[[i]])
}
summary_list
#Create list using lapply()
lapply(iris_list, summary)
#Cars library
head(mtcars)
wt_hp_perCyl <- aggregate(cbind(wt, hp) ~ cyl, data = mtcars, FUN = mean)
wt_hp_perCyl
#merging
merged_result <- merge(mtcars, wt_hp_perCyl, by = "cyl", suffixes = c("", "_mean"))
head(merged_result)
#Selecting
sel_mtcars <- mtcars[ (mtcars$cyl == 6 | mtcars$cyl == 8) & mtcars$gear <= 3 & mtcars$mpg > 19 ,]
sel_mtcars
```
//PLOTS ETC
```{r}
plot(x = iris$Sepal.Length, y = iris$Sepal.Width)
abline(a = 3.5, b = -0.1, col = 'red', lwd = 3)
myFun <- function(input_vector, a ,b) {
y <- a + b*input_vector
return(y)
}
lines(iris$Sepal.Length,myFun(input_vector = iris$Sepal.Length, a = 4, b = -0.15), col = 'blue', cex = 3 )
par(mfrow = c(2, 2)) # for plotting 4 graphs in a single figure.
with(iris, hist(Sepal.Length))
with(iris, hist(Sepal.Width))
with(iris, hist(Petal.Length))
with(iris, plot(iris$Sepal.Length, Petal.Width))
require(ggplot2)
summary(midwest)
#SELEcTING SOMETHING FROM A GRAPH
diamonds[ diamonds$y == 58.9, ]
```
//ACTUAL TEST HERE
```{r}
a <- 7
myFun <- function(x) {
a <- 2
y <- x + a
return(y)
a
}
summary(a)
plot(Sepal.Length~Sepal.Width,data = iris)
plot(Sepal.Length~Species,data = iris)
summary(iris)
my_data <- iris_wide
iris_wide
my_data[[3]]$Variable[8]
iris$Species[51]
arg1 <- 1
arg2 <-2
input <- 3
myFun <- function(arg1, arg2, x) {
if( is.numeric(c(arg1,arg2))){
result <- cor(arg1, arg2)
} else{
print("Input should be numeric") }
}
y <- myFun(1, 2, x)
y
```
```{r}
myFun <- function(arg1, arg2, input) {
if( is.numeric(c(arg1,arg2))){
result <- cor(arg1, arg2)
} else{
print("Input should be numeric") }
}
y <- myFun(1, 2, 5)
y+1
cor(1, 2)
a<-1
b<-2
myFun <- function(a, b) {
result1 <- a * b
result2 <- a^b
result3 <- seq(from = a, to = b, length.out = 10)
my_list <- list()
my_list[[1]] <- result1
my_list[[2]] <- result2
my_list[[3]] <- result3
return(my_list)
}
myFun(10, 20)
summary(iris)
iris_slct <- iris[(iris$Species == "setosa" & iris$Sepal.Length<4.5) | (iris$Species =="versicolor" & iris$Sepal.Width>3.5), ]
iris_slct2 <- iris[(iris$Species =="virginica" & iris$Sepal.Width>3.5), ]
iris_slct
iris_slct2
library(reshape2)
require(reshape2)
?sapply
?ggplot
?aes
?aggregate
?by
?split
?reshape2
?cars
split(iris)
```
//THE EXERCISE
dat is the input dataframe
cat_colnames is the column name to summarize by.
sel_colname is the column what will be filtered/ selected over
sel_value is the value we filter/select over
The function should select all rows smaller than the sel_value within the sel_colname column.
The function should calculate the mean of all columns of your selected data per level of your cat_colname column.
The result of the function must be a list containing the result of the filtering and the result of the summarization.
In your R- script also run the function you created, you can use the iris dataset to test.
Save the result of your function to disk using the save() function
Write the function in a R script. Save the file with the same name as the function and with the .R extension.
Upload the .R file and the upload the result of your function to BB.
```{r}
dat <-iris
#To get you started here are some hints:
my_fun <- function(dat, cat_colname, sel_colname, sel_value) {
#to filter/ select using the variable argument use:
dat[dat[ ,sel_colname]<sel_value, ]
#calculate mean in a for loop
for(i in seq_along(cat_colname)){
dat2[[i]]<-mean(dat[[i]])
}
#to summarize use in the aggregate function:
dat3 <- as.formula(paste0(". ~", cat_colname))
my_list2 <-list()
my_list2[[1]] <-dat2 #This is the mean
my_list2[[2]] <-dat #This is the selection
my_list2[[3]] <-dat3 #This is the summary
}
my_fun(1, 2, 3, 4)
```
//QUESTION 12
Use ggplot to plot the esoph dataset.
In 1 figure create a separate bar plot for each alcohol and age group combination. There are 6 age groups and 4 alcohol groups so there should be 24 bar plots.
Map the ncases and ncontrols to the x-axis (so there are 2 bars per plot). Map the corresponding values to the y-axis. Map the tobacco group to the fill aesthetic.
Save the figure, export as pdf using RStudio. With the export / save as pdf and preview you can easily adjust the size (use larger numbers to fit in more graphs)
If the Rstudio window for plotting is not working you can use pdf(file = , height = 12.., width = 12) {plot here} dev.off()
Don't forget to upload the plot!
Hint: first melt the numeric data to 1 column
```{r}
require(ggplot2)
require(reshape2)
head(midwest)
my_alcohol_plot <- esoph
my_alcohol_plot <- melt(esoph)
head(my_alcohol_plot)
par(mfrow=c(2, 12))
ggplot(data=my_alcohol_plot, aes(x=agegp, y=alcgp), hist)
sapply(esoph[, 1:4], hist)
1+1
require(stats)
require(graphics) # for mosaicplot
summary(esoph)
## effects of alcohol, tobacco and interaction, age-adjusted
model1 <- glm(cbind(ncases, ncontrols) ~ agegp + tobgp * alcgp,
data = esoph, family = binomial())
anova(model1)
## Try a linear effect of alcohol and tobacco
model2 <- glm(cbind(ncases, ncontrols) ~ agegp + unclass(tobgp)
+ unclass(alcgp),
data = esoph, family = binomial())
summary(model2)
## Re-arrange data for a mosaic plot
ttt <- table(esoph$agegp, esoph$alcgp, esoph$tobgp)
o <- with(esoph, order(tobgp, alcgp, agegp))
ttt[ttt == 1] <- esoph$ncases[o]
tt1 <- table(esoph$agegp, esoph$alcgp, esoph$tobgp)
tt1[tt1 == 1] <- esoph$ncontrols[o]
tt <- array(c(ttt, tt1), c(dim(ttt),2),
c(dimnames(ttt), list(c("Cancer", "control"))))
mosaicplot(tt, main = "esoph data set", color = TRUE)
str(esoph)
plot.new()
boxplot(x=esoph$agegp, y=esoph$alcgp)
1+1
```

View File

@ -0,0 +1,173 @@
---
title: "Case Study Sleep"
author: "Jonathan Herrewijnen"
date: "September 17, 2018"
output: word_document
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```
## R Markdown
This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see <http://rmarkdown.rstudio.com>.
When you click the **Knit** button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
```{r cars}
summary(cars)
```
## Including Plots
You can also embed plots, for example:
```{r pressure, echo=FALSE}
plot(pressure)
#For knitting deactivated
#install.packages("data")
data(sleep)
head(sleep)
sleep
```
Note that the `echo = FALSE` parameter was added to the code chunk to prevent printing of the R code that generated the plot.
We found two groups in our data, but we are not sure whether the second group is our control group or a second medicine. This is not clarified clearly in the data description. So we are assuming that the second group is the control group, but we are not certain.
//OPDRACHTEN - EXERCISES
Question 1)
Describe the dataset by explaining the variables. If any what are the replicates and/or controls?
```{r}
summary(sleep)
sleep
?sleep
```
See the code above, the table "sleep" contains 3 variables, namely: extra, group, ID.
-"extra" stands for the extra hours of sleep a student who took a certain drug
-"group" stands for the drug a student was given, with 2 different types of drugs
-"ID" stands for the patient, one ID per patient/student
20 observations and 3 variables are contained within the table. Second group is the control group.
Question 2)
Describe the performed experiment/ analysis. What was of interest/ what was the research question?
Students were given a drug, two seperate groups were created, in which the first group got a sleep prolongation drug, and second group was used as control group. The researchers were interested in the amount of prolonged sleep in hours.
Using ?sleep.
Question 3)
Summarize the data statistically.
See the summary and t.test below in the next code snippet box
Question 4)
Are there any clear outliers or missing values? If so, which ones?
The boxplot is not showing any outliers, so no outliers are visible.
```{r}
require(stats)
#QUESTION THREE
summary(sleep)
#Simple t.test
with(sleep, t.test(extra[group == 1], extra[group == 2], paired = TRUE))
?sleep
?boxplot
#QUESTION FOUR
#Individual plots
boxplot(with(sleep, extra[group==1]))
boxplot(with(sleep, extra[group==2]), add=TRUE)
boxplot(with(sleep, extra[group==1], extra[group==2]), paired=TRUE)
#plot(with(sleep, extra[group==2]), add = TRUE)
boxplot(sleep$extra ~ sleep$group, xlab="Groups", ylab="Sleep prolongation (h)")
points(sleep$extra ~ sleep$group, pch = 1)
```
Question 5)
Summarize the data graphically using only 1 figure. Choose a figure that summarizes the data adequately. Use the graphical package ggplot2. Explain your choice.
We have two groups and only one variable. So a boxplot is a logical choice for comparing both datasets.
```{r}
library(ggplot2)
ggplot(sleep, aes(x=group, y=extra)) + geom_boxplot()
```
Question 6)
Explore the data, include check for distribution/ normality range of variables mention number and type of variables and observations
```{r}
#Checking for distribution using a QQplot
par(mfrow=c(2, 1)) #plots 2 graphs
with(sleep, hist(extra[group==1]))
with(sleep, hist(extra[group==2]))
```
No distribution seems to be visible (if so, then a bell curve would be expected)
Question 7)
Calculate the mean sleep prolongation of the two drugs using the aggregate function on the sleep data frame.
```{r}
#Calculate mean sleep
aggregate(formula = extra~group, data = sleep, FUN = mean)
```
See function above, results are 0.75 hours and 2.33 hours extra sleep for drug one and two respectively
Question 8)
Calculate the difference in sleep prolongation based on drug type for each patient.
Store this new dataset in a new data frame including patient number. Do this by writing a function with as input argument the sleep data frame and as output the new data frame.
```{r}
my_dataframe <- data.frame(with(sleep, extra[group==1]-extra[group==2]))
my_dataframe
```
See above.
Question 9)
Is the sleep prolongation difference between the drugs (more) normaly distributed?
```{r}
shapiro.test(sleep$extra)
hist(sleep$extra)
```
So the p-value here is 0.3114, meaning that it differs signifcantly from a normal distribution. We can not assume that the data is normaly distributed.
Question 10)
Was there a significant difference in sleep prolongation between the drugs? Mention the effect size, is this of a relevant magnitude? Explain why.
Also explain why you chose this specific test.
```{r}
ThisData <- data.frame(with(sleep, mean(extra[group==1])), with(sleep, mean(extra[group==2])))
ThisData
with(sleep, t.test(extra[group == 1], extra[group == 2], paired = TRUE))
```
The mean between both drugs is very different, 0.75 to 2.33, implying a difference.
The t.test shows a difference of -1.58 between the test with a reliability of 0.002833.

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,216 @@
---
title: "Assignment ANOVA"
author: "Jonathan Herrewijnen"
output:
html_document:
theme: cerulean
toc: yes
---
Gebruik de tutorial om deze opdrachten te maken.
## Question 1: Coagulation
### Beschrijving data
Blood coagulation times by diet
The example dataset we will use is a set of 24 blood coagulation times. 24 animals were randomly assigned
to four different diets and the samples were taken in a random order. This data comes from Box, Hunter,
and Hunter (1978).
This data frame contains the following columns
* coag is the coagulation time in seconds
* diet are the diet type, here A,B,C or D
### Vragen
Q1A. Wat voor type data zijn coag en diet ?
Coag=nominaal; Diet=ordinaal
Q1B. Wat voor type statistische model(len) kunnen we gebruiken voor de analyse van deze data?
Anova
Q1C. Welke variabele is de response en welke de verklarende variabele?
Respons(y-as)=coag; Verklarende(x-as)=diet.
Q1D. Wat is de null-hypothese
H0: Er is geen effect van de diëten op de coagulatietijd.
### Inlezen van de data.
In deze vraag gebruiken we data uit het faraway pakket. Dit pakket moet je eerst downloaden als dat nog niet eerder is gedaan. Je kan controleren of het faraway pakket al is geinstaleerd via de `Package` tab in het venster rechtonder en type faraway in het zoekveld. Als deze niet is geinstalleerd kan je de install knop aanklikken in hetzelfde venster.
```{r}
library(faraway)
data(coagulation)
attach(coagulation)
```
Analyseer de data door gebruik te maken van de statistische werkvolgorde, zoals weergegevn in de ANOVA tutorial, en beantwoordt de volgende vragen.
Q1D. Zijn de aannamens voor het uitvoeren van een ANOVA correct?
Ja, alle gemiddeldes liggen rond de 0. Anova, analyse van varianties
```{r}
model<-aov(coag~diet)
str(model)
model$residuals
plot(model$residuals~diet)
```
Q1E. Mogen we de null-hypothese verwerpen?
```{r}
summary(model)
library(car)
qqPlot(model$residuals)
```
Ja, de waarde valt onder de 0,05.
Q1F. Welke waarde heeft de test-statistiek?
13,57 volgens de summary.
Q1G. Welke diet-afhankelijke coagulatie-tijden verschillen statistisch significant van elkaar?
Volgens mij code schrijven die de uiterste waarden weergeeft, en die met mekaar vergelijken.
## Question 2 Rabbit data
### Beschrijving van de data
A nutritionist studied the effects of six diets, on weight gain of domestic rabbits. From past experience with sizes of litters, it was felt that only 3 uniform rabbits could be selected from each available litter. There were ten litters available forming blocks of size three. In this analysis we only investigate the effect of diets on weight gained.
* treat is the diet
* gain is the weight gain
### Vragen.
Q2A. Wat voor type data zijn Treat en Weight?
Treat=dieet=6 verschillende=ordinaal
Weith=Waarde=Numeriek
Q2B. Wat voor een type model(len) kan je gebruiken om deze data te analyseren? Verklaar.
Alleen Anova, 6 vergelijkingen
Q2C. Wat zijn de de response variabele en verklarende variabele in deze dataset ?
Response=y-as=Weight
Verklarende=x-as=dieet
De data komen weer uit het faraway pakket.
```{r}
data(rabbit)
attach(rabbit)
```
Analyseer de data door gebruik te maken van de tutorial en beantwoord de volgende vragen.
Q1D. Zijn de aannamens voor het uitvoeren van een ANOVA correct? Verklaar.
Q1E. Mogen we de null-hypothese verwerpen? Verklaar.
Q1F. Welke niveaus of de diet factor verschillen statistisch significant van elkaar?
## Q3
A drug company tested three formulations of a pain relief medicine for
migraine headache sufferers. For the experiment 27 volunteers were selected
and 9 were randomly assigned to one of three drug formulations. The subjects
were instructed to take the drug during their next migraine headache episode and
to report their pain on a scale of 1 to 10 (10 being most pain).
### Inlezen data
we voeren de data nu met de hand in.
```{r}
Pain = c(4, 5, 4, 3, 2, 4, 3, 4, 4, 6, 8, 4, 5, 4, 6, 5, 8, 6, 6, 7, 6, 6, 7, 5, 6, 5, 5)
Drug = gl(3,9,labels=c("A", "B", "C"))
DATA<-data.frame(Pain, Drug)
```
Gebuik de tutorial om de data te analyseren in de context van de volgende vraag: Is er een verschil in pijn bestrijding door de verschillende medicijnen bij migraine?
## Q4: Typical exam questions
De tabel hieronder is gegeven maar er missen wat data (?).
```
Df Sum Sq Mean Sq F value Pr(>F)
VerklarendeF 3 ?1 2.199 ?3 0.0945 .
Residuals 36 34.53 ?2
```
Q4.A Bereken de waarden die op de plaatsen van de vraagtekens moeten staan (?).
Q4.B Welke eigenschappen van de t-statistiek en F-statistiek komen overeen?
## Simulatie ANOVA experiment.
Eerst maken we de factor. We hebben twee versies nodig van de factor, een numerieke en een categorische. De numerieke is alleen een lijst van de niveaus gemiddelde.
```{r}
VerklarendeN<-c(rep(2,10),rep(2.2,10),rep(2.8,10),rep(3.4,10) )
```
dan de categorische versie wat een naam aan de gemiddelde koppeld.
```{r}
VerklarendeF<-gl(4,10, labels=c("hokus", "Pokus", "Pilatus", "Pas"))
```
We maken nu een ANOVA model alleen met de gemiddelde en plotten het resultaat
```{r}
Response<-VerklarendeN
plot(Response~VerklarendeF)
```
We zien alleen de verschillende factor gemiddelde liggen, de between-group variance.
Nu voegen we de rest term toe ofwel we geven de within-group varianec een waarde. We voegen daarom een stochatische variabele toe met een gemiddelde van 0 en een bepaalde standaard deviatie (sd) hier 1.2.
```{r}
ResponseR<-VerklarendeN+rnorm(40,0,1.2)
Dat2<-data.frame(ResponseR,VerklarendeF)
summary(Dat2)
plot(ResponseR~ VerklarendeF)
points(ResponseR~ VerklarendeF)
```
Voer een analyse uit op deze data en pas de gemiddelde zo aan dat de test net significant is. Hiervoor moeten ze de VerklarendeF
aanpassen. Letop ik gebruik geen set.seed.

180
Other/Test ICT 7 Sept.Rmd Normal file
View File

@ -0,0 +1,180 @@
---
title: "Test 7 Sept"
author: "Jonathan Herrewijnen"
date: "September 7, 2018"
output: html_document
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```
## R Markdown
This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see <http://rmarkdown.rstudio.com>.
When you click the **Knit** button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
```{r cars}
summary(cars)
```
## Including Plots
You can also embed plots, for example:
```{r pressure, echo=FALSE}
plot(pressure)
```
Note that the `echo = FALSE` parameter was added to the code chunk to prevent printing of the R code that generated the plot.
//Question 1
```{r}
library(Deriv)
t=5.49116
Bp1=exp(3.2)+((5.7*t)/(t+13.0))*exp(-0.026*t)
Bp1
#T VERVANGEN VOOR X
f1=function(x) {exp(3.2)+((5.7*x)/(x+13.0))*exp(-0.026*x)}
#Question 2: use uniroot, or should I? t=5.49116
?uniroot
root=uniroot(function(x), f1(x), c(-1, 1), x=26)$root
root$root
#Werkt niet echt: formule ombouwen
#Bp1={exp(3.2)+((5.7*x)/(x+13.0))*exp(-0.026*x)}=26
#Question 3: Create derivative and put to 0
x=0
df.f1 = Deriv(f1, "x")
df.f1
#FOR QUESTION 3: put in x=0! Run df.f1 for exact formula -> t=0.4384615
function (x)
{
.e1 <- 13 + 0
(5.7 - 0 * (0.1482 + 5.7/.e1)) * exp(-(0.026 * 0))/.e1
}
#Question 4: IF PREVIOUS ANSWER IS CORRECT
#t=100000000 #for Q5
Bp1=exp(3.2)+((5.7*t)/(t+13.0))*exp(-0.026*t)
Bp1 #[1] 24.7164 ??
#Quick Curve
curve(f1(x), -10, 40000, col="red", ylab="f1(x)")
##QUESTION 3+4 LATER?
#Question 5: Just put in a huge number...
#Question 6: FUNCTION + CHANGE TO X
Bp1=exp(3.2)+((5.7*t)/(t+13.0))*exp(-0.026*t)
f1=function(x) {exp(3.2)+((5.7*x)/(x+13.0))*exp(-0.026*x)}
Bp2=exp(3.2)+((6.3*t)/(t+5.0))*exp(-0.06*t)
f2=function(x) {exp(3.2)+((6.3*x)/(x+5.0))*exp(-0.06*x)}
uniroot(function(x) f1(x)-f2(x), c(12, 24))$root
#WAAROM ANDERS ALS c(12, 14)?
x <- exp(10)
#Create curve
curve(f1(x), 13.50909, 13.5091, col="red", ylab="biomarker concentration", xlab="time", ylim=c(26.576903, 26.57695))
curve(f2(x), 13.50909, 13.5091, col="blue", add=TRUE)
#Question 7: Intersect of both lines...
#Done manually need to use uniroot
#Question 8: See new line!
#Question 3: Again!
Bp1=exp(3.2)+((5.7*t)/(t+13.0))*exp(-0.026*t)
f1=function(x) {exp(3.2)+((5.7*x)/(x+13.0))*exp(-0.026*x)}
Bp2=exp(3.2)+((6.3*t)/(t+5.0))*exp(-0.06*t)
f2=function(x) {exp(3.2)+((6.3*x)/(x+5.0))*exp(-0.06*x)}
f1(0)
plot((x), f1, f2)
Deriv(f1, "x")
function (x)
{
.e1 <- 13 + 0
(5.7 - 0 * (0.1482 + 5.7/.e1)) * exp(-(0.026 * 0))/.e1
}
f=expression(exp(3.2)+((5.7*x)/(x+13.0))*exp(-0.026*x))
D(f, 'x')
ThisIs=(5.7/(x + 13) - (5.7 * x)/(x + 13)^2) * exp(-0.026 * x) - ((5.7 *
x)/(x + 13)) * (exp(-0.026 * x) * 0.026)
ThisIs
plot((x),ThisIs)
```
//QUESTION 8+
```{r}
#install.packages("deSolve")
library("deSolve")
#Question 8: new equasion
k1=0.3
k2=0.1
k3=0.9
b=2.0
S=10.0
R=3.0
MR=k1*S*(R-b)+(k2*(R-b)^2)-(k3*(R-b)^3)
#USING ODE
RThisProtein = function(timepoint, state, parameters) {
with(as.list(c(state, parameters)), {
dR = k1*S*(R-b)+(k2*(R-b)^2)-(k3*(R-b)^3)
list(c(dR))
})
}
pars = c(k1=0.3, k2=0.1, k3=0.9, b=2.0, S=30, 30, 30, 30, 30, seq(30, 5, by=1.66667))
inistate = c(R=3.0)
inistate.1 = c(R = 0)
inistate.2 = c(R = 2)
inistate.3 = c(R = 4)
inistate.4 = c(R = 6)
finish = seq(0, 2, by = 0.1)
library("deSolve")
out.1 = ode(y = inistate.1, times = finish, func = RThisProtein, parms = pars)
out.2 = ode(y = inistate.2, times = finish, func = RThisProtein, parms = pars)
out.3 = ode(y = inistate.3, times = finish, func = RThisProtein, parms = pars)
out.4 = ode(y = inistate.4, times = finish, func = RThisProtein, parms = pars)
plot(out.1, main = "time", ylab = "Synthesis of R", col="red", ylim=c(0, 6))
lines(out.2, col="Green")
lines(out.3, col="blue")
lines(out.4, col="yellow")
library("phaseR")
#Question 10: time 0-5
phaseport = phasePortrait(RThisProtein, parameters = pars, ylim = c(0, 20), points = 10,
xlab = "Time", ylab = "dR/dt", state.names = c("R"), main="Q13")
grid() # the function grid adds a grid to the plot
?title
```

3
readme.md Normal file
View File

@ -0,0 +1,3 @@
Collection of scripts from the Leiden University. RP1 and RP2 are scripts from my first and second research project (Leiden University & Galapagos). BOO concerns the 'Bachelors Onderzoek Opdracht' - a bachelor thesis.
Other scripts are either made or obtained during courses, but have not specific project attached to them.