Homework 7: Data Manipulation using the dplyr package
Question 1
# Load the iris dataset and report the class of the resulting object.
data("iris")
class(iris)
## [1] "data.frame"
150 Observations and 5 Variables
Question 2
# Keep only virginica and versicolor rows whose sepal length exceeds 6
# and whose sepal width exceeds 2.5.
iris1 <- iris %>%
  filter(
    Species %in% c("virginica", "versicolor"),
    Sepal.Length > 6,
    Sepal.Width > 2.5
  )
56 Observations and 5 Variables
Question 3
# Reduce iris1 to the species label and the two sepal measurements.
iris2 <- iris1 %>%
  select(Species, Sepal.Length, Sepal.Width)
56 Observations and 3 Variables
Question 4
# Sort iris2 from longest to shortest sepal.
# arrange() has no `by` argument: sort keys are passed unnamed through `...`,
# so the ordering expression is given directly.
iris3 <- arrange(iris2, desc(Sepal.Length))
# Show the first six rows of the sorted data.
head(iris3)
## Species Sepal.Length Sepal.Width
## 1 virginica 7.9 3.8
## 2 virginica 7.7 3.8
## 3 virginica 7.7 2.6
## 4 virginica 7.7 2.8
## 5 virginica 7.7 3.0
## 6 virginica 7.6 3.0
56 Observations and 3 Variables
Question 5
# Append Sepal.Area, the product of sepal length and sepal width.
iris4 <- iris3 %>%
  mutate(Sepal.Area = Sepal.Length * Sepal.Width)
56 Observations and 4 Variables
Question 6
# Overall mean sepal length, mean sepal width, and row count of iris4.
iris5 <- iris4 %>%
  summarize(
    meanSepal.Length = mean(Sepal.Length),
    meanSepal.Width = mean(Sepal.Width),
    sampleSizeIris4 = n()
  )
print(iris5)
## meanSepal.Length meanSepal.Width sampleSizeIris4
## 1 6.698214 3.041071 56
1 Observation and 3 Variables
Question 7
# Per-species mean sepal length, mean sepal width, and row count of iris4.
iris6 <- summarize(
  group_by(iris4, Species),
  meanSepal.Length = mean(Sepal.Length),
  meanSepal.Width = mean(Sepal.Width),
  sampleSizeIris4 = n()
)
2 Observations and 4 Variables
Question 8
# Single pipeline: filter, select, sort, add Sepal.Area, then summarise
# per species.
iris6b <- iris %>%
  filter(
    Species %in% c("virginica", "versicolor"),
    Sepal.Length > 6,
    Sepal.Width > 2.5
  ) %>%
  select(Species, Sepal.Length, Sepal.Width) %>%
  # arrange() has no `by` argument; the sort key is passed unnamed.
  arrange(desc(Sepal.Length)) %>%
  mutate(Sepal.Area = Sepal.Length * Sepal.Width) %>%
  group_by(Species) %>%
  summarize(
    meanSepal.Length = mean(Sepal.Length),
    meanSepal.Width = mean(Sepal.Width),
    sampleSizeIris4 = n()
  )
Yields the same output as iris6 in Question 7.
Question 9
# Reshape iris to long format: the first four columns are stacked, with the
# former column name stored in Measure and the measurement in Value.
pivot_longer(
  iris,
  cols = 1:4,
  names_to = "Measure",
  values_to = "Value"
)
## # A tibble: 600 × 3
## Species Measure Value
## <fct> <chr> <dbl>
## 1 setosa Sepal.Length 5.1
## 2 setosa Sepal.Width 3.5
## 3 setosa Petal.Length 1.4
## 4 setosa Petal.Width 0.2
## 5 setosa Sepal.Length 4.9
## 6 setosa Sepal.Width 3
## 7 setosa Petal.Length 1.4
## 8 setosa Petal.Width 0.2
## 9 setosa Sepal.Length 4.7
## 10 setosa Sepal.Width 3.2
## # … with 590 more rows