library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Question 1 ----
# Load the built-in iris data set into the workspace and open it in the
# data viewer for inspection.
data("iris")
tibble::view(iris)
# iris contains 150 observations (rows) and 5 variables (columns).
# Question 2 ----
# Keep only virginica and versicolor flowers whose sepal length is above 6.0
# and whose sepal width is above 2.5. filter() removes rows only, so every
# column of iris is still present in iris1.
iris1 <- iris %>%
  filter(
    Species %in% c("virginica", "versicolor"),
    Sepal.Length > 6.0,
    Sepal.Width > 2.5
  )
view(iris1)
# There are 56 observations and 5 variables in the iris1 dataset.
# Question 3 ----
# Reduce iris1 to the species label and the two sepal measurements.
iris2 <- iris1 %>%
  select(Species, Sepal.Length, Sepal.Width)
# iris2 has 56 observations and 3 variables.
# Question 4 ----
# Sort iris2 by sepal length in ascending order and show the first six rows.
# arrange() has no `by` argument; the sort column is passed as a plain,
# unnamed expression.
iris3 <- iris2 %>%
  arrange(Sepal.Length)
head(iris3)
## Species Sepal.Length Sepal.Width
## 1 versicolor 6.1 2.9
## 2 versicolor 6.1 2.8
## 3 versicolor 6.1 2.8
## 4 versicolor 6.1 3.0
## 5 virginica 6.1 3.0
## 6 virginica 6.1 2.6
# Question 5 ----
# Add a Sepal.Area column (sepal length times sepal width) to the sorted
# data and print the full result.
iris4 <- iris3 %>%
  mutate(Sepal.Area = Sepal.Length * Sepal.Width)
print(iris4)
## Species Sepal.Length Sepal.Width Sepal.Area
## 1 versicolor 6.1 2.9 17.69
## 2 versicolor 6.1 2.8 17.08
## 3 versicolor 6.1 2.8 17.08
## 4 versicolor 6.1 3.0 18.30
## 5 virginica 6.1 3.0 18.30
## 6 virginica 6.1 2.6 15.86
## 7 versicolor 6.2 2.9 17.98
## 8 virginica 6.2 2.8 17.36
## 9 virginica 6.2 3.4 21.08
## 10 versicolor 6.3 3.3 20.79
## 11 virginica 6.3 3.3 20.79
## 12 virginica 6.3 2.9 18.27
## 13 virginica 6.3 2.7 17.01
## 14 virginica 6.3 2.8 17.64
## 15 virginica 6.3 3.4 21.42
## 16 versicolor 6.4 3.2 20.48
## 17 versicolor 6.4 2.9 18.56
## 18 virginica 6.4 2.7 17.28
## 19 virginica 6.4 3.2 20.48
## 20 virginica 6.4 2.8 17.92
## 21 virginica 6.4 2.8 17.92
## 22 virginica 6.4 3.1 19.84
## 23 versicolor 6.5 2.8 18.20
## 24 virginica 6.5 3.0 19.50
## 25 virginica 6.5 3.2 20.80
## 26 virginica 6.5 3.0 19.50
## 27 virginica 6.5 3.0 19.50
## 28 versicolor 6.6 2.9 19.14
## 29 versicolor 6.6 3.0 19.80
## 30 versicolor 6.7 3.1 20.77
## 31 versicolor 6.7 3.0 20.10
## 32 versicolor 6.7 3.1 20.77
## 33 virginica 6.7 3.3 22.11
## 34 virginica 6.7 3.1 20.77
## 35 virginica 6.7 3.3 22.11
## 36 virginica 6.7 3.0 20.10
## 37 versicolor 6.8 2.8 19.04
## 38 virginica 6.8 3.0 20.40
## 39 virginica 6.8 3.2 21.76
## 40 versicolor 6.9 3.1 21.39
## 41 virginica 6.9 3.2 22.08
## 42 virginica 6.9 3.1 21.39
## 43 virginica 6.9 3.1 21.39
## 44 versicolor 7.0 3.2 22.40
## 45 virginica 7.1 3.0 21.30
## 46 virginica 7.2 3.6 25.92
## 47 virginica 7.2 3.2 23.04
## 48 virginica 7.2 3.0 21.60
## 49 virginica 7.3 2.9 21.17
## 50 virginica 7.4 2.8 20.72
## 51 virginica 7.6 3.0 22.80
## 52 virginica 7.7 3.8 29.26
## 53 virginica 7.7 2.6 20.02
## 54 virginica 7.7 2.8 21.56
## 55 virginica 7.7 3.0 23.10
## 56 virginica 7.9 3.8 30.02
# Question 6 ----
# Collapse iris4 to a single row: mean sepal length, mean sepal width, and
# the number of rows that went into those means.
iris5 <- iris4 %>%
  summarise(
    AvgSepalLength = mean(Sepal.Length),
    AvgSepalWidth = mean(Sepal.Width),
    SampleSize = n()
  )
print(iris5)
## AvgSepalLength AvgSepalWidth SampleSize
## 1 6.698214 3.041071 56
# Question 7 ----
# Same summary as for iris5, but computed separately for each species:
# one output row per species with its mean sepal length, mean sepal width,
# and row count.
iris6 <- iris4 %>%
  group_by(Species) %>%
  summarise(
    Avg.Sepal.Length = mean(Sepal.Length),
    Avg.Sepal.Width = mean(Sepal.Width),
    Sample.Size = n()
  )
print(iris6)
## # A tibble: 2 × 4
## Species Avg.Sepal.Length Avg.Sepal.Width Sample.Size
## <fct> <dbl> <dbl> <int>
## 1 versicolor 6.48 2.99 17
## 2 virginica 6.79 3.06 39
# Question 8 ----
# All of the previous steps chained into a single pipeline, starting from
# the raw iris data and ending with the per-species summary.
irisFinal <- iris %>%
  # Rows: virginica/versicolor with sepal length > 6.0 and sepal width > 2.5.
  filter(
    Species %in% c("virginica", "versicolor"),
    Sepal.Length > 6.0,
    Sepal.Width > 2.5
  ) %>%
  # Columns: species plus the two sepal measurements.
  select(Species, Sepal.Length, Sepal.Width) %>%
  # arrange() has no `by` argument; pass the sort column directly.
  arrange(Sepal.Length) %>%
  # Sepal.Area is created here but is not used by the summary below.
  mutate(Sepal.Area = Sepal.Length * Sepal.Width) %>%
  group_by(Species) %>%
  summarize(
    Avg.Sepal.Length = mean(Sepal.Length),
    Avg.Sepal.Width = mean(Sepal.Width),
    Sample.Size = n()
  )
print(irisFinal)
## # A tibble: 2 × 4
## Species Avg.Sepal.Length Avg.Sepal.Width Sample.Size
## <fct> <dbl> <dbl> <int>
## 1 versicolor 6.48 2.99 17
## 2 virginica 6.79 3.06 39
# Question 9 ----
# Reshape iris from wide to long: the four measurement columns
# (Sepal.Length through Petal.Width) are stacked into a "Measure" column
# holding the former column name and a "Value" column holding the number.
# The argument is spelled `cols` in full rather than relying on partial
# argument matching of `col`.
Bigdataset <- iris %>%
  pivot_longer(
    cols = Sepal.Length:Petal.Width,
    names_to = "Measure",
    values_to = "Value"
  )
print(Bigdataset)
## # A tibble: 600 × 3
## Species Measure Value
## <fct> <chr> <dbl>
## 1 setosa Sepal.Length 5.1
## 2 setosa Sepal.Width 3.5
## 3 setosa Petal.Length 1.4
## 4 setosa Petal.Width 0.2
## 5 setosa Sepal.Length 4.9
## 6 setosa Sepal.Width 3
## 7 setosa Petal.Length 1.4
## 8 setosa Petal.Width 0.2
## 9 setosa Sepal.Length 4.7
## 10 setosa Sepal.Width 3.2
## # ℹ 590 more rows