diff --git a/Exercise 09 Data - Sheet1.csv b/Exercise 09 Data - Sheet1.csv
new file mode 100644
index 0000000..fb6b761
--- /dev/null
+++ b/Exercise 09 Data - Sheet1.csv
@@ -0,0 +1,31 @@
+Response_id,Gender,Sleep,GPA
+134379,Female,7,3.8
+134408,Female,8,4
+134411,Female,8,3.95
+134516,Male,4,3.6
+134554,Male,9,3.5
+134673,Female,7,3
+134676,Female,5,3.61
+134682,Female,6,3.5
+134687,Female,5,3.6
+134844,Female,7,3
+134847,Male,5,3
+134858,Female,6,3.5
+135004,Female,6,3.6
+136642,Female,6,3
+136654,Male,7,2.8
+139078,Female,6,3
+139492,Male,6,3.2
+143540,Male,8,2.75
+147042,Male,7,4
+147077,Female,7,3.2
+147113,Female,6,3
+147130,Male,5,3.7
+147149,Male,8,3.987
+147152,Male,7,3.25
+147153,Female,6,3.8
+147169,Female,6,3.7
+149006,Male,6,2.5
+149020,Female,6,3.01
+149025,Male,9,3
+149027,Female,7,3.5
\ No newline at end of file
diff --git a/Exercise09.Answers.R b/Exercise09.Answers.R
new file mode 100644
index 0000000..6ea2626
--- /dev/null
+++ b/Exercise09.Answers.R
@@ -0,0 +1,45 @@
+#Exercise09
+#Run this script with the Exercise09 folder as the working directory so the relative file names below resolve.
+#The folder must contain "Exercise 09 Data - Sheet1.csv" and "data.txt".
+#No setwd() call is made, so the script does not depend on one user's directory layout.
+
+#Load GGPlot and CowPlot (loaded once for both questions)
+library(cowplot)
+library(ggplot2)
+
+#Question 1: Using data online to make a scatter plot of the variables
+#Data comes from hours sleeping and GPA, with the expectation that more sleep leads to a higher GPA
+#Load Data into a data frame
+Sleepdata <- read.csv("Exercise 09 Data - Sheet1.csv", header = TRUE)
+#Make Scatter Plot with a linear trendline (Theme set to classic for aesthetic)
+#NOTE(review): the stated hypothesis treats sleep as the predictor, which would conventionally go on the x-axis; the axes are kept as originally submitted - confirm which orientation is intended.
+sleep_plot <- ggplot(Sleepdata, aes(x = GPA, y = Sleep)) +
+  geom_point() +
+  geom_smooth(method = lm) +
+  xlab("GPA") +
+  ylab("Hours of Sleep") +
+  theme_classic()
+#print() draws the plot even when the script is run through source()
+print(sleep_plot)
+#This data is not what I expected to see. The trend line does not follow the expected pattern of a higher GPA correlating with more sleep.
+
+#Question 2
+#Read Data into R
+data1 <- read.csv("data.txt", header = TRUE)
+#Bar plot of the Mean observation for each region
+mean_plot <- ggplot(data1, aes(x = region, y = observations)) +
+  stat_summary(geom = "bar", fun = "mean") +
+  xlab("Region") +
+  ylab("Mean of Observations") +
+  theme_bw()
+print(mean_plot)
+#Scatter Plot of All Observations (jittered and semi-transparent so overlapping points stay visible)
+all_points_plot <- ggplot(data1, aes(x = region, y = observations)) +
+  geom_jitter(alpha = 0.1) +
+  xlab("Region") +
+  ylab("Observations")
+print(all_points_plot)
+#Do the Bar and Scatter Plots tell different stories?
+#The scatter plot and the bar plot tell two different stories of the data. Simply looking at the bar plot gives a mean of the observations for each region, which is very close to 15 for each region. Just looking at the bar plot would lead one to believe that the data for each region was very similar.
+#When looking at the scatter plot, it demonstrates the distribution of the points for each region. For example, the observations in the North region are all concentrated around the mean of 15, while in the South there are two groups, one concentrated around 7 observations and another around 22 observations, which will end up leading to a mean of 15 as well.
+#I believe that looking at the scatter plot gives a better idea of the distribution of the data for each region, as simply looking at the means gives a false sense of similarity between the four regions.