-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathHousePrices.R
More file actions
84 lines (65 loc) · 2.69 KB
/
HousePrices.R
File metadata and controls
84 lines (65 loc) · 2.69 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
# Load the Kaggle house-sale training and test sets from the Downloads folder
houseSale1 <- read.csv("~/Downloads/kaggleHouseSaleTrain.csv")
houseSale2 <- read.csv("~/Downloads/kaggleHouseSaleTest.csv")
# Stack both sets into one data frame. SalePrice is set to NA on the test
# rows first (presumably the test file carries no SalePrice column; rbind()
# needs both data frames to have the same columns)
houseSale2[["SalePrice"]] <- NA
houseSale <- rbind(houseSale1, houseSale2)
# Print a compact overview of the columns of the combined data
str(houseSale)
# Effect of the MS SubClass on the SalePrice
# Treat the MSSubClass codes as categories rather than as numbers
houseSale$MSSubClass <- as.factor(houseSale$MSSubClass)
library(ggplot2)
# Box plot of the known sale prices (rows with a missing SalePrice are left
# out). Prices above 300000 are regarded as outliers: ylim() limits the y
# scale to 0-300000, so rows outside that range are dropped (with a warning)
# before the box plot statistics are computed.
# Every layer must be chained with `+`; without it the title would be
# evaluated as a separate statement and never reach the plot.
ggplot(
  data = subset(houseSale, !is.na(SalePrice)),
  mapping = aes(x = 0, y = SalePrice)
) +
  geom_boxplot() +
  ylim(c(0, 300000)) +
  ggtitle("Sale Price Of The Houses")
# Treat the MSSubClass codes of the training data as categories
houseSale1$MSSubClass <- as.factor(houseSale1$MSSubClass)
# MSSubClass vs. sale price for sale prices less than 300000:
# one bar per MSSubClass level, bar height = mean SalePrice of that level.
# `fun` is the current name of the summary-function argument; the older
# `fun.y` is deprecated and is not picked up when passed through geom_bar().
ggplot(
  data = subset(houseSale1, SalePrice < 300000),
  mapping = aes(x = MSSubClass, y = SalePrice)
) +
  geom_bar(stat = "summary", fun = mean) +
  ggtitle("MSSubclass Vs Sale Price")
# Combining the levels based on influence on sale price
# Adds the MSSubClassStar variable to the data: every MSSubClass code is
# mapped onto one of four groups, coded 1 to 4, and stored as a factor.

# Map a vector of MSSubClass codes (factor, character or numeric) onto the
# group codes 1-4.
#   group 1: 30, 45, 180
#   group 2: 50, 85, 90, 160, 190
#   group 3: 20, 40, 70, 75, 80
#   group 4: every other code
# Returns a numeric vector of the same length as the input. A missing code
# stays missing instead of being filed under group 4. Works on the whole
# vector at once, so zero-length input is handled as well.
ms_subclass_star <- function(ms_sub_class) {
  code <- as.character(ms_sub_class)
  star <- rep(4, length(code))
  star[code %in% c("30", "45", "180")] <- 1
  star[code %in% c("50", "85", "90", "160", "190")] <- 2
  star[code %in% c("20", "40", "70", "75", "80")] <- 3
  star[is.na(code)] <- NA
  star
}

houseSale$MSSubClassStar <- as.factor(ms_subclass_star(houseSale$MSSubClass))
# Plotting MSSubClassStar vs. the sale price for sale prices less than 300000:
# one bar per group, bar height = mean SalePrice of that group. subset()
# also drops the rows whose SalePrice is NA.
# `fun` is the current name of the summary-function argument; the older
# `fun.y` is deprecated and is not picked up when passed through geom_bar().
ggplot(
  data = subset(houseSale, SalePrice < 300000),
  mapping = aes(x = MSSubClassStar, y = SalePrice)
) +
  geom_bar(stat = "summary", fun = mean, width = 0.5) +
  ggtitle("MSSubclass Vs Sale Price")
# Plotting MSSubClassStar vs. the sale price for sale prices greater than
# 300000 (same layout, light blue bars)
ggplot(
  data = subset(houseSale, SalePrice > 300000),
  mapping = aes(x = MSSubClassStar, y = SalePrice)
) +
  geom_bar(stat = "summary", fun = mean, width = 0.5, fill = "lightblue") +
  ggtitle("MSSubclass Vs Sale Price")
# MSZoning vs. sale price for sale prices less than 300000:
# one bar per MSZoning level, bar height = mean SalePrice of that level.
# The training data is restricted to SalePrice < 300000 so that the plotted
# data matches what the title states.
# `fun` is the current name of the summary-function argument; the older
# `fun.y` is deprecated and is not picked up when passed through geom_bar().
ggplot(
  data = subset(houseSale1, SalePrice < 300000),
  mapping = aes(x = MSZoning, y = SalePrice)
) +
  geom_bar(stat = "summary", fun = mean, width = 0.5) +
  ggtitle("MSZoning Vs Sale Price Less Than 300000")