-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtask (2)
More file actions
36 lines (35 loc) · 1.47 KB
/
task (2)
File metadata and controls
36 lines (35 loc) · 1.47 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
# ---- Packages ----
# Install only the packages that are not already available. An unconditional
# install.packages() call would hit the network and reinstall on every run.
required_packages <- c("dplyr", "ggplot2", "tidyr", "corrplot")
is_installed <- vapply(
  required_packages,
  requireNamespace,
  logical(1),
  quietly = TRUE
)
if (any(!is_installed)) {
  install.packages(required_packages[!is_installed])
}

library(dplyr)
library(ggplot2)
library(tidyr)
library(corrplot)
# ---- Load data ----
# Let the user pick the Titanic CSV interactively and read exactly that file.
# Previously the chosen path was ignored and a hard-coded, machine-specific
# Windows path was read instead, so the script only worked on one computer.
t1 <- file.choose()
print(t1) # show which file is being analysed
titanic <- read.csv(t1)
# ---- Inspect and clean ----
# Number of NA values in each column, before any imputation.
# vapply() guarantees an integer vector even for a zero-column data frame.
vapply(titanic, function(column) sum(is.na(column)), integer(1))

# Replace missing ages with the median of the observed ages.
titanic$Age[is.na(titanic$Age)] <- median(titanic$Age, na.rm = TRUE)

# With the default na.strings, read.csv() keeps blank text fields as ""
# rather than NA, so an is.na() test alone misses them. Treat both NA and
# the empty string as missing.
embarked_missing <- is.na(titanic$Embarked) | titanic$Embarked == ""
titanic$Embarked[embarked_missing] <- "S" # Assuming "S" is the mode for Embarked

# Encode the categorical variables as factors; Survived gets readable labels.
titanic$Survived <- factor(
  titanic$Survived,
  levels = c(0, 1),
  labels = c("No", "Yes")
)
titanic$Pclass <- factor(titanic$Pclass)
titanic$Sex <- factor(titanic$Sex)

# Drop the columns that are not used in the rest of the script.
titanic <- titanic %>%
  select(-PassengerId, -Name, -Ticket, -Cabin)

summary(titanic)
# ---- Univariate distributions ----
# Histogram of Age using bins of width 5.
ggplot(data = titanic, mapping = aes(Age)) +
  geom_histogram(colour = "black", fill = "skyblue", binwidth = 5) +
  labs(title = "Age Distribution of Titanic Passengers") +
  theme_minimal()

# Histogram of Fare using bins of width 10.
ggplot(data = titanic, mapping = aes(Fare)) +
  geom_histogram(colour = "black", fill = "skyblue", binwidth = 10) +
  labs(title = "Fare Distribution of Titanic Passengers") +
  theme_minimal()
# ---- Survival by group ----
# position = "fill" stacks each bar to a height of 1, so the y-axis shows the
# share of passengers in each Survived category rather than a raw count.
# The default axis label would still read "count", so it is set explicitly.
ggplot(titanic, aes(x = Sex, fill = Survived)) +
  geom_bar(position = "fill") +
  labs(
    title = "Survival Rate by Gender",
    y = "Proportion of passengers"
  ) +
  theme_minimal()

ggplot(titanic, aes(x = Pclass, fill = Survived)) +
  geom_bar(position = "fill") +
  labs(
    title = "Survival Rate by Passenger Class",
    y = "Proportion of passengers"
  ) +
  theme_minimal()
# ---- Correlation between numeric variables ----
# Keep only the numeric columns (is.numeric() is FALSE for factors).
# select(where(...)) replaces the superseded select_if().
numeric_titanic <- titanic %>%
  select(where(is.numeric))

# Pairwise correlations, computed on rows with no missing values.
correlation_matrix <- cor(numeric_titanic, use = "complete.obs")

# Draw the matrix with one circle per pair of variables.
corrplot(correlation_matrix, method = "circle")