# 4063 Final复习资料
# 4063 Final复习资料
# 4063 Final复习资料
# Descriptive statistics for `some_numbers` (expected to exist before this
# point). Wrapping an assignment in parentheses stores the value and also
# displays it at top level.
(median_some_numbers <- median(some_numbers))
(range_some_numbers <- range(some_numbers))
(quantile_some_numbers <- quantile(some_numbers))

# Standard deviation
(standard_deviation_some_numbers <- sd(some_numbers))

# A second, literal vector to compare summaries against.
different_numbers <- c(1, 3, 4, 5, 6, 7, 7, 7, 3, 8, 10)
summary(some_numbers)
summary(different_numbers)
View(some_numbers)

# Visualizing
plot(some_numbers)
some_numbers
plot(
  some_numbers,
  type = "h",
  col = "pink",
  main = "statistics",
  xlab = "Value",
  ylab = "number"
)
hist(some_numbers)
barplot(some_numbers)
boxplot(some_numbers)

# Combine both vectors column-wise, show the result, and plot it.
(some_dataframe <- data.frame(some_numbers, different_numbers))
plot(
  some_dataframe,
  type = "h",
  col = "pink",
  main = "statistics",
  xlab = "Value",
  ylab = "number"
)

# Overwrite the value in row 10 of the second column, then show the result.
some_dataframe[10, 2] <- 50
some_dataframe
# 5.
# Work on a copy of `Stocks` (expected to be loaded before this point).
mydata <- Stocks
str(mydata)
head(mydata)
View(mydata)

# Convert Stock1..Stock10 to numeric. Going through as.character() first
# makes a factor column yield its labels rather than its integer codes.
stock_cols <- paste0("Stock", 1:10)
mydata[stock_cols] <- lapply(
  mydata[stock_cols],
  function(column) as.numeric(as.character(column))
)

# Rating is categorical.
mydata$Rating <- as.factor(as.character(mydata$Rating))
str(mydata)

# Check for missing ("NA") values: overall count, then per-row completeness.
table(is.na(mydata))
complete.cases(mydata)

# Replace the NAs in Stock1 with the mean of its non-missing values.
# NOTE(review): only Stock1 is imputed here; the other stock columns keep
# their NAs -- confirm whether that is intended.
mydata$Stock1[is.na(mydata$Stock1)] <- mean(mydata$Stock1, na.rm = TRUE)
# 6.
# Toy naive Bayes example: predict `lawn` from weather, time and health.
# NOTE(review): naiveBayes() is presumably e1071::naiveBayes and the package
# is assumed to be attached elsewhere -- confirm.

# Use three columns of data (plus the `lawn` label)
weather <- c("rainy", "snow", "sunny")
time <- c("urgent", "adequate", "adequate")
health <- c("bad", "good", "good")
lawn <- c("no", "yes", "yes")
dataset <- data.frame(weather, time, health, lawn)
View(dataset)
str(dataset)

# Every column is categorical, so store each one as a factor.
dataset$weather <- as.factor(as.character(dataset$weather))
dataset$time <- as.factor(as.character(dataset$time))
dataset$health <- as.factor(as.character(dataset$health))
dataset$lawn <- as.factor(as.character(dataset$lawn))
str(dataset)

# Predicted
set.seed(999)
# `trainIndex` was used without ever being created; draw a reproducible
# 70% sample of row numbers for the training set.
trainIndex <- sample(seq_len(nrow(dataset)), size = floor(0.7 * nrow(dataset)))
train <- dataset[trainIndex, ]
test <- dataset[-trainIndex, ]

# Class balance in the full data and in the training subset.
# (`train` is a data frame, not a function, so it cannot be called.)
print(table(dataset$lawn))
print(table(train$lawn))

# The training data must be passed as `data =`.
NBclassfier <- naiveBayes(lawn ~ weather + time + health, data = train)
print(NBclassfier)
# 7. Naive Bayes
# Naive Bayes on `CreditRating` (expected to be loaded before this point):
# predict Approval from the other categorical columns.
# NOTE(review): naiveBayes() is presumably e1071::naiveBayes and the package
# is assumed to be attached elsewhere -- confirm.
str(CreditRating)

# Treat every modelling column as a factor.
CreditRating$PurchaseFrequency <- as.factor(as.character(CreditRating$PurchaseFrequency))
CreditRating$CreditRating <- as.factor(as.character(CreditRating$CreditRating))
CreditRating$Age <- as.factor(as.character(CreditRating$Age))
CreditRating$Approval <- as.factor(as.character(CreditRating$Approval))
str(CreditRating)

# Build the training subset from CreditRating itself; without this, `train`
# has no Approval column to tabulate or model.
# NOTE(review): the 70/30 split and the seed are assumptions -- adjust to
# whatever split the exercise prescribes.
set.seed(999)
trainIndex <- sample(
  seq_len(nrow(CreditRating)),
  size = floor(0.7 * nrow(CreditRating))
)
train <- CreditRating[trainIndex, ]
test <- CreditRating[-trainIndex, ]
print(table(train$Approval))

# The response must not appear among the predictors; use the three converted
# feature columns instead.
NBclassfier <- naiveBayes(
  Approval ~ PurchaseFrequency + CreditRating + Age,
  data = train
)
print(NBclassfier)
# 8.
# Load the built-in iris data set and inspect it.
data("iris")
str(iris)       # structure
summary(iris)   # per-column summary
head(iris)      # first rows

# Split into the four measurement columns and the Species labels.
iris.new <- iris[, 1:4]
iris.class <- iris[["Species"]]

# First rows / values of each piece.
head(iris.new)
head(iris.class)
# Min-max rescale a numeric vector to the interval [0, 1].
#
# x      Numeric vector.
# na.rm  If TRUE, ignore NAs when finding the minimum and maximum (NA
#        positions stay NA in the result). The default FALSE keeps the
#        previous behaviour: any NA makes the whole result NA.
#
# Returns a vector the same length as `x`. A vector whose values are all
# equal is mapped to 0 instead of NaN (division by a zero range), and an
# empty vector is returned unchanged.
normalize <- function(x, na.rm = FALSE) {
  if (length(x) == 0) {
    return(x)
  }
  bounds <- range(x, na.rm = na.rm)
  span <- bounds[2] - bounds[1]
  if (!is.na(span) && span == 0) {
    # Every value equals the minimum: the difference is already all zeros.
    return(x - bounds[1])
  }
  (x - bounds[1]) / span
}
# Rescale each measurement column with normalize() so that no single feature
# dominates the distance computation in k-means.
feature_cols <- c("Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width")
iris.new[feature_cols] <- lapply(iris.new[feature_cols], normalize)

# View top rows
head(iris.new)

# Apply the k-means clustering algorithm with 3 clusters. k-means starts from
# random centers, so fix the seed and try several starts to make the result
# reproducible and less sensitive to one unlucky initialisation.
set.seed(123)
result <- kmeans(iris.new, centers = 3, nstart = 20)

# Number of observations in each cluster
result$size
# Coordinates of each cluster center
result$centers
# Cluster assignment of every observation
result$cluster

# Verify the clustering visually on a 2 x 2 grid; keep the previous graphics
# settings so they can be restored afterwards.
old_par <- par(mfrow = c(2, 2), mar = c(5, 4, 2, 2))

# Sepal.Length vs Sepal.Width, coloured by assigned cluster
plot(iris.new[c(1, 2)], col = result$cluster)
# Sepal.Length vs Sepal.Width, coloured by the original "class" attribute
# in the dataset
plot(iris.new[c(1, 2)], col = iris.class)
# Petal.Length vs Petal.Width, coloured by assigned cluster
plot(iris.new[c(3, 4)], col = result$cluster)
# Petal.Length vs Petal.Width, coloured by the original class
plot(iris.new[c(3, 4)], col = iris.class)

par(old_par)

# Cross-tabulate cluster assignments against the original classes
table(result$cluster, iris.class)
# 9.
# Decision tree on `grades` (expected to be loaded before this point).
# NOTE(review): tree() is presumably from the `tree` package, assumed to be
# attached elsewhere -- confirm.

# Explore the data
View(grades)
str(grades)
table(grades$Level)

# Clean the data: the response must be a factor for classification.
grades$Level <- as.factor(as.character(grades$Level))
str(grades)

# Assign each row the label 1 or 2 with probabilities 0.7 / 0.3, then split
# on that label. The seed makes the assignment reproducible.
set.seed(2134)
ind <- sample(
  x = 2,
  size = nrow(grades),
  replace = TRUE,
  prob = c(0.7, 0.3)
)
train_set <- grades[ind == 1, ]
test_set <- grades[ind == 2, ]
nrow(train_set)
nrow(test_set)

# Build the decision tree from the five quiz scores.
mytree <- tree(
  formula = Level ~ Quiz1 + Quiz2 + Quiz3 + Quiz4 + Quiz5,
  data = train_set
)

# Summarize the model
summary(mytree)

# Plot the tree and label its nodes (pretty = 0 keeps full level names).
plot(mytree)
text(mytree, pretty = 0, cex = 0.6)
# 10.
# Import the data1.csv file into RStudio.
# Summarize the data.
# Remove the last column and create a boxplot from the remaining columns.
# Create a scatterplot of column 1 and column 3.
# Calculate the correlation between column 1 and column 3.
# Read data1.csv from the working directory and summarise every column.
data1 <- read.csv("data1.csv")
summary(data1)

# Drop the last column, whatever its position, rather than hard-coding
# column 6; drop = FALSE keeps a data frame even if one column remains.
data2 <- data1[, -ncol(data1), drop = FALSE]
boxplot(data2)

# Scatterplot of column 1 against column 3 only (not the full 1:3 matrix).
plot(
  data2[[1]],
  data2[[3]],
  xlab = names(data2)[1],
  ylab = names(data2)[3]
)

# Correlation between column 1 and column 3 as a single number.
cor(data2[[1]], data2[[3]])