Handling Matrix, Data Frame
by mervyn
source “K-MOOC 오세종 교수님의 [R 데이터 분석 입문] 강좌의 3-3. matrix, data frame 다루기 중 (http://www.kmooc.kr/courses/course-v1:DKUK+DKUK0003+2020_T3)”
Extract data from data frame
dim(iris) # dimensions: number of rows, then number of columns
nrow(iris) # number of rows
ncol(iris) # number of columns
names(iris) # column names
head(iris) # first few rows of the dataset
tail(iris) # last few rows of the dataset
str
str(iris) # compact structure summary: object class, size, each column's type
'data.frame': 150 obs of 5 variables # Data structure of 'iris' dataset
# obs: observations. number of data
# variables: number of variables
# 150 row 5 col data frame
$ Sepal.Length: num # numeric data type column
$ Species : Factor w/ 3 levels # factor column
# 3 species
unique, table
# Distinct values in column 5 (Species), with duplicates dropped
unique(iris[, 5])
# Frequency table: how many rows belong to each species
table(iris[, "Species"])
Sum and Average
# Column 5 (Species) is a factor, so it is dropped before any arithmetic.
colSums(iris[, -5])  # one total per remaining column
colMeans(iris[, -5]) # one mean per remaining column
rowSums(iris[, -5])  # one total per row: 150 values
rowMeans(iris[, -5]) # one mean per row
transpose
# 4 x 5 matrix holding 1..20, filled column by column
z <- matrix(1:20, nrow = 4, ncol = 5)
z
# Transposing swaps rows and columns, giving a 5 x 4 matrix
t(z)
subset
Extracts the rows that satisfy a condition. Not for matrices: convert a matrix to a data frame first.
# subset(data frame, condition): keep only the rows where the condition holds.
# Column names can be used directly inside the condition.
IR.1 <- subset(iris, Species == "setosa")
IR.1
# The data frame must always be the first argument; without it R looks for
# Sepal.Length as a standalone object and stops with "object not found".
IR.2 <- subset(iris, Sepal.Length > 5.0 & Sepal.Width > 4.0)
IR.2
Arithmetic Operations on Matrices (same number of rows and columns)
# Two 4 x 5 matrices with the same shape
a <- matrix(1:20, nrow = 4, ncol = 5)
b <- matrix(21:40, nrow = 4, ncol = 5)
a
b
# Operators between same-shaped matrices work element by element
a + b
b - a
b / a
a * b # element-wise product; true matrix multiplication is %*%
# A scalar is applied to every element
3 * a
b - 5
2 * a + 3 * b
# None of the lines above changed a or b; assign the result back to keep it
a <- a * 3
b <- b - 5
matrix vs data frame
Check data structure
# Predicates and class() tell a matrix apart from a data frame.
# The function name is is.matrix (with a dot); a comma there is a syntax error.
is.matrix(iris) # FALSE
is.data.frame(iris) # TRUE
class(iris) # "data.frame"
class(state.x77) # "matrix" "array" (just "matrix" before R 4.0.0)
data frame to matrix
# as.matrix() converts a data frame to a matrix; only the four measurement
# columns are passed, leaving out the Species factor.
iris.m <- as.matrix(iris[, 1:4])
head(iris.m)
class(iris.m)
# Same conversion in two steps: drop column 5 first, then convert
tmp <- iris[, -5]
class(tmp) # still a data frame at this point
tmp2 <- as.matrix(tmp)
class(tmp2)
matrix to data frame
# data.frame() turns the state.x77 matrix into a data frame; the matrix row
# names (the state names) become the data frame's row names.
st <- data.frame(state.x77)
head(st)
class(st)
Assignment
-
Store state.x77 to st data frame
-
Print st
-
Print names of st columns
-
Print names of st rows
-
Print the number of rows and columns
-
Print summary of information
-
Print sum and mean of row
-
Print sum and mean of col
-
Print all information of Florida state
-
Print income information of 50 states
-
Print Area of Texas state
-
Print Population and Income of Ohio state
-
Print state data of population over 5000
-
Print Population, Income, Area data of state income over 4500
-
Count the number of states with Income over 4500
-
Print the state information of area over 100000 and frost over 120
-
Mean income of the state with illiteracy over 2.0
-
Gap in mean income between states with illiteracy equal to or more than 2.0 and states with illiteracy less than 2.0
-
The state with highest life.exp
-
Show states with a higher Income than Pennsylvania
# Solutions to the assignment; the trailing "# n" marks the task number.
st <- data.frame(state.x77) # 1 (column "Life Exp" becomes Life.Exp)
st # 2
names(st) # 3
rownames(st) # 4
dim(st) # 5
str(st) # 6
rowSums(st)
rowMeans(st) # 7
colSums(st)
colMeans(st) # 8
# Tasks 9 and 10 index st directly. Calling str() on Florida / Income fails
# because no objects with those names are ever created.
st["Florida", ] # 9
st[, "Income"] # 10
st["Texas", "Area"] # 11
# Selecting both columns at once keeps the answer in a single labelled row
st["Ohio", c("Population", "Income")] # 12
subset(st, Population >= 5000) # 13
states1 <- subset(st, Income >= 4500)
# One table instead of three bare vectors, so the state names stay visible
states1[, c("Population", "Income", "Area")] # 14
nrow(states1) # 15
states2 <- subset(st, Area >= 100000 & Frost >= 120)
str(states2) # 16
# Descriptive names instead of reusing a/b/c/d (c also shadows base::c)
high_illit_income <- subset(st, Illiteracy >= 2.0)[, "Income"]
mean(high_illit_income) # 17
low_illit_income <- subset(st, Illiteracy < 2.0)[, "Income"]
mean(low_illit_income) - mean(high_illit_income) # 18
# Exact == is safe here: the maximum is taken from the very column compared
max_life_exp <- max(st[, "Life.Exp"])
subset(st, Life.Exp == max_life_exp) # 19
penn_income <- st["Pennsylvania", "Income"]
subset(st, Income > penn_income) # 20
Comments
Post comment