30 January 2021

Handling Matrix, Data Frame

by mervyn

source “K-MOOC 오세종 교수님의 [R 데이터 분석 입문] 강좌의 3-3. matrix, data frame 다루기 중 (http://www.kmooc.kr/courses/course-v1:DKUK+DKUK0003+2020_T3)”

Extract data from data frame

# Basic inspection of the built-in iris data frame.
dim(iris) # number of rows and columns, in that order
nrow(iris) # number of rows
ncol(iris) # number of columns
names(iris) # column names
head(iris) # first rows of the dataset (six by default)
tail(iris) # last rows of the dataset (six by default)

str

str(iris) # compact summary of the dataset's structure: size, column names and column types

'data.frame': 150 obs. of 5 variables: # data structure of the 'iris' dataset
# obs.: observations, i.e. the number of rows
# variables: the number of columns
# so iris is a data frame with 150 rows and 5 columns
$ Sepal.Length: num # numeric column
$ Species     : Factor w/ 3 levels # factor (categorical) column
# the 3 levels are the 3 species

unique, table

unique(iris[,5]) # distinct values of Species (column 5), with duplicates removed
table(iris[,"Species"]) # frequency table: how many rows belong to each species

Sum and Average

colSums(iris[,-5]) # sum of each column; the 5th column (Species) is a factor, not numeric, so it is excluded
colMeans(iris[,-5]) # mean of each column
rowSums(iris[,-5]) # sum of each row; returns one value per row (150 values)
rowMeans(iris[,-5]) # mean of each row

transpose

# Build a 4 x 5 integer matrix holding 1..20, filled column by column.
z <- matrix(seq_len(20), ncol = 5, nrow = 4)
z
# t() swaps rows and columns, so the result is 5 x 4.
t(z)

subset

subset() extracts the rows that satisfy a condition. The condition refers to columns by name, so the data must be a data frame; convert a matrix to a data frame first.

# subset(data frame, condition) keeps only the rows for which the condition
# is TRUE; column names inside the condition are looked up in the data frame.
IR.1 <- subset(iris, Species == "setosa")
IR.1
# The data frame must be passed as the first argument. Without it the
# column names in the condition cannot be resolved and the call fails.
IR.2 <- subset(iris, Sepal.Length > 5.0 & Sepal.Width > 4.0)
IR.2

Arithmetic Operations on Matrices (same number of rows and columns)

# Two 4 x 5 integer matrices with identical dimensions.
a <- matrix(1:20, nrow = 4, ncol = 5)
b <- matrix(21:40, nrow = 4, ncol = 5)
a
b
# Arithmetic operators work element by element on matrices of equal shape.
a + b
b - a
b / a
a * b # element-wise product of matching positions; the matrix product is %*%
# A scalar operand is applied to every element.
3 * a
b - 5
2 * a + 3 * b
# None of the expressions above modifies a or b; assign the result to keep it.
a <- a * 3
b <- b - 5

matrix vs data frame

Check data structure

# Check which data structure an object is.
is.matrix(iris) # FALSE: iris is a data frame, not a matrix
is.data.frame(iris) # TRUE
class(iris) # "data.frame"
class(state.x77) # "matrix" (R >= 4.0 also reports "array")

data frame to matrix

# One step: select the four measurement columns and convert them to a matrix.
iris.m <- as.matrix(iris[, 1:4])
head(iris.m)
class(iris.m)

# Two steps: drop the 5th column first (still a data frame), then convert.
tmp <- iris[, -5]
class(tmp)
tmp2 <- as.matrix(tmp)
class(tmp2)

matrix to data frame

# Convert the state.x77 matrix to a data frame. data.frame() also makes the
# column names syntactic, e.g. "Life Exp" becomes Life.Exp.
st <- data.frame(state.x77)
head(st)
class(st)

Assignment

Store state.x77 in a data frame named st
Print st
Print the column names of st
Print the row names of st
Print the number of rows and columns
Print a summary of st
Print the sum and mean of each row
Print the sum and mean of each column
Print all information for Florida
Print the income of all 50 states
Print the area of Texas
Print the population and income of Ohio
Print the states with a population over 5000
Print the Population, Income and Area of states with an income over 4500
Count the number of states with an income over 4500
Print the states with an area over 100000 and frost over 120
Mean income of the states with illiteracy over 2.0
Gap in mean income between states with illiteracy of 2.0 or more and states with illiteracy below 2.0
The state with the highest Life.Exp
Show the states with a higher income than Pennsylvania

# Assignment solutions. The trailing "# n" tags are the task numbers from the
# list above. Every value is printed by evaluating it at top level.
st <- data.frame(state.x77) # 1: data.frame() renames "Life Exp" to Life.Exp
st # 2
names(st) # 3
rownames(st) # 4
dim(st) # 5
str(st) # 6
rowSums(st)
rowMeans(st) # 7
colSums(st)
colMeans(st) # 8
# Rows are selected by state name; there is no object called Florida.
st["Florida", ] # 9
# drop = FALSE keeps a one-column data frame so the state names stay visible.
st[, "Income", drop = FALSE] # 10
st["Texas", "Area"] # 11
st["Ohio", c("Population", "Income")] # 12
subset(st, Population >= 5000) # 13
states1 <- subset(st, Income >= 4500)
# One table with the three requested columns instead of three bare vectors.
states1[, c("Population", "Income", "Area")] # 14
nrow(states1) # 15
states2 <- subset(st, Area >= 100000 & Frost >= 120)
states2 # 16
income.high <- subset(st, Illiteracy >= 2.0)[, "Income"]
mean(income.high) # 17
income.low <- subset(st, Illiteracy < 2.0)[, "Income"]
# Mean income of the low-illiteracy states minus that of the high-illiteracy states.
mean(income.low) - mean(income.high) # 18
# Comparing against max() of the same column is exact and keeps any ties.
subset(st, Life.Exp == max(Life.Exp)) # 19
penn.income <- st["Pennsylvania", "Income"]
subset(st, Income > penn.income) # 20

back next

tags:

Comments

Post comment

Data Journey

International Studies Grad. racing for AI Engineer

Handling Matrix, Data Frame

Extract data from data frame

str

unique, table

Sum and Average

transpose

subset

Arithmetic Operations on Matrices (same number of rows and columns)

matrix vs data frame

Assignment

Comments

Handling Matrix, Data Frame

Extract data from data frame

str

unique, table

Sum and Average

transpose

subset

Arithmetic Operations on Matrices (same number of rows and columns)

matrix vs data frame

Assignment

Comments

Share this: