Data Journey

International Studies Grad. racing for AI Engineer

Download as .zip Download as .tar.gz View on GitHub
30 January 2021

Handling Matrix, Data Frame

by mervyn

source “K-MOOC 오세종 교수님의 [R 데이터 분석 입문] 강좌의 3-3. matrix, data frame 다루기 중 (http://www.kmooc.kr/courses/course-v1:DKUK+DKUK0003+2020_T3)”

Extract data from data frame

# Number of rows and columns
dim(iris)
# Number of rows
nrow(iris)
# Number of columns
ncol(iris)
# Column names
names(iris)
# First few rows of the dataset
head(iris)
# Last few rows of the dataset
tail(iris)

str

# Compact summary of the dataset's structure
str(iris)

'data.frame': 150 obs. of 5 variables # Data structure of the 'iris' dataset
# obs: observations, i.e. the number of rows
# variables: the number of columns
# a data frame with 150 rows and 5 columns
$ Sepal.Length: num # numeric column
$ Species     : Factor w/ 3 levels # factor column
# 3 levels = 3 species

unique, table

# Distinct values of Species (column 5), with duplicates removed
unique(iris[, 5])
# Frequency table: number of observations per species
table(iris[, "Species"])

Sum and Average

# The 5th column (Species) is a factor, so it is excluded from the arithmetic.
# Sum of each column
colSums(iris[, -5])
# Mean of each column
colMeans(iris[, -5])
# Sum of each row: returns 150 values, one per row
rowSums(iris[, -5])
# Mean of each row
rowMeans(iris[, -5])

transpose

# Build a 4 x 5 matrix, filled column by column
z <- matrix(1:20, nrow = 4, ncol = 5)
z
# Transpose: rows become columns, giving a 5 x 4 matrix
t(z)

subset

Not for matrices: convert to a data frame first. Extracts the rows that satisfy a condition.

# subset(data frame, condition) keeps only the rows for which the condition
# is TRUE. Column names can be used directly inside the condition.

# Rows whose species is setosa
IR.1 <- subset(iris, Species == "setosa")
IR.1

# Rows with Sepal.Length above 5.0 AND Sepal.Width above 4.0.
# The data frame must be passed as the first argument; without it the
# column names in the condition cannot be resolved.
IR.2 <- subset(iris, Sepal.Length > 5.0 & Sepal.Width > 4.0)
IR.2

Arithmetic Operations on Matrices (same number of rows and columns)

# Two matrices with identical dimensions (4 rows, 5 columns)
a <- matrix(1:20, nrow = 4, ncol = 5)
b <- matrix(21:40, nrow = 4, ncol = 5)
a
b

# Element-wise arithmetic between the two matrices
a + b
b - a
b / a
# `*` multiplies the values at matching positions;
# the matrix product uses %*% instead
a * b

# A scalar operand is applied to every element
3 * a
b - 5
2 * a + 3 * b

# The operations above do not modify a or b; assign the result back to keep it
a <- a * 3
b <- b - 5

matrix vs data frame

Check data structure

# iris is a data frame, not a matrix
is.matrix(iris) # FALSE
is.data.frame(iris) # TRUE
class(iris) # "data.frame"
# state.x77 is a matrix (R >= 4.0 reports "matrix" "array")
class(state.x77)

data frame to matrix

# Convert the four measurement columns of iris into a matrix in one step
iris.m <- as.matrix(iris[, 1:4])
head(iris.m)
class(iris.m)

# The same conversion in two steps: drop the 5th column first
# (the result is still a data frame), then convert it to a matrix
tmp <- iris[, -5]
class(tmp)
tmp2 <- as.matrix(tmp)
class(tmp2)

matrix to data frame

# Convert the state.x77 matrix into a data frame
st <- data.frame(state.x77)
# Inspect the first few rows and confirm the new class
head(st)
class(st)

Assignment

  1. Store state.x77 to st data frame

  2. Print st

  3. Print names of st columns

  4. Print names of st rows

  5. Print the number of rows and columns

  6. Print summary of information

  7. Print sum and mean of row

  8. Print sum and mean of col

  9. Print all information of Florida state

  10. Print income information of 50 states

  11. Print Area of Texas state

  12. Print Population and Income of Ohio state

  13. Print state data of population over 5000

  14. Print Population, Income, Area data of state income over 4500

  15. Count the number of states with Income over 4500

  16. Print the state information of area over 100000 and frost over 120

  17. Mean income of the state with illiteracy over 2.0

  18. Gap in mean income between states with illiteracy of 2.0 or more and states with illiteracy below 2.0

  19. The state with highest life.exp

  20. Show the states with a higher Income than Pennsylvania


# Solutions to the assignment on the state.x77 dataset.
# Each answer is labelled with the number of the task it solves.

# 1. Store state.x77 in the data frame st
#    (data.frame() turns "Life Exp" / "HS Grad" into Life.Exp / HS.Grad)
st <- data.frame(state.x77)

# 2. Print st
st

# 3. Column names
names(st)

# 4. Row names (the state names)
rownames(st)

# 5. Number of rows and columns
dim(st)

# 6. Summary of the structure
str(st)

# 7. Sum and mean of each row
rowSums(st)
rowMeans(st)

# 8. Sum and mean of each column
colSums(st)
colMeans(st)

# 9. All information for Florida: select the row by its name
st["Florida", ]

# 10. Income of all 50 states
#     drop = FALSE keeps the result a data frame so the state names stay visible
st[, "Income", drop = FALSE]

# 11. Area of Texas
st["Texas", "Area"]

# 12. Population and Income of Ohio
st["Ohio", c("Population", "Income")]

# 13. States with a population of 5000 or more
subset(st, Population >= 5000)

# 14. Population, Income and Area of the states with an income of 4500 or more
states1 <- subset(st, Income >= 4500)
states1[, c("Population", "Income", "Area")]

# 15. Number of states with an income of 4500 or more
nrow(states1)

# 16. States with an area of 100000 or more and frost of 120 or more
states2 <- subset(st, Area >= 100000 & Frost >= 120)
states2

# 17. Mean income of the states with illiteracy of 2.0 or more
income_high_illiteracy <- subset(st, Illiteracy >= 2.0)[, "Income"]
mean(income_high_illiteracy)

# 18. Gap in mean income: states with illiteracy below 2.0 minus
#     states with illiteracy of 2.0 or more
income_low_illiteracy <- subset(st, Illiteracy < 2.0)[, "Income"]
mean(income_low_illiteracy) - mean(income_high_illiteracy)

# 19. State(s) with the highest life expectancy
#     The maximum is taken from the column itself, so the equality test
#     matches every row that holds that exact value.
max_life_exp <- max(st[, "Life.Exp"])
subset(st, Life.Exp == max_life_exp)

# 20. States with a higher income than Pennsylvania
pennsylvania_income <- st["Pennsylvania", "Income"]
subset(st, Income > pennsylvania_income)

back next

tags:

Comments

Post comment
Loading...

Share this: