Data Journey

International Studies Grad. racing for AI Engineer

Download as .zip Download as .tar.gz View on GitHub
10 February 2021

Data Visualization

by mervyn

Source: "K-MOOC 오세종 교수님의 강좌의 7. 데이터 시각화 중" (http://www.kmooc.kr/courses/course-v1:DKUK+DKUK0003+2020_T3)

Tree Map

A treemap uses the area and the color of each tile to indicate values.

# install.packages("treemap")  # run once if the package is not installed
library(treemap)
data(GNI2014) # load the GNI2014 data set
str(GNI2014)  # inspect the structure of the data
# Draw a treemap grouped by continent, then by country code.
# Fix: the index argument was missing its trailing comma (syntax error).
treemap(GNI2014,
        index=c("continent", "iso3"), # grouping hierarchy: continent > iso3
        vSize="population", # variable that sets the tile sizes
        vColor="GNI", # variable that sets the tile colors
        type="value", # map the vColor values onto a color palette
        bg.labels="yellow") # background color of the group labels

We have countries, population, per capita income.

What is the average per-capita income of each continent?

# Total income per country = population * per-capita income (GNI)
GNI2014$GNI.total <- with(GNI2014, population * GNI)
head(GNI2014)

# Sum columns 4 to 6 within each continent. The unnamed grouping list makes
# aggregate() call the grouping column "Group.1".
# (columns 4:6 are presumably population, GNI and GNI.total -- verify with
# names(GNI2014); a summed per-capita GNI is not meaningful on its own)
GNI2014.a <- aggregate(x = GNI2014[4:6],
                       by = list(GNI2014$continent),
                       FUN = sum)

# Per-capita income of a continent = continent total / continent population
GNI2014.a[["GNI.percap"]] <- with(GNI2014.a, GNI.total / population)

# One tile per continent: size from population, color from GNI.percap
treemap(
  GNI2014.a,
  index = "Group.1",
  vSize = "population",
  vColor = "GNI.percap",
  type = "value",
  bg.labels = "yellow"
)

bubble chart

A bigger circle means a larger value; the chart also shows the correlation between two variables.

# install.packages("MASS")  # only needed if MASS is not already installed
library(MASS)
head(UScrime)
radius<- sqrt(UScrime$Pop) # square root of Pop, used as the circle radius
# Bubble chart: one circle per row, positioned at (U2, y).
# Fix: the xlab argument ended with "." instead of "," (syntax error).
symbols(UScrime$U2, UScrime$y,
        circles=radius, # radii of the circles
        inches=0.4, # scale so the largest symbol measures 0.4 inches
        fg="white", # outline color
        bg="lightgray", # fill color
        lwd=1.5, # line width of the circle outline
        xlab="unemployment 35-39 males",
        ylab="crime rate",
        main="UScrime Data")
# Label each circle with its row number, at the same coordinates as symbols().
# seq_len() is used instead of 1:nrow() so an empty data frame yields no labels.
text(UScrime$U2, UScrime$y,
     seq_len(nrow(UScrime)), # row number stands in for the state name
     cex=0.1, # font scale -- NOTE(review): 0.1 is very small; confirm intended
     col="brown") # font color

Boxplot

*7-3_Practice.csv

# 1 year temperature
# The file is read by its full path instead of calling setwd(), so the
# session's working directory is left untouched.
ds<- read.csv("c:/Rworks/7-3_Practice.csv")
head(ds)
summary(ds$avg_temp)
# Single boxplot of all avg_temp values
boxplot(ds$avg_temp,
        col="yellow",
        ylim=c(-20,40),
        xlab="Seoul Yearly Temp",
        ylab="Temp")

# monthly median temperature
# [[2]] extracts the aggregated values as a plain numeric vector, so rank()
# is not handed a one-column data frame.
month.avg<- aggregate(ds$avg_temp,
                      by=list(ds$month), median)[[2]]
odr<- rank(-month.avg) # rank 1 = highest monthly median
# One box per month; each month's color is picked by its rank, so the month
# with the highest median gets the first color of heat.colors(12).
boxplot(avg_temp~month, data=ds,
        col=heat.colors(12)[odr],
        ylim=c(-20,40),
        ylab="Temp",
        xlab="Month",
        main="Seoul Monthly Average Temp")

mosaic plot

The area of each tile represents the frequency of a cell in a cross-tabulation.

# Load the hospital data and preview the first rows
hospital <- read.csv(file = "c:/Rworks/hospital.csv")
head(hospital)

# Cross-tabulate the data once, then print the table
tbl <- table(hospital)
tbl

# Mosaic plot from a formula (freq first, then stay)
# An explicit palette such as color = c("green", "blue", "red") also works.
mosaicplot(
  ~ freq + stay,
  data = hospital,
  color = TRUE,
  main = "Hospital Frequency vs Stay"
)

# Same kind of plot drawn directly from the contingency table
mosaicplot(
  tbl,
  color = TRUE,
  main = "Hospital Frequency vs Stay"
)

ggplot

# Two general call patterns; "xx" and ".." are placeholders, so these lines
# are a syntax sketch and are not runnable as written.
ggplot(data=xx, aes(x=x1, y=x2))+geom_xx()+ .. # Style 1
# Style 2: data and aes() are passed to the geom instead of to ggplot()
ggplot()+geom_xx(data=xx, aes(x=x1, y=x2))+..

Iris scatter plot: geom_point()

library(ggplot2)

# Style 1: data and the x/y mapping are given to ggplot()
# aes() is only for mappings from data columns. The fixed color and size are
# set as constants on the geom; inside aes() they would be treated as data
# values (adding a legend) rather than as a literal red / size 4.
ggplot(data=iris, aes(x=Petal.Length, y=Petal.Width))+
  geom_point(color="red", size=4) # geom_xx(): shape of the graph

# Scatter plot whose point color and shape depend on Species.
# Fix: aes() was never closed (unbalanced parentheses, syntax error). It now
# closes after the Species mappings; alpha and size are constants on the geom.
gp<- ggplot(data=iris, aes(x=Petal.Length, y=Petal.Width))+
     geom_point(
       aes(color=Species, shape=Species), # different color, shape by Species
       alpha=0.5, # same transparency for every point
       size=2)    # same size for every point

# Add a title and style it: size 14, bold, blue, horizontally centered
gp+ggtitle("IRIS data")+
 theme(plot.title=element_text(size=14, face="bold", color="blue", hjust=0.5))

# Style 2: ggplot() starts empty; the layer supplies its own data and mapping
ggplot() +
  geom_point(
    data = iris,
    mapping = aes(x = Petal.Length, y = Petal.Width)
  )

line graph: geom_line()

bar graph: geom_bar()

boxplot: geom_boxplot()

back next

Index

tags: R

Comments

Post comment
Loading...

Share this: