Data Visualization
by mervyn
source
K-MOOC course by Professor 오세종 (Oh Se-jong)
Tree Map
A treemap uses the area and the color of each rectangle to indicate values.
# Treemap of countries nested inside continents, using the GNI2014 data set.
# install.packages("treemap")  # run once if the package is not installed
library(treemap)
data(GNI2014)  # load the example data set
str(GNI2014)   # inspect its structure
treemap(GNI2014,
        index = c("continent", "iso3"),  # hierarchy: continent, then country
        vSize = "population",            # rectangle area follows population
        vColor = "GNI",                  # rectangle color follows GNI
        type = "value",                  # color is derived from the vColor values
        bg.labels = "yellow")            # background color of the labels
The data set contains countries together with their population and per capita income.
What is the average per capita income of each continent?
- Continent income = sum(population*per capita income)
- Continent per capita income = sum(population*per capita income)/continent population
# Total income of each country = population * per capita income
GNI2014[["GNI.total"]] <- with(GNI2014, population * GNI)
head(GNI2014)

# Sum columns 4 to 6 of GNI2014 within each continent.
# `by` supplies the grouping criterion; the unnamed list element becomes
# the "Group.1" column of the result.
GNI2014.a <- aggregate(x = GNI2014[, 4:6],
                       by = list(GNI2014[["continent"]]),
                       FUN = sum)

# Continent per capita income = continent total income / continent population
GNI2014.a[["GNI.percap"]] <- with(GNI2014.a, GNI.total / population)

# One rectangle per continent: area = population, color = per capita income
treemap(GNI2014.a,
        index = "Group.1",
        vSize = "population",
        vColor = "GNI.percap",
        type = "value",
        bg.labels = "yellow")
bubble chart
A bigger circle means a larger value; the chart also shows the correlation between two variables.
# Bubble chart of the UScrime data: x = U2, y = y, bubble size from Pop.
# install.packages("MASS")  # run once if the package is not installed
library(MASS)
head(UScrime)
radius <- sqrt(UScrime$Pop)     # square root of Pop is used as the radius
symbols(UScrime$U2, UScrime$y,  # draw one circle per row
        circles = radius,
        inches = 0.4,           # scaling of the symbols on the device
        fg = "white",           # circle border color
        bg = "lightgray",       # circle fill color
        lwd = 1.5,              # border line width
        xlab = "unemployment 35-39 males",
        ylab = "crime rate",
        main = "UScrime Data")
text(UScrime$U2, UScrime$y,     # same coordinates as in symbols()
     seq_len(nrow(UScrime)),    # label each bubble with its row number
     # NOTE(review): cex = 0.1 renders almost invisible labels; confirm the
     # intended font size.
     cex = 0.1,                 # font size
     col = "brown")             # font color
Boxplot
*7-3_Practice.csv
# Temperature over one year, read from 7-3_Practice.csv
setwd("c:/Rworks")
ds <- read.csv("7-3_Practice.csv")
head(ds)
summary(ds$avg_temp)
boxplot(ds$avg_temp,
        col = "yellow",
        ylim = c(-20, 40),
        xlab = "Seoul Yearly Temp",
        ylab = "Temp")

# Median temperature of each month. `[[2]]` extracts the value column as a
# plain numeric vector so that rank() does not have to coerce a data frame.
month.avg <- aggregate(ds$avg_temp,
                       by = list(ds$month), median)[[2]]
odr <- rank(-month.avg)  # descending rank: the highest median gets rank 1
boxplot(avg_temp ~ month, data = ds,
        col = heat.colors(12)[odr],  # pick each month's color by its rank
        ylim = c(-20, 40),
        ylab = "Temp",
        xlab = "Month",
        main = "Seoul Monthly Average Temp")
mosaic plot
The area of each tile represents the frequency of a cell in a cross-tabulation.
# Load the hospital data, preview it and print its cross-tabulation
hospital <- read.csv(file = "c:/Rworks/hospital.csv")
head(hospital)
table(hospital)

# Variant 1: formula interface (freq on the x axis, stay on the y axis)
mosaicplot(~ freq + stay,
           data = hospital,
           main = "Hospital Frequency vs Stay",
           color = TRUE)
# alternative: color = c("green", "blue", "red")

# Variant 2: pass the contingency table directly
tbl <- table(hospital)
mosaicplot(tbl,
           main = "Hospital Frequency vs Stay",
           color = TRUE)
ggplot
ggplot(data=xx, aes(x=x1, y=x2))+geom_xx()+ .. # Style 1
ggplot()+geom_xx(data=xx, aes(x=x1, y=x2))+.. # Style 2
Iris scatter plot: geom_point()
library(ggplot2)

# Style 1: data and aesthetic mappings are passed to ggplot()
# aes() maps data columns to aesthetics; constant values such as a fixed
# color or size are set outside aes(), otherwise they are treated as data.
ggplot(data = iris, aes(x = Petal.Length, y = Petal.Width)) +
  geom_point(color = "red", size = 4)  # geom_*() chooses the graph shape

gp <- ggplot(data = iris, aes(x = Petal.Length, y = Petal.Width)) +
  geom_point(aes(color = Species, shape = Species),  # vary by Species
             alpha = 0.5,                            # fixed transparency
             size = 2)                               # fixed point size

# Add a centered, bold, blue title to the stored plot
gp + ggtitle("IRIS data") +
  theme(plot.title = element_text(size = 14, face = "bold",
                                  color = "blue", hjust = 0.5))

# Style 2: data and aesthetic mappings are passed to the geom
ggplot() +
  geom_point(data = iris,
             aes(x = Petal.Length,
                 y = Petal.Width))
line graph: geom_line()
bar graph: geom_bar()
boxplot: geom_boxplot()
tags: R
Comments
Post comment