This SOCR Webapp utilizes the SOCR 2011 US Jobs Dataset to demonstrate dynamic data dashboarding using Rmarkdown/Knitr, Plotly, and FlexDashboard. Another SOCR Dashboard app provides a purely HTML/JavaScript mechanism for graphical database query, hypothesis generation, and exploratory data analytics. Another SOCR Webapp illustrates dimensionality reduction using t-SNE for the same Jobs dataset.
Scroll through the four main tabs on the top: Data Import, Data Summary, Graphical Dashboard, and Source Code.
Additional details on RMarkdown, HTML widgets, R dashboarding, and interactive docs are available here.
library(rvest)
wiki_url <- read_html("http://wiki.socr.umich.edu/index.php/SOCR_Data_2011_US_JobsRanking")
html_nodes(wiki_url, "#content")
{xml_nodeset (1)}
[1] <div id="content" class="mw-body-primary" role="main">\n\t<a id="top ...
jobs <- html_table(html_nodes(wiki_url, "table")[[1]])
[1] "Overall_Score" "Average_Income(USD)" "Work_Environment"
[4] "Stress_Level" "Physical_Demand" "Hiring_Potential"
Overall_Score Average_Income(USD) Work_Environment Stress_Level
Min. : 60.0 Min. : 18053 Min. : 89.52 Min. : 5.94
1st Qu.:358.8 1st Qu.: 33910 1st Qu.: 404.10 1st Qu.:15.07
Median :528.0 Median : 47138 Median : 667.55 Median :20.34
Mean :502.1 Mean : 54429 Mean : 758.28 Mean :22.44
3rd Qu.:645.0 3rd Qu.: 63180 3rd Qu.:1016.24 3rd Qu.:27.34
Max. :892.0 Max. :365258 Max. :3314.03 Max. :60.22
Physical_Demand Hiring_Potential
Min. : 3.950 Min. :-40.760
1st Qu.: 7.213 1st Qu.: -3.533
Median : 9.990 Median : 4.590
Mean :13.003 Mean : 3.837
3rd Qu.:16.025 3rd Qu.: 11.080
Max. :43.230 Max. : 37.050
Overall_Score Average_Income(USD) Work_Environment Stress_Level
Min. :-2.3709 Min. :-1.0221 Min. :-1.4402 Min. :-1.6145
1st Qu.:-0.7686 1st Qu.:-0.5765 1st Qu.:-0.7627 1st Qu.:-0.7207
Median : 0.1392 Median :-0.2049 Median :-0.1954 Median :-0.2056
Mean : 0.0000 Mean : 0.0000 Mean : 0.0000 Mean : 0.0000
3rd Qu.: 0.7667 3rd Qu.: 0.2459 3rd Qu.: 0.5555 3rd Qu.: 0.4791
Max. : 2.0914 Max. : 8.7334 Max. : 5.5038 Max. : 3.6965
Physical_Demand Hiring_Potential
Min. :-1.1178 Min. :-3.67145
1st Qu.:-0.7150 1st Qu.:-0.60672
Median :-0.3720 Median : 0.06196
Mean : 0.0000 Mean : 0.00000
3rd Qu.: 0.3732 3rd Qu.: 0.59625
Max. : 3.7325 Max. : 2.73421
[1] "Overall_Score" "Average_Income(USD)" "Work_Environment"
[4] "Stress_Level" "Physical_Demand" "Hiring_Potential"
[7] "Stress_category"
---
# Document metadata shown in the dashboard header.
title: "SOCR Jobs Dashboard"
author: "SOCR Team"
# Inline R expression: render date formatted as full month name and year.
date: "`r format(Sys.time(), '%B %Y')`"
# The format options must be nested under the output format they configure;
# at column 0 they would be parsed as unrelated top-level keys.
output:
  flexdashboard::flex_dashboard:
    orientation: rows
    social: menu
    source_code: embed
---
```{r setup, include=FALSE}
# install.packages("flexdashboard")
library(flexdashboard)
library(knitr)
library(ggplot2)
library(plotly)
library(plyr)
```
Load the Data
=======================================================================
This [SOCR Webapp](http://socr.umich.edu/HTML5/) utilizes the [SOCR 2011 US Jobs Dataset](http://wiki.socr.umich.edu/index.php/SOCR_Data_2011_US_JobsRanking) to demonstrate dynamic data dashboarding using Rmarkdown/Knitr, Plotly, and FlexDashboard. Another [SOCR Dashboard app](http://socr.umich.edu/HTML5/Dashboard/) provides a purely HTML/JavaScript mechanism for graphical database query, hypothesis generation, and exploratory data analytics. Another [SOCR Webapp](http://socr.umich.edu/HTML5/) illustrates [dimensionality reduction using t-SNE for the same Jobs dataset](http://socr.umich.edu/HTML5/SOCR_TensorBoard_UKBB/UsersData.html).
Scroll through the *four main tabs* on the top: [Data Import](./SOCR_Dashboard_Example_Jobs.html#load-the-data), [Data Summary](./SOCR_Dashboard_Example_Jobs.html#data-summary), [Graphical Dashboard](./SOCR_Dashboard_Example_Jobs.html#layout-the-dashboard), and `Source Code`. Additional details on [RMarkdown, HTML widgets, R dashboarding, and interactive docs are available here](https://rmarkdown.rstudio.com/gallery.html).
```{r echo=TRUE}
# Download and parse the SOCR wiki page for the 2011 US Jobs ranking data.
# `echo=TRUE` (spelled out, since `T` is an ordinary reassignable variable)
# makes knitr show this code in the rendered dashboard.
library(rvest)
wiki_url <- read_html("http://wiki.socr.umich.edu/index.php/SOCR_Data_2011_US_JobsRanking")
# Auto-print the node with id "content" as a quick check that the page loaded.
html_nodes(wiki_url, "#content")
# Convert the first <table> element found on the page into a data frame.
jobs <- html_table(html_nodes(wiki_url, "table")[[1]])
```
Data Summary
=======================================================================
```{r}
# Build the numeric working table `jobs2` from the scraped `jobs` table and
# print summaries of the raw and standardized features.

# Drop columns 1, 7 and 10 of the scraped table (column 7 is re-attached
# below as `Stress_category`).
# NOTE(review): columns 1 and 10 are presumably an index and a free-text
# description -- confirm against the wiki table.
jobs1 <- jobs[, -c(1, 7, 10)]

# `[[` extracts a plain vector from both base data.frames and tibbles.
# `x[, j]` only does so for base data.frames; on a tibble (what recent rvest
# `html_table()` returns) it yields a one-column tibble instead.
job.names <- jobs1[[1]]
jobs2 <- jobs1[, -1]

colnames(jobs2)
summary(jobs2)

# Standardize every column (center and scale) and summarize the result.
jobs3 <- apply(jobs2, 2, scale)
summary(jobs3)

# Prefix the values of column 7 with "#" and record the smallest and largest
# resulting strings.
jobs3_color <- paste0("#", jobs[[7]])
jobs3_color1 <- expand.grid(
  color_1 = min(jobs3_color),
  color_2 = max(jobs3_color),
  stringsAsFactors = FALSE
)

# Re-attach column 7 of the scraped table as a plain vector column.
jobs2$Stress_category <- jobs[[7]]
colnames(jobs2)
```
Layout the Dashboard
=======================================================================
Row {data-width=300}
-----------------------------------------------------------------------
### Histogram Chart (Stress-Levels)
```{r}
# Bar chart of the stress category of every job, plus the two-column data
# frame `df` consumed by the density scatterplot chunk that follows.
# install.packages("highcharter")
library(highcharter)
library(ggplot2)
library(plotly)
# p1 <- ggplot(jobs2, aes(x = Stress_category)) +
# geom_histogram(binwidth = 0.5,
# col = rainbow(11), fill = rainbow(11)) +
# ggtitle("Frequency histogram of Jobs by Stress-level Category")
# p1

# Name the columns with `=`. Using `<-` inside data.frame() assigns two
# stray global variables and leaves the columns with deparsed-expression
# names, so `df` would not actually contain `Jobs_Physical_Demand` or
# `Jobs_Overall_Rank`.
df <- data.frame(
  Jobs_Physical_Demand = jobs2$Physical_Demand,
  Jobs_Overall_Rank = jobs2$Overall_Score
)

highchart() %>%
  hc_title(text = "Job Stress-Levels (X) against Job Ranking (Y)") %>%
  hc_chart(type = "bar", showInLegend = FALSE) %>%
  hc_add_series(data = jobs2$Stress_category, color = "blue")
```
### Scatterplot-2D Density (Jobs Physical Demand, Overall Rank)
```{r}
# Physical demand (x) against overall rank (y): semi-transparent points
# overlaid with 2D density contours on a white panel, then converted to an
# interactive plotly widget.
white_panel <- theme(panel.background = element_rect(fill = '#ffffff'))
p <- ggplot(df, aes(x = Jobs_Physical_Demand, y = Jobs_Overall_Rank))
p <- p + geom_point(alpha = 0.5)
p <- p + geom_density_2d()
p <- p + white_panel
ggplotly(p)
```
### Tree-Map Chart
```{r}
# Tree-map of the jobs table: tile size = average income, tile colour =
# overall score, grouped by job title and stress category.
# install.packages("treemap")
library(treemap)
library(flexdashboard)

# Define income categories by Q1, Q2=median, Q3
# (break points are the hard-coded quartiles of Average_Income(USD)).
jobs2$IncomeCat <- cut(
  jobs2$`Average_Income(USD)`,
  breaks = c(0, 33910, 47138, 63180, Inf),
  labels = 1:4
)
# Shift hiring potential so that its minimum becomes zero.
jobs2$HP <- jobs2$Hiring_Potential - min(jobs2$Hiring_Potential)

# Compute the treemap layout only (draw = FALSE); highcharter renders it.
tm2 <- treemap(
  jobs,
  index = c("Job_Title", "Stress_Category"),
  vSize = "Average_Income(USD)",
  vColor = "Overall_Score",
  type = "value",
  palette = rev(rainbow(6)),
  draw = FALSE
)

# NOTE(review): hctreemap() is reported as deprecated in newer highcharter
# releases -- confirm against the installed version before upgrading.
# The title names Overall Score as the colour variable, matching `vColor`
# above and the tooltip below.
hctreemap(tm2, allowDrillToNode = TRUE, layoutAlgorithm = "squarified", joinBy = "Stress_Category") %>%
  hc_title(text = "US 2011 Jobs Data\n (Size:Avg.Income, Color:Overall Score)") %>%
  hc_tooltip(pointFormat = "{point.name}:
Average_Income(USD): {point.value:,.0f}
Overall_Score: {point.valuecolor:,.0f}")
```
### t-SNE Manifold Chart
```{r}
# ggplot jobs into the 2D t-SNE manifold
library("tsne")
set.seed(13242)  # fixed seed so the embedding is reproducible

# Expand `jobs2` into a numeric design matrix and embed it with t-SNE.
# dplyr::as_tibble() replaces the deprecated dplyr::tbl_df().
tsne_jobs <- jobs2 %>%
  as.data.frame() %>%
  dplyr::as_tibble() %>%
  model.matrix(~., data = .) %>%
  as.data.frame() %>%
  dplyr::as_tibble() %>%
  # Drop design-matrix columns 1 and 8.
  # NOTE(review): presumably the intercept and Stress_category -- confirm.
  .[-c(1, 8)] %>%
  tsne(perplexity = 60)

# Attach the two embedding coordinates to the jobs table.
df2 <- jobs2 %>%
  dplyr::mutate(
    x = tsne_jobs[, 1],
    y = tsne_jobs[, 2]
  )

# Mean and standard deviation of the embedding per stress category; the
# means position the text labels. Computed before the factor conversion below.
df2_centers <- df2 %>%
  dplyr::group_by(Stress_category) %>%
  dplyr::summarise(
    cx = mean(x),
    cy = mean(y),
    sdcx = sd(x),
    sdcy = sd(y)
  )

df2$Stress_category <- as.factor(df2$Stress_category)

# Named vector of the distinct stress categories (names equal values),
# passed as the manual colour scale values.
cols <- df2 %>%
  dplyr::select(Stress_category) %>%
  dplyr::distinct() %>%
  {
    setNames(.$Stress_category, .$Stress_category)
  }

ggPlotMe <- ggplot(df2) +
  geom_point(aes(x, y, color = Stress_category), size = 4, alpha = 0.5) +
  scale_color_manual("Type", values = cols) +
  geom_text(data = df2_centers, aes(cx, cy, label = Stress_category)) +
  theme_minimal() +
  theme(legend.position = "right") # +
# facet_wrap(~Stress_category) # if you want to separate the plots for each Stress Level
ggPlotMe
```
Row {data-width=900}
-----------------------------------------------------------------------
### 2D Scatterplot
```{r}
# Interactive highcharter scatterplot of the t-SNE embedding (`x`, `y` in
# `df2`), with a large faint marker layered behind every point.
df2$Job_Title <- jobs$Job_Title

# Fields shown in the tooltip; each must be present in the point data below.
tooltip_fields <- c("Job_Title", "Stress_category", "Physical_Demand")

# Foreground series: one list element per job.
# `color` is added with mutate() so `Stress_category` is kept under its own
# name as well, and `Job_Title` is included so the tooltip can reference it.
ds <- df2 %>% # list output
  dplyr::mutate(color = Stress_category) %>%
  dplyr::select(
    Job_Title, Overall_Score, "Average_Income(USD)", Work_Environment,
    Stress_Level, Physical_Demand, Hiring_Potential, Stress_category,
    color, x, y
  ) %>%
  list_parse() # Convert the DF object to list preserving the object structure

# Background series: same coordinates, colour derived from Stress_Level
# with alpha 0.05.
ds2 <- df2 %>% # list output
  dplyr::select(color = Stress_Level, x, y) %>%
  dplyr::mutate(color = hex_to_rgba(color, 0.05)) %>%
  list_parse()

# Highcharts `pointFormat` takes a single format string with {point.<field>}
# placeholders. A bare vector of column names would not be substituted with
# the per-point values.
tooltip <- paste(
  sprintf("<b>%s</b>: {point.%s}", tooltip_fields, tooltip_fields),
  collapse = "<br/>"
)
# jobs$"Job_Title", "Stress_Category", Average_Income(USD): Overall_Score:"

scatter_HChart <- highchart() %>%
  hc_chart(zoomType = "xy") %>%
  hc_xAxis(minRange = diff(range(df2$Stress_Level)) / 5) %>%
  hc_yAxis(minRange = diff(range(df2$Hiring_Potential)) / 5) %>%
  hc_add_series(
    data = ds,
    type = "scatter",
    name = "Job_Title",
    states = list(hover = list(halo = list(
      size = 50,
      attributes = list(
        opacity = 0.8
      )
    )))
  ) %>%
  # Background layer: drawn behind (zIndex = -3), ignores the mouse, and is
  # linked to the previous series.
  hc_add_series(
    data = ds2,
    type = "scatter",
    marker = list(radius = 50, symbol = "circle"),
    zIndex = -3,
    enableMouseTracking = FALSE,
    linkedTo = ":previous"
  ) %>%
  hc_plotOptions(series = list()) %>%
  hc_tooltip(
    useHTML = TRUE,
    borderRadius = 1,
    borderWidth = 5,
    headerFormat = "",
    pointFormat = tooltip,
    footerFormat = "\n"
  ) %>%
  hc_add_theme(
    hc_theme_null(
      chart = list(
        backgroundColor = "transparent",
        style = list(
          fontFamily = "Roboto"
        )
      )
    )
  )
scatter_HChart
```