Load the Data

This SOCR Webapp utilizes the SOCR 2011 US Jobs Dataset to demonstrate dynamic data dashboarding using Rmarkdown/Knitr, Plotly, and FlexDashboard. Another SOCR Dashboard app provides a purely HTML/JavaScript mechanism for graphical database query, hypothesis generation, and exploratory data analytics. Another SOCR Webapp illustrates dimensionality reduction using t-SNE for the same Jobs dataset.

Scroll through the four main tabs on the top: Data Import, Data Summary, Graphical Dashboard, and Source Code. Additional details on RMarkdown, HTML widgets, R dashboarding, and interactive docs are available here.

library(rvest)
wiki_url <- read_html("http://wiki.socr.umich.edu/index.php/SOCR_Data_2011_US_JobsRanking")
html_nodes(wiki_url, "#content")
{xml_nodeset (1)}
[1] <div id="content" class="mw-body-primary" role="main">\n\t<a id="top ...
jobs <- html_table(html_nodes(wiki_url, "table")[[1]])

Data Summary

[1] "Overall_Score"       "Average_Income(USD)" "Work_Environment"   
[4] "Stress_Level"        "Physical_Demand"     "Hiring_Potential"   
 Overall_Score   Average_Income(USD) Work_Environment   Stress_Level  
 Min.   : 60.0   Min.   : 18053      Min.   :  89.52   Min.   : 5.94  
 1st Qu.:358.8   1st Qu.: 33910      1st Qu.: 404.10   1st Qu.:15.07  
 Median :528.0   Median : 47138      Median : 667.55   Median :20.34  
 Mean   :502.1   Mean   : 54429      Mean   : 758.28   Mean   :22.44  
 3rd Qu.:645.0   3rd Qu.: 63180      3rd Qu.:1016.24   3rd Qu.:27.34  
 Max.   :892.0   Max.   :365258      Max.   :3314.03   Max.   :60.22  
 Physical_Demand  Hiring_Potential 
 Min.   : 3.950   Min.   :-40.760  
 1st Qu.: 7.213   1st Qu.: -3.533  
 Median : 9.990   Median :  4.590  
 Mean   :13.003   Mean   :  3.837  
 3rd Qu.:16.025   3rd Qu.: 11.080  
 Max.   :43.230   Max.   : 37.050  
 Overall_Score     Average_Income(USD) Work_Environment   Stress_Level    
 Min.   :-2.3709   Min.   :-1.0221     Min.   :-1.4402   Min.   :-1.6145  
 1st Qu.:-0.7686   1st Qu.:-0.5765     1st Qu.:-0.7627   1st Qu.:-0.7207  
 Median : 0.1392   Median :-0.2049     Median :-0.1954   Median :-0.2056  
 Mean   : 0.0000   Mean   : 0.0000     Mean   : 0.0000   Mean   : 0.0000  
 3rd Qu.: 0.7667   3rd Qu.: 0.2459     3rd Qu.: 0.5555   3rd Qu.: 0.4791  
 Max.   : 2.0914   Max.   : 8.7334     Max.   : 5.5038   Max.   : 3.6965  
 Physical_Demand   Hiring_Potential  
 Min.   :-1.1178   Min.   :-3.67145  
 1st Qu.:-0.7150   1st Qu.:-0.60672  
 Median :-0.3720   Median : 0.06196  
 Mean   : 0.0000   Mean   : 0.00000  
 3rd Qu.: 0.3732   3rd Qu.: 0.59625  
 Max.   : 3.7325   Max.   : 2.73421  
[1] "Overall_Score"       "Average_Income(USD)" "Work_Environment"   
[4] "Stress_Level"        "Physical_Demand"     "Hiring_Potential"   
[7] "Stress_category"    

Layout the Dashboard

Row

Histogram Chart (Stress-Levels)

Scatterplot-2D Density (Jobs Physical Demand, Overall Rank)

Tree-Map Chart

t-SNE Manifold Chart

Row

2D Scatterplot

---
title: "SOCR Jobs Dashboard"
author: "SOCR Team"
date: "`r format(Sys.time(), '%B %Y')`"
output: 
  flexdashboard::flex_dashboard:
    orientation: rows
    social: menu
    source_code: embed
---

```{r setup, include=FALSE}
# install.packages("flexdashboard")
library(flexdashboard)
library(knitr) 
library(ggplot2)
library(plotly)
library(plyr)
```

Load the Data
=======================================================================

This [SOCR Webapp](http://socr.umich.edu/HTML5/) utilizes the [SOCR 2011 US Jobs Dataset](http://wiki.socr.umich.edu/index.php/SOCR_Data_2011_US_JobsRanking) to demonstrate dynamic data dashboarding using Rmarkdown/Knitr, Plotly, and FlexDashboard. Another [SOCR Dashboard app](http://socr.umich.edu/HTML5/Dashboard/) provides a purely HTML/JavaScript mechanism for graphical database query, hypothesis generation, and exploratory data analytics. Another [SOCR Webapp](http://socr.umich.edu/HTML5/) illustrates [dimensionality reduction using t-SNE for the same Jobs dataset](http://socr.umich.edu/HTML5/SOCR_TensorBoard_UKBB/UsersData.html).

Scroll through the *four main tabs* on the top: [Data Import](./SOCR_Dashboard_Example_Jobs.html#load-the-data), [Data Summary](./SOCR_Dashboard_Example_Jobs.html#data-summary), [Graphical Dashboard](./SOCR_Dashboard_Example_Jobs.html#layout-the-dashboard), and `Source Code`. Additional details on [RMarkdown, HTML widgets, R dashboarding, and interactive docs are available here](https://rmarkdown.rstudio.com/gallery.html).

```{r echo=TRUE}
library(rvest)

# Download and parse the SOCR wiki page for the 2011 US Jobs ranking data.
wiki_url <- read_html("http://wiki.socr.umich.edu/index.php/SOCR_Data_2011_US_JobsRanking")

# Print the page's "#content" node set; the result is only displayed,
# not stored.
html_nodes(wiki_url, "#content")

# Parse the first <table> element on the page into `jobs`, the raw data
# object used by all later chunks.
jobs <- html_table(html_nodes(wiki_url, "table")[[1]])
```

Data Summary
=======================================================================

```{r}
# Drop columns 1, 7 and 10 of the scraped table. Column 7 is re-attached
# to `jobs2` as `Stress_category` at the end of this chunk.
jobs1 <- jobs[, -c(1, 7, 10)]

# `[[` returns a plain vector for both base data.frames and tibbles.
# `x[, j]` only drops to a vector for base data.frames; on the tibble that
# newer rvest versions return it stays a one-column table.
job.names <- jobs1[[1]]
jobs2 <- jobs1[, -1]
colnames(jobs2)
summary(jobs2)

# Standardize every remaining column with scale() and summarize again.
jobs3 <- apply(jobs2, 2, scale)
summary(jobs3)

# Column 7 of the raw table, prefixed with "#".
jobs3_color <- paste0("#", jobs[[7]])

# One-row grid holding the smallest and largest of those strings
# (character ordering, since the values are strings).
jobs3_color1 <- expand.grid(color_1 = min(jobs3_color),
                            color_2 = max(jobs3_color), stringsAsFactors = FALSE)

# Re-attach column 7 of the raw table under the name used by later chunks.
jobs2$Stress_category <- jobs[[7]]
colnames(jobs2)
```

Layout the Dashboard
=======================================================================

Row {data-width=300}
-----------------------------------------------------------------------

### Histogram Chart (Stress-Levels)

```{r}
# install.packages("highcharter")
library(highcharter)
library(ggplot2)
library(plotly)

# Alternative static ggplot2 histogram, kept for reference:
# p1 <- ggplot(jobs2, aes(x = Stress_category)) +
#  geom_histogram(binwidth = 0.5,
#                 col = rainbow(11), fill = rainbow(11)) +
#  ggtitle("Frequency histogram of Jobs by Stress-level Category")
# p1

# Two-column data frame consumed by the 2D density scatterplot chunk.
# Columns must be named with `=`: using `<-` inside data.frame() assigns
# global variables instead and leaves the columns with mangled names, so
# the later aes(Jobs_Physical_Demand, Jobs_Overall_Rank) would only work
# by accidentally finding those globals.
df <- data.frame(
  Jobs_Physical_Demand = jobs2$Physical_Demand,
  Jobs_Overall_Rank = jobs2$Overall_Score
)

# Bar chart with one bar per job, using the Stress_category values as the
# single (blue) series.
highchart() %>%
  hc_title(text = "Job Stress-Levels (X) against Job Ranking (Y)") %>%
  hc_chart(type = "bar", showInLegend = FALSE) %>%
  hc_add_series(data = jobs2$Stress_category, color = "blue")
```

### Scatterplot-2D Density (Jobs Physical Demand, Overall Rank)

```{r}
# Point cloud of `df` (physical demand on x, overall rank on y) overlaid
# with 2D density contours on a white panel, then handed to plotly for an
# interactive widget.
density_plot <- ggplot(df, aes(x = Jobs_Physical_Demand, y = Jobs_Overall_Rank))
density_plot <- density_plot +
  geom_point(alpha = 0.5) +
  geom_density_2d() +
  theme(panel.background = element_rect(fill = "#ffffff"))

ggplotly(density_plot)
```

### Tree-Map Chart

```{r}
# install.packages("treemap")
library(treemap)
library(flexdashboard)

# Define income categories by Q1, Q2=median, Q3
jobs2$IncomeCat <- cut(jobs2$`Average_Income(USD)`, c(0, 33910, 47138, 63180, Inf), labels = c(1:4))
# Shift hiring potential so that its minimum becomes 0.
jobs2$HP <- jobs2$Hiring_Potential - min(jobs2$Hiring_Potential)

# Build the treemap structure without drawing it (draw = FALSE); the result
# is only used as input for the interactive highcharter version below.
tm2 <- treemap(jobs, index = c("Job_Title", "Stress_Category"),
               vSize = "Average_Income(USD)", vColor = "Overall_Score",
               type = "value", palette = rev(rainbow(6)),
               draw = FALSE)

# NOTE(review): the tooltip string contains literal line breaks; they look
# like stripped HTML <br> tags -- confirm against the original dashboard.
hctreemap(tm2, allowDrillToNode = TRUE, layoutAlgorithm = "squarified", joinBy = "Stress_Category") %>%
  hc_title(text = "US 2011 Jobs Data\n (Size:Avg.Income, Color:Stress-Level Category)") %>%
  hc_tooltip(pointFormat = "{point.name}:
Average_Income(USD): {point.value:,.0f}
Overall_Score: {point.valuecolor:,.0f}")
```

### t-SNE Manifold Chart

```{r}
# ggplot jobs into the 2D t-SNE manifold
library("tsne")
set.seed(13242)

# Expand jobs2 into a numeric design matrix, drop columns 1 and 8 of that
# matrix, and embed the remaining columns with t-SNE.
# dplyr::as_tibble() replaces the deprecated dplyr::tbl_df().
tsne_jobs <- jobs2 %>%
  as.data.frame() %>%
  dplyr::as_tibble() %>%
  model.matrix(~., data = .) %>%
  as.data.frame() %>%
  dplyr::as_tibble() %>%
  .[-c(1, 8)] %>%
  tsne(perplexity = 60)

# Attach the two embedding coordinates to the job data.
df2 <- jobs2 %>%
  dplyr::mutate(x = tsne_jobs[, 1], y = tsne_jobs[, 2])

# Per-category centroid (and spread) of the embedding, used for the labels.
df2_centers <- df2 %>%
  dplyr::group_by(Stress_category) %>%
  dplyr::summarise(cx = mean(x), cy = mean(y), sdcx = sd(x), sdcy = sd(y))

df2$Stress_category <- as.factor(df2$Stress_category)

# Named vector of the distinct categories, passed to scale_color_manual().
cols <- df2 %>%
  dplyr::select(Stress_category) %>%
  dplyr::distinct() %>%
  {
    setNames(.$Stress_category, .$Stress_category)
  }

ggPlotMe <- ggplot(df2) +
  geom_point(aes(x, y, color = Stress_category), size = 4, alpha = 0.5) +
  scale_color_manual("Type", values = cols) +
  geom_text(data = df2_centers, aes(cx, cy, label = Stress_category)) +
  theme_minimal() +
  theme(legend.position = "right") # +
  # facet_wrap(~Stress_category) # if you want to separate the plots for each Stress Level

ggPlotMe
```

Row {data-width=900}
-----------------------------------------------------------------------

### 2D Scatterplot

```{r}
df2$Job_Title <- jobs$Job_Title

ds <- df2 %>% # list output
  dplyr::select(Overall_Score, "Average_Income(USD)", Work_Environment,
                Stress_Level, Physical_Demand, Hiring_Potential,
                Stress_category, color = Stress_category, x, y) %>%
  list_parse() # Convert the DF object to list preserving the object structure

ds2 <- df2 %>% # list output
  dplyr::select(color = Stress_Level, x, y) %>%
  mutate(color = hex_to_rgba(color, 0.05)) %>%
  list_parse()

# NOTE(review): `tooltip` is a vector of three column names, not a
# Highcharts format string, and "Job_Title" is not among the columns
# selected into `ds` -- confirm the intended tooltip content.
tooltip <- c("Job_Title", "Stress_category", "Physical_Demand")
# jobs$"Job_Title", "Stress_Category", Average_Income(USD): Overall_Score:"

# Zoomable scatter of the t-SNE embedding: the first series carries the
# points, the second (linked, mouse tracking disabled, zIndex = -3) draws
# large translucent circles behind them.
# NOTE(review): the empty headerFormat and newline-only footerFormat look
# like stripped HTML tags -- confirm against the original dashboard.
scatter_HChart <- highchart() %>%
  hc_chart(zoomType = "xy") %>%
  hc_xAxis(minRange = diff(range(df2$Stress_Level)) / 5) %>%
  hc_yAxis(minRange = diff(range(df2$Hiring_Potential)) / 5) %>%
  hc_add_series(
    data = ds, type = "scatter", name = "Job_Title",
    states = list(hover = list(halo = list(
      size = 50,
      attributes = list(opacity = 0.8)
    )))
  ) %>%
  hc_add_series(
    data = ds2, type = "scatter",
    marker = list(radius = 50, symbol = "circle"),
    zIndex = -3, enableMouseTracking = FALSE,
    linkedTo = ":previous"
  ) %>%
  hc_plotOptions(series = list()) %>%
  hc_tooltip(
    useHTML = TRUE,
    borderRadius = 1,
    borderWidth = 5,
    headerFormat = "",
    pointFormat = tooltip,
    footerFormat = "\n"
  ) %>%
  hc_add_theme(
    hc_theme_null(
      chart = list(
        backgroundColor = "transparent",
        style = list(fontFamily = "Roboto")
      )
    )
  )

scatter_HChart
```