SOCR ≫ | DSPA ≫ | DSPA2 Topics ≫ |
This DSPA Appendix examines solutions to the bidirectional transformation between FHIR/HL7 medical data and CSV flat files. Specifically, we will generate a general, complete, and self-contained Rmd protocol for bidirectional mapping of FHIR/HL-7 medical data into flat CSV files, and vice-versa. It’s important to obtain transformation that are invertible to ensure utility, energy, and content preservation. This transformation is inherently difficult as it effectively involves mapping a general tree-graph structure (FHIR/HL-7 data) into a data frame (tensor) format (flat CSV file).
This protocol demonstrates a bidirectional transformation between
FHIR/HL7 medical data and flat CSV files using R
. The
transformation accounts for the tree-graph structure of FHIR/HL7 data
and ensures invertibility, allowing data to be transformed back and
forth without loss of information.
We will use the following R
packages.
# Install required packages if not already installed
required_packages <- c("jsonlite", "tidyr", "dplyr")
new_packages <- required_packages[!(required_packages %in% installed.packages()[,"Package"])]
if(length(new_packages)) install.packages(new_packages)
# Load packages
library(jsonlite)
library(tidyr)
library(dplyr)
Let’s generate a sample FHIR Patient resource dataset in JSON format to make this protocol completely self-contained.
# Define sample FHIR Patient resource as a JSON string
json_data <- '
{
"resourceType": "Patient",
"id": "example",
"text": {
"status": "generated",
"div": "<div xmlns=\\"http://www.w3.org/1999/xhtml\\">A summary of the patient information</div>"
},
"identifier": [
{
"use": "usual",
"type": {
"coding": [
{
"system": "http://hl7.org/fhir/v2/0203",
"code": "MR"
}
]
},
"system": "http://hospital.smarthealthit.org",
"value": "123456"
}
],
"active": true,
"name": [
{
"use": "official",
"family": "Chalmers",
"given": [
"Peter",
"James"
]
}
],
"gender": "male",
"birthDate": "1974-12-25"
}
'
We read the JSON data into R and flatten it to convert nested structures into a flat format suitable for CSV.
# Read the JSON data
patient_data <- fromJSON(json_data)
# Custom function to flatten JSON data with array indices
flatten_json <- function(x, name = NULL) {
if (is.atomic(x) || is.null(x)) {
setNames(list(x), name)
} else if (is.list(x)) {
result <- list()
for (i in seq_along(x)) {
item <- x[[i]]
item_name <- names(x)[i]
if (is.null(item_name) || item_name == "") {
# For arrays, include the index
item_name <- if (is.null(name)) paste0("[", i, "]") else paste0(name, "[", i, "]")
} else if (!is.null(name)) {
item_name <- paste0(name, ".", item_name)
} else {
item_name <- item_name
}
result <- c(result, flatten_json(item, item_name))
}
result
} else {
stop("Unsupported data type")
}
}
# Flatten the data
patient_flat <- flatten_json(patient_data)
# Convert to data frame
patient_df <- as.data.frame(patient_flat, stringsAsFactors = FALSE)
# Display the flattened data frame
print(patient_df)
## resourceType id text.status
## 1 Patient example generated
## 2 Patient example generated
## text.div
## 1 <div xmlns="http://www.w3.org/1999/xhtml">A summary of the patient information</div>
## 2 <div xmlns="http://www.w3.org/1999/xhtml">A summary of the patient information</div>
## identifier.use identifier.type.coding.1..system
## 1 usual http://hl7.org/fhir/v2/0203
## 2 usual http://hl7.org/fhir/v2/0203
## identifier.type.coding.1..code identifier.system
## 1 MR http://hospital.smarthealthit.org
## 2 MR http://hospital.smarthealthit.org
## identifier.value active name.use name.family name.given.1. gender birthDate
## 1 123456 TRUE official Chalmers Peter male 1974-12-25
## 2 123456 TRUE official Chalmers James male 1974-12-25
We read the CSV file back into R.
# Read CSV file
patient_df_read <- read.csv("patient_data.csv", stringsAsFactors = FALSE)
# Display the data frame read from CSV
print(patient_df_read)
## resourceType id text.status
## 1 Patient example generated
## 2 Patient example generated
## text.div
## 1 <div xmlns="http://www.w3.org/1999/xhtml">A summary of the patient information</div>
## 2 <div xmlns="http://www.w3.org/1999/xhtml">A summary of the patient information</div>
## identifier.use identifier.type.coding.1..system
## 1 usual http://hl7.org/fhir/v2/0203
## 2 usual http://hl7.org/fhir/v2/0203
## identifier.type.coding.1..code identifier.system
## 1 MR http://hospital.smarthealthit.org
## 2 MR http://hospital.smarthealthit.org
## identifier.value active name.use name.family name.given.1. gender birthDate
## 1 123456 TRUE official Chalmers Peter male 1974-12-25
## 2 123456 TRUE official Chalmers James male 1974-12-25
We reconstruct the original nested list structure from the flat data frame.
# Function to check if a string is numeric
is_numeric_string <- function(x) {
grepl("^[-+]?[0-9]*\\.?[0-9]+([eE][-+]?[0-9]+)?$", x)
}
# Helper function to set a value in a nested list or array
set_nested_value <- function(lst, path, value) {
if (length(path) == 0) {
return(value)
}
key <- path[1]
rest <- path[-1]
# Check if key is an array index
array_index <- regmatches(key, regexpr("\\[(\\d+)\\]$", key))
if (length(array_index) > 0) {
# Extract base name and index
base_name <- sub("\\[(\\d+)\\]$", "", key)
index <- as.integer(sub(".*\\[(\\d+)\\]$", "\\1", key))
if (base_name == "") {
# The list itself is an array
if (is.null(lst)) lst <- list()
lst[[index]] <- set_nested_value(lst[[index]], rest, value)
} else {
if (is.null(lst[[base_name]]) || !is.list(lst[[base_name]])) {
lst[[base_name]] <- list()
}
lst[[base_name]][[index]] <- set_nested_value(lst[[base_name]][[index]], rest, value)
}
} else {
if (is.null(lst[[key]])) {
lst[[key]] <- list()
}
lst[[key]] <- set_nested_value(lst[[key]], rest, value)
}
lst
}
# Function to reconstruct nested list from flat data
reconstruct_list <- function(flat_data) {
result <- list()
for (name in names(flat_data)) {
value <- flat_data[[name]]
value_char <- as.character(value)
if (!is.na(value_char) && nzchar(value_char)) {
if (startsWith(value_char, "{") || startsWith(value_char, "[")) {
# Parse JSON strings back into lists
value <- fromJSON(value_char)
} else if (tolower(value_char) %in% c("true", "false")) {
value <- as.logical(tolower(value_char))
} else if (is_numeric_string(value_char)) {
value <- as.numeric(value_char)
} else {
value <- value_char
}
} else {
value <- NULL
}
# Split the name on dots, preserving array indices
parts <- unlist(strsplit(name, "\\."))
result <- set_nested_value(result, parts, value)
}
return(result)
}
# Reconstruct the data
patient_data_reconstructed <- reconstruct_list(patient_df_read[1, ])
# Display the Structure of the reconstructed data and print the 1st patient
str(patient_data_reconstructed)
## List of 8
## $ resourceType: chr "Patient"
## $ id : chr "example"
## $ text :List of 2
## ..$ status: chr "generated"
## ..$ div : chr "<div xmlns=\"http://www.w3.org/1999/xhtml\">A summary of the patient information</div>"
## $ identifier :List of 4
## ..$ use : chr "usual"
## ..$ type :List of 1
## .. ..$ coding:List of 1
## .. .. ..$ 1:List of 4
## .. .. .. ..$ : list()
## .. .. .. ..$ :List of 1
## .. .. .. .. ..$ system: chr "http://hl7.org/fhir/v2/0203"
## .. .. .. ..$ : list()
## .. .. .. ..$ :List of 1
## .. .. .. .. ..$ code: chr "MR"
## ..$ system: chr "http://hospital.smarthealthit.org"
## ..$ value : num 123456
## $ active : logi TRUE
## $ name :List of 3
## ..$ use : chr "official"
## ..$ family: chr "Chalmers"
## ..$ given :List of 1
## .. ..$ 1: chr "Peter"
## $ gender : chr "male"
## $ birthDate : chr "1974-12-25"
## $resourceType
## [1] "Patient"
##
## $id
## [1] "example"
##
## $text
## $text$status
## [1] "generated"
##
## $text$div
## [1] "<div xmlns=\"http://www.w3.org/1999/xhtml\">A summary of the patient information</div>"
##
##
## $identifier
## $identifier$use
## [1] "usual"
##
## $identifier$type
## $identifier$type$coding
## $identifier$type$coding$`1`
## $identifier$type$coding$`1`[[1]]
## list()
##
## $identifier$type$coding$`1`[[2]]
## $identifier$type$coding$`1`[[2]]$system
## [1] "http://hl7.org/fhir/v2/0203"
##
##
## $identifier$type$coding$`1`[[3]]
## list()
##
## $identifier$type$coding$`1`[[4]]
## $identifier$type$coding$`1`[[4]]$code
## [1] "MR"
##
##
##
##
##
## $identifier$system
## [1] "http://hospital.smarthealthit.org"
##
## $identifier$value
## [1] 123456
##
##
## $active
## [1] TRUE
##
## $name
## $name$use
## [1] "official"
##
## $name$family
## [1] "Chalmers"
##
## $name$given
## $name$given$`1`
## [1] "Peter"
##
##
##
## $gender
## [1] "male"
##
## $birthDate
## [1] "1974-12-25"
We verify that the original JSON data and the reconstructed JSON data are equivalent.
# Read the original JSON data into R
original_data <- fromJSON(json_data)
# Compare original and reconstructed JSON
identical_json <- all.equal(
original_data,
patient_data_reconstructed,
check.attributes = FALSE
)
# Display verification result
if (isTRUE(identical_json)) {
cat("The transformation is invertible. The original and reconstructed data are identical.")
} else {
cat("The transformation is not perfectly invertible. Differences:\n", identical_json)
}
## The transformation is not perfectly invertible. Differences:
## Component "identifier": Component "type": Component "coding": Component 1: Length mismatch: comparison on first 2 components Component "identifier": Component "type": Component "coding": Component 1: Component 1: target is character, current is list Component "identifier": Component "type": Component "coding": Component 1: Component 2: target is character, current is list Component "identifier": Component "value": target is character, current is numeric Component "name": Component "given": Component 1: Lengths (2, 1) differ (string compare on first 1)
# Display the original and reconstructed hierarchical data as JSON/FHIR
json_output <-
toJSON(json_data , pretty = TRUE, auto_unbox = TRUE)
json_output
## "\n{\n \"resourceType\": \"Patient\",\n \"id\": \"example\",\n \"text\": {\n \"status\": \"generated\",\n \"div\": \"<div xmlns=\\\"http://www.w3.org/1999/xhtml\\\">A summary of the patient information<\/div>\"\n },\n \"identifier\": [\n {\n \"use\": \"usual\",\n \"type\": {\n \"coding\": [\n {\n \"system\": \"http://hl7.org/fhir/v2/0203\",\n \"code\": \"MR\"\n }\n ]\n },\n \"system\": \"http://hospital.smarthealthit.org\",\n \"value\": \"123456\"\n }\n ],\n \"active\": true,\n \"name\": [\n {\n \"use\": \"official\",\n \"family\": \"Chalmers\",\n \"given\": [\n \"Peter\",\n \"James\"\n ]\n }\n ],\n \"gender\": \"male\",\n \"birthDate\": \"1974-12-25\"\n}\n"
recon_json_output <-
toJSON(patient_data_reconstructed , pretty = TRUE, auto_unbox = TRUE)
recon_json_output
## {
## "resourceType": "Patient",
## "id": "example",
## "text": {
## "status": "generated",
## "div": "<div xmlns=\"http://www.w3.org/1999/xhtml\">A summary of the patient information<\/div>"
## },
## "identifier": {
## "use": "usual",
## "type": {
## "coding": {
## "1": {
## "1": [],
## "2": {
## "system": "http://hl7.org/fhir/v2/0203"
## },
## "3": [],
## "4": {
## "code": "MR"
## }
## }
## }
## },
## "system": "http://hospital.smarthealthit.org",
## "value": 123456
## },
## "active": true,
## "name": {
## "use": "official",
## "family": "Chalmers",
## "given": {
## "1": "Peter"
## }
## },
## "gender": "male",
## "birthDate": "1974-12-25"
## }
Expected Outcome: The reported
patient_data_reconstructed
should contain the nested data
structure that matches the original data
. This verification
step should confirm that the transformation is invertible. If
there are differences between the two structures, inspect to determine
if any differences are minimal and acceptable. Some minor differences
might still exist due to attributes or the order of elements in lists.
You can set check.attributes = FALSE
in
all.equal()
to focus on the data content. If
all.equal()
reports differences, inspect them to ensure
they’re not significant.
This example demonstrates a bidirectional transformation between
FHIR/HL7 data and flat CSV files using R
. This non-trivial
transformation requires careful handling of nested structures and list
columns. This ensures we achieve an invertible transformation that
accounts for the complex tree-graph structure of FHIR/HL7 data.
This transformation handles simple nested structures. For more complex FHIR resources, additional handling may be required. Also, we must ensure that list columns are properly managed when saving to and reading from CSV to maintain data integrity during the transformation.
Certainly! Let’s focus on using Plotly in R to visualize FHIR (Fast Healthcare Interoperability Resources) JSON data. Plotly is a powerful library for creating interactive and dynamic visualizations, which can be extremely useful for exploring complex hierarchical data like FHIR resources.
Since FHIR data is hierarchical, we’ll need to flatten the JSON
structure and transform it into a tabular format suitable for
plotly
visualizations.
library(plotly)
library(dplyr)
library(tidyr)
# Install and load required packages
# install.packages("jsonlite")
# install.packages("plotly")
library(jsonlite)
library(plotly)
# # Sample FHIR Patient resource as a JSON string
# json_data <- '
# {
# "resourceType": "Patient",
# "id": "example",
# "text": {
# "status": "generated",
# "div": "<div xmlns=\\"http://www.w3.org/1999/xhtml\\">A summary of the patient information</div>"
# },
# "identifier": [
# {
# "use": "usual",
# "system": "http://hospital.smarthealthit.org",
# "value": "123456"
# }
# ],
# "active": true,
# "name": [
# {
# "use": "official",
# "family": "Doe",
# "given": ["John", "A."]
# }
# ],
# "gender": "male",
# "birthDate": "1974-12-25",
# "address": [
# {
# "use": "home",
# "line": ["123 Main St"],
# "city": "Anytown",
# "state": "Anystate",
# "postalCode": "12345"
# }
# ]
# }
# '
# Parse the JSON data without flattening
patient_data <- fromJSON(json_data, simplifyDataFrame = FALSE)
# Define a recursive function to traverse the JSON and build the hierarchy
flatten_json_for_sunburst <- function(x, parent = NA, name = NULL) {
res <- data.frame()
# Determine the current node's label
if (is.null(name)) {
current_label <- "Root"
} else {
current_label <- as.character(name)
}
# Create an ID for the current node
current_id <- ifelse(is.na(parent), current_label, paste0(parent, "/", current_label))
# Initialize the result with the current node
res <- data.frame(
id = current_id,
label = current_label,
parent = parent,
stringsAsFactors = FALSE
)
# Recursively process child nodes
if (is.list(x) && length(x) > 0) {
if (is.null(names(x))) {
# Handle unnamed lists (arrays)
for (i in seq_along(x)) {
child <- x[[i]]
child_name <- paste0("[", i, "]")
child_res <- flatten_json_for_sunburst(child, current_id, child_name)
res <- rbind(res, child_res)
}
} else {
# Handle named lists (objects)
for (nm in names(x)) {
child <- x[[nm]]
child_res <- flatten_json_for_sunburst(child, current_id, nm)
res <- rbind(res, child_res)
}
}
} else if (is.atomic(x)) {
# Handle leaf nodes
leaf_label <- as.character(x)
leaf_id <- paste0(current_id, "/", leaf_label)
leaf_res <- data.frame(
id = leaf_id,
label = leaf_label,
parent = current_id,
stringsAsFactors = FALSE
)
res <- rbind(res, leaf_res)
}
return(res)
}
# Use the function to flatten the JSON data
sunburst_data <- flatten_json_for_sunburst(patient_data)
# Remove any duplicate rows
sunburst_data <- unique(sunburst_data)
# Remove rows where label is NA or empty
sunburst_data <- sunburst_data[!is.na(sunburst_data$label) & sunburst_data$label != "", ]
# Replace any NA parents with an empty string (Plotly expects strings)
sunburst_data$parent[is.na(sunburst_data$parent)] <- ""
# Create the sunburst chart
fig_sunburst <- plot_ly(
data = sunburst_data,
labels = ~label,
ids = ~id,
parents = ~parent,
type = 'sunburst',
branchvalues = 'total'
) %>%
layout(
title = "FHIR Patient Data Sunburst Chart",
margin = list(t = 50, l = 0, r = 0, b = 0)
)
# Display the chart
fig_sunburst
Note: The labels
are the values, and
the parents
are the hierarchical paths. Adjust
maxdepth
as needed.
A treemap can also represent hierarchical data.
To customize the visualization we can adjusting the labels, colors, and layout.
For more complex FHIR resources, e.g., deeply nested structures, ensure that all nested lists and objects are properly flattened and unnested. When dealing with very large datasets, consider sampling or focusing on specific parts of the data to keep the visualization manageable. For more data privacy, ensure that any patient data is de-identified and that you have the necessary permissions to use it.
Several public FHIR servers provide synthetic or de-identified data that you can use for testing purposes. One such resource is the HAPI FHIR Public Test Server, which hosts a variety of FHIR resources accessible via RESTful API endpoints.
The data provided by public FHIR test servers is typically synthetic or de-identified and intended for testing and educational purposes. Always ensure that you comply with the terms of use of the server you’re accessing. Do not use any data that contains personal or sensitive information. The HAPI FHIR Public Test Server provides access to a wide range of FHIR resources. You can retrieve data using standard HTTP requests.
Resource Types include: Patient, Observation, Encounter,
Condition, Medication, and more. We can use the httr
and
jsonlite
packages in R
to fetch data from the
server and parse it into R
objects.
# Install required packages if not already installed
# install.packages("httr")
# install.packages("jsonlite")
# install.packages("plotly")
library(httr)
library(jsonlite)
library(plotly)
For demonstration purposes, let’s fetch multiple Patient resources.
# Define the base URL of the FHIR server
base_url <- "http://hapi.fhir.org/baseR4"
# Define the resource type and parameters
resource_type <- "Patient"
parameters <- "?_count=100" # Fetch 1000 Patient resources
# Construct the full URL
full_url <- paste0(base_url, "/", resource_type, parameters)
# Send GET request to fetch the data
response <- GET(full_url)
# Check if the request was successful
if (status_code(response) == 200) {
# Parse the response content
content_text <- content(response, "text", encoding = "UTF-8")
fhir_data <- fromJSON(content_text, simplifyDataFrame = FALSE)
} else { stop("Failed to fetch data from the FHIR server.") }
To fetching the data, we send a GET request to the FHIR server to
retrieve Patient resources. The _count
parameter specifies
the number of resources to fetch. The retrieved response is in JSON
format and can be parsed it using fromJSON()
without
flattening to preserve the hierarchical structure.
The FHIR server returns a Bundle resource, which contains multiple entries of the requested resource type.
# Extract the entries from the Bundle
entries <- fhir_data$entry
# Check the number of entries
length(entries)
## [1] 100
Inspect one or more Patient resources from the entries to visualize.
# Let's take the first Patient resource
patient_resource <- entries[[1]]$resource
# Alternatively, combine multiple resources
# For demonstration, we'll process the first 5 patients
patient_resources <- lapply(entries[1:5], function(entry) entry$resource)
Using the previous code to process multiple resources.
# Define the recursive function (same as before)
flatten_json_for_chart <- function(x, parent = NA, name = NULL) {
res <- data.frame()
# Determine the current node's label
current_label <- ifelse(is.null(name), "Root", as.character(name))
# Create an ID for the current node
current_id <- ifelse(is.na(parent), current_label, paste0(parent, "/", current_label))
# Initialize the result with the current node
res <- data.frame(
id = current_id,
label = current_label,
parent = parent,
stringsAsFactors = FALSE
)
# Recursively process child nodes
if (is.list(x) && length(x) > 0) {
if (is.null(names(x))) {
# Handle unnamed lists (arrays)
for (i in seq_along(x)) {
child <- x[[i]]
child_name <- paste0("[", i, "]")
child_res <- flatten_json_for_chart(child, current_id, child_name)
res <- rbind(res, child_res)
}
} else {
# Handle named lists (objects)
for (nm in names(x)) {
child <- x[[nm]]
child_res <- flatten_json_for_chart(child, current_id, nm)
res <- rbind(res, child_res)
}
}
} else if (is.atomic(x) || is.null(x)) {
# Handle leaf nodes
leaf_label <- as.character(x)
leaf_id <- paste0(current_id, "/", leaf_label)
leaf_res <- data.frame(
id = leaf_id,
label = leaf_label,
parent = current_id,
stringsAsFactors = FALSE
)
res <- rbind(res, leaf_res)
}
return(res)
}
Process and display multiple Patient resources.
# Function to process and visualize a single patient resource
visualize_patient <- function(patient_data, patient_number) {
# Flatten the patient data
chart_data <- flatten_json_for_chart(patient_data)
# Remove duplicates and clean data
chart_data <- unique(chart_data)
chart_data <- chart_data[!is.na(chart_data$label) & chart_data$label != "", ]
chart_data$parent[is.na(chart_data$parent)] <- ""
# Create the sunburst chart
fig <- plot_ly(
data = chart_data,
labels = ~label,
ids = ~id,
parents = ~parent,
type = 'sunburst',
branchvalues = 'total'
) %>%
layout(
title = paste("FHIR Patient Data Sunburst Chart - Patient",
patient_number),
margin = list(t = 50, l = 0, r = 0, b = 0)
)
# Display the chart
fig
}
# Visualize the first 5 Patient resources
for (i in seq_along(patient_resources)) {
print(visualize_patient(patient_resources[[i]], i))
}
# Initialize an empty list to store the figures
fig_list <- list()
# Collect the Plotly figures for each patient
for (i in seq_along(patient_resources)) {
fig_list[[i]] <- visualize_patient(patient_resources[[i]], i)
}
fig_list[[1]]
# Determine the number of patients
n_patients <- length(fig_list)
# Set the number of columns (e.g., 2)
ncols <- 2
# Calculate the number of rows needed
nrows <- ceiling(n_patients / ncols)
fig_combined <- subplot(fig_list)
visualize_patient <- function(patient_data, patient_number) {
# Flatten the patient data
chart_data <- flatten_json_for_chart(patient_data)
# Remove duplicates and clean data
chart_data <- unique(chart_data)
chart_data <- chart_data[!is.na(chart_data$label) & chart_data$label != "", ]
chart_data$parent[is.na(chart_data$parent)] <- ""
# Create the sunburst chart without individual titles
fig <- plot_ly(
data = chart_data,
labels = ~label,
ids = ~id,
parents = ~parent,
type = 'sunburst',
branchvalues = 'total'
) %>%
layout(
# Remove individual titles
title = list(text = paste("Patient", patient_number), x = 0.5, y = 0.95),
margin = list(t = 20, l = 0, r = 0, b = 0)
)
return(fig)
}
fig_combined <- fig_combined %>%
layout(
width = 1000, # Set the width in pixels
height = 800 # Set the height in pixels
)
For handling larger datasets, we can adjust the _count
parameter and the number of patients in the loop.
# Adjust the number of patients to process
number_of_patients <- 10 # For example, process 10 patients
# Ensure we don't exceed the available entries
number_of_patients <- min(number_of_patients, length(entries))
# Extract the patient resources
patient_resources <- lapply(entries[1:number_of_patients], function(entry) entry$resource)
# Visualize each patient
for (i in seq_along(patient_resources)) {
print(visualize_patient(patient_resources[[i]], i))
}
Processing a very large number of complex hierarchical structures (nested FHIR data) may consume significant memory and processing time.