1 Description

This R notebook is a bioinformatics pipeline to analyze protein saturation/under-utilization with a resource allocation model for the chemolithoautotroph Ralstonia eutropha (a.k.a. Cupriavidus necator).

2 Libraries

suppressPackageStartupMessages({
  library(lattice)
  library(latticeExtra)
  library(latticetools)
  library(tidyverse)
  library(stringi)
})

3 Data import

Define the data source directories. Some of them are external, i.e. not included in the data folder accompanying this R notebook; these are located in the GitHub repository of the resource allocation model that was used here. The resource allocation model can be found in my fork of Bacterial-RBA-models.

# Locations of the input data, given as paths relative to the working directory.
# Rdata file with the proteomics data set
Reutropha_proteomics <- "../data/input/Ralstonia_eutropha.Rdata"
# CSV table of model reactions
model_reactions <- "../data/input/model_reactions.csv"
# directory that is scanned for simulation result files (*.tsv) below
simulation_dir <- "../data/simulation/substrate_limitation/"
# load helper script; presumably defines read_rba_result() -- TODO confirm
source("read_rba_result.R")

Read simulation data.

# Read simulation results. For every result type (fluxes, proteins,
# macroprocesses) all matching files in `simulation_dir` are collected and
# their full paths are handed to read_rba_result().
# The dot in front of the extension is escaped and every pattern is anchored
# with `$`, so only files that really end in ".tsv" are picked up.
df_flux <- read_rba_result(
  list.files(simulation_dir, pattern = "fluxes_.*\\.tsv$", full.names = TRUE)
)
df_prot <- read_rba_result(
  list.files(simulation_dir, pattern = "proteins_.*\\.tsv$", full.names = TRUE)
)
df_macr <- read_rba_result(
  list.files(simulation_dir, pattern = "macroprocesses_.*\\.tsv$", full.names = TRUE)
)

4 Overview on substrate uptake, growth, and yield

After running a set of simulations in RBApy that simulate increasing substrate limitation, we can plot the substrate uptake rate q, yield Y in gram biomass per gram substrate, and growth rate µ. Unlike in genome-scale models, growth here becomes limited by the maximum amount of protein that a cell can synthesize. If cells were not protein-limited, or if proteins catalyzed reactions infinitely fast, no such limitation would occur and growth rate would scale linearly with substrate concentration. This is the situation in FBA simulations.

# Reshape the macroprocess results and annotate each simulation.
# Rows whose key contains "test_process" are dropped, then the remaining keys
# (e.g. mu, qS) are rearranged from rows to columns.
df_macr <- df_macr %>%
  filter(!grepl("test_process", key)) %>%
  # NOTE(review): spread() is superseded by pivot_wider(); it is kept because
  # the two may order rows differently and a later panel function subsets
  # points by position -- verify before switching.
  spread(key, value) %>%

  # add type of simulation (the limiting substrate).
  # near() instead of `==` avoids exact comparison of floating point numbers.
  mutate(substrate = case_when(
    carbon_source == "for" ~ "formate",
    carbon_source == "succ" ~ "succinate",
    carbon_source == "fru" & near(nitrogen_conc, 18.7) ~ "fructose",
    carbon_source == "fru" & !near(nitrogen_conc, 18.7) ~ "ammonium"
  )) %>%

  # add uptake rate in g/gDCW*h instead of mmol; the factors are presumably
  # the molar masses of the substrates in g/mmol -- TODO confirm
  mutate(qS_g_gDCW_h = case_when(
    substrate == "formate" ~ qS * 0.04603,
    substrate == "succinate" ~ qS * 0.11809,
    substrate == "fructose" ~ qS * 0.18016,
    substrate == "ammonium" ~ qS * 0.05349
  ))

First we can have a look at how growth rate levels off with increasing substrate concentration in mmol/L. Note: this is not equal to substrate uptake rate. Substrate uptake rate and growth rate should have an almost linear relationship. We can log-transform the carbon concentration and fit a linear model to predict the substrate concentration required to obtain a certain substrate uptake rate and growth rate.

# Plotting helper table: for the ammonium limitation the nitrogen
# concentration is stored in `carbon_conc`, so that all conditions can be
# drawn against the same variable. All other rows keep their carbon
# concentration. This copy is meant for visualization only.
df_macr_viz <- mutate(
  df_macr,
  carbon_conc = case_when(
    substrate == "ammonium" ~ nitrogen_conc,
    TRUE ~ carbon_conc
  )
)

# Growth rate versus substrate concentration, one panel per substrate
# (four panels in a single row).
plot_mu_qs_lin <- xyplot(
  mu ~ carbon_conc | substrate,
  data = df_macr_viz,
  layout = c(4, 1),
  between = list(x = 0.5, y = 0.5),
  scales = list(alternating = FALSE),
  par.settings = custom.colorblind(),
  pch = 19, lwd = 1.5,
  xlab = expression("S [mM]"),
  ylab = expression('µ [h'^'-1'*']'),
  panel = function(x, y, ...) {
    # light grey reference grid first, data points on top
    panel.grid(h = -1, v = -1, col = grey(0.9))
    panel.xyplot(x, y, cex = 0.9, ...)
    #panel.lmlineq(x[1:4], y[1:4], fontfamily = "FreeSans", ...)
  }
)

# log10 substrate concentration versus substrate uptake rate, one panel per
# substrate, with a linear fit through the first (up to) six points of each
# panel.
plot_mu_qs_log <- xyplot(log10(carbon_conc) ~ qS | substrate,
    df_macr_viz,
    par.settings = custom.colorblind(),
    between = list(x = 0.5, y = 0.5), #xlim = c(0, 5),
    layout = c(4,1), lwd = 1.5, pch = 19,
    xlab = expression('q'[S]*' mmol g DCW'^-1*'h'^-1),
    ylab =  expression('log'[10]*' S [mM]'),
    scales = list(alternating = FALSE),
    panel = function(x, y, ...) {
      panel.grid(h = -1, v = -1, col = grey(0.9))
      panel.xyplot(x, y, cex = 0.9, ...)
      # head() instead of x[1:6]: a panel with fewer than six points is not
      # padded with NA before it is handed to the regression helper
      panel.lmlineq(head(x, 6), head(y, 6), fontfamily = "FreeSans", ...)
    }
  )

# Arrange both figures on one page in a grid of 1 column x 2 rows:
# the linear plot goes to the top cell, the log plot to the bottom cell.
# `more = TRUE` keeps the page open for the second plot.
print(plot_mu_qs_lin, more = TRUE, split = c(1, 1, 1, 2))
print(plot_mu_qs_log, split = c(1, 2, 1, 2))


# Per substrate, fit the linear model log10(S) = offset + slope * qS and
# report it as a formula string that predicts the substrate concentration c
# required for a given uptake rate qS.
# NOTE(review): this fit uses all points of a substrate, whereas the plotted
# regression line uses only the first six -- confirm that this is intended.
df_macr_viz %>%
  group_by(substrate) %>%
  summarize(
    # fit once per group; keep the coefficient vector in a list column
    coefs = list(coef(lm(
      log_conc ~ qS,
      data = list(log_conc = log10(carbon_conc), qS = qS)
    ))),
    slope = coefs[[1]][[2]],
    offset = coefs[[1]][[1]]
  ) %>%
  # the helper column is not part of the result
  select(-coefs) %>%
  mutate(model = paste0("c = 10^(", round(offset, 3), " + ", round(slope, 3), "*qS)"))

Create a Herbert-Pirt plot for each condition (substrate uptake rate versus growth rate). A change in yield would show up in this plot as a ‘kink’ in the data points.

# Herbert-Pirt plot: substrate uptake rate (g per g DCW and hour) versus
# growth rate, one panel per substrate. Each panel is annotated with the
# intercept (ms) and the reciprocal slope (Yx/S) of a linear fit.
xyplot(qS_g_gDCW_h ~ mu | substrate, df_macr,
  par.settings = custom.colorblind(),
  between = list(x = 0.5, y = 0.5),
  layout = c(4,1), lwd = 1.5, pch = 19,
  ylab = expression("q"[S]*" [g h"^-1*" gDCW"^-1*"]"),
  xlab = expression('µ [h'^'-1'*']'),
  scales = list(alternating = FALSE),
  panel = function(x, y, ...) {
    panel.grid(h = -1, v = -1, col = grey(0.9))
    panel.xyplot(x, y, cex = 0.9, ...)
    # displaying maintenance and yield coefficients:
    # fit[[1]] is the intercept, fit[[2]] the slope of qS ~ mu.
    # coef() replaces partial matching via `$coeff`, and the local name no
    # longer shadows stats::coef.
    fit <- coef(lm(y ~ x, data.frame(x, y)))
    # label positions (y = 2.7 and 2.4) are fixed in data coordinates
    panel.text(median(x), 2.7,
      paste("ms =", round(fit[[1]], 3), "g h-1 g_DCW-1"),
      col = grey(0.3), cex = 0.7)
    panel.text(median(x), 2.4,
      paste("Yx/S =", round(1 / fit[[2]], 3), "g_DCW g_S-1"),
      col = grey(0.3), cex = 0.7)
  }
)