1 Description

This R notebook is a bioinformatics pipeline to analyze fitness data obtained from a barcoded transposon library in Ralstonia eutropha, a.k.a. Cupriavidus necator. For background and details regarding the method, see Wetmore et al., mBio, 2015 and Price et al., Nature, 2018.

2 Libraries

# optionally install repos from github
# devtools::install_github("m-jahn/lattice-tools")
# devtools::install_github("m-jahn/R-tools")
library(lattice)
library(latticeExtra)
library(latticetools)
library(tidyverse)
library(dendextend)
library(Rtools)
library(colorspace)
library(stringi)

3 Overview of barcode/transposon read counts

3.1 Data import and processing

Read in the main data tables with A) reads per barcode and sample (‘pool counts’), and B) the fitness tables. Tables were obtained by processing sequencing data with a custom BarSeq pipeline. The 32 generation sequencing samples are removed due to the low read count in the continuous samples.

# Fructose run: read the wide pool-count table, discard every column whose
# name matches "32gen" or "_32_" (the 32-generation samples), then stack all
# columns except the five barcode/position identifiers into long format
# (`sample` holds the former column name, `n_reads` its value).
df_counts_frc <-
  "../../../rebar/data/20201222_barseq_frc/results/result.poolcount" %>%
  read_tsv() %>%
  select(!matches("32gen|_32_")) %>%
  pivot_longer(
    cols = !all_of(c("barcode", "rcbarcode", "scaffold", "strand", "pos")),
    names_to = "sample",
    values_to = "n_reads"
  )

── Column specification ───────────────────────────────────────────────────────────────────────────────────────────
cols(
  .default = col_double(),
  barcode = col_character(),
  rcbarcode = col_character(),
  scaffold = col_character(),
  strand = col_character()
)
ℹ Use `spec()` for the full column specifications.
# Second BarSeq run (20210407_barseq_suc_for): same wide-to-long reshape as
# for the fructose table, but no sample columns are dropped here.
df_counts_suc <-
  "../../../rebar/data/20210407_barseq_suc_for/results/result.poolcount" %>%
  read_tsv() %>%
  pivot_longer(
    cols = !all_of(c("barcode", "rcbarcode", "scaffold", "strand", "pos")),
    names_to = "sample",
    values_to = "n_reads"
  )

── Column specification ───────────────────────────────────────────────────────────────────────────────────────────
cols(
  .default = col_double(),
  barcode = col_character(),
  rcbarcode = col_character(),
  scaffold = col_character(),
  strand = col_character()
)
ℹ Use `spec()` for the full column specifications.
# Stack the two long-format read-count tables row-wise into a single table
df_counts <- list(df_counts_frc, df_counts_suc) %>%
  bind_rows()

# Fitness tables are the final output of the BarSeq pipeline. Each load() is
# expected to place an object named `fitness_gene` in the workspace, so the
# first table must be processed before the second load() replaces it.

# Fructose run: drop the "long pulse" condition and the 32-generation time
# point, label the substrate, and strip the "short " prefix from `condition`.
load("../../../rebar/data/20201222_barseq_frc/results/fitness_gene.Rdata")
df_fitness_frc <- fitness_gene %>%
  filter(condition != "long pulse") %>%
  filter(time != 32) %>%
  mutate(
    ID = as.numeric(ID),
    substrate = "fructose",
    condition = str_remove(condition, "short ")
  )

# Second run: `condition` is split at "_" into `substrate` and `condition`.
load("../../../rebar/data/20210407_barseq_suc_for/results/fitness_gene.Rdata")
df_fitness_suc <- fitness_gene %>%
  separate(
    condition,
    into = c("substrate", "condition"),
    sep = "_"
  )

# Combine both fitness tables and rename the gene identifier column, then
# remove the per-experiment intermediates from the workspace.
df_fitness <- rename(
  bind_rows(df_fitness_frc, df_fitness_suc),
  locus_tag = locusId
)
rm(list = c(
  "df_fitness_frc", "df_fitness_suc",
  "df_counts_frc", "df_counts_suc"
))

# Genome annotation: keep only the first row for each locus tag, and fall
# back to `gene_name` wherever `eggNOG_name` is missing.
df_ref <-
  "../data/ref/Ralstonia_H16_genome_annotation.csv" %>%
  read_csv() %>%
  filter(!duplicated(locus_tag)) %>%
  mutate(
    eggNOG_name = coalesce(eggNOG_name, gene_name)
  )

── Column specification ───────────────────────────────────────────────────────────────────────────────────────────
cols(
  .default = col_character(),
  length = col_double(),
  start = col_double(),
  end = col_double(),
  feature_interval_length = col_double(),
  Psortb_score = col_double()
)
ℹ Use `spec()` for the full column specifications.
# Standard color vector for later plots: the line colors of the colorblind
# lattice theme
stdcol <- custom.colorblind()[["superpose.line"]][["col"]]

3.2 Summary statistics

Overview of the number of reads per barcode, the number of barcodes per gene, and related statistics. On average, around 8-10 M reads were mapped per sample.

# Total mapped reads per sample (sum over all barcodes, in millions),
# shown as a horizontal bar chart with a light grey background grid
df_counts %>%
  group_by(sample) %>%
  summarize(n_million_reads = sum(n_reads) / 1e6) %>%
  barchart(
    factor(sample) ~ n_million_reads,
    data = .,
    par.settings = custom.colorblind(),
    horizontal = TRUE,
    border = NULL,
    scales = list(y = list(cex = 0.7)),
    panel = function(x, y, ...) {
      # draw the grid first so the bars are painted on top of it
      panel.grid(h = -1, v = -1, col = grey(0.9))
      panel.barchart(x, y, ...)
    }
  )