version 1.0 import "../../../tasks/wdl/StarAlign.wdl" as StarAlign import "../../../tasks/wdl/FastqProcessing.wdl" as FastqProcessing import "../../../tasks/wdl/Metrics.wdl" as Metrics import "../../../tasks/wdl/H5adUtils.wdl" as H5adUtils import "../../../tasks/wdl/CheckInputs.wdl" as OptimusInputChecks import "../../../tasks/wdl/MergeSortBam.wdl" as Merge import "../../../tasks/wdl/Utilities.wdl" as utils ## Copyright Broad Institute, 2022 ## ## This WDL pipeline implements data processing for RNA with UMIs ## ## Runtime parameters are optimized for Broad's Google Cloud Platform implementation. ## For program versions, see docker containers. ## ## LICENSING : ## This script is released under the WDL source code license (BSD-3) (see LICENSE in ## https://github.com/broadinstitute/wdl). Note however that the programs it calls may ## be subject to different licenses. Users are responsible for checking that they are ## authorized to run all programs before running this script. workflow SlideSeq { String pipeline_version = "3.6.3" input { Array[File] r1_fastq Array[File] r2_fastq Array[File]? i1_fastq String input_id String read_structure File tar_star_reference File annotations_gtf String output_bam_basename Boolean count_exons = true File bead_locations String cloud_provider } # docker images String pytools_docker = "pytools:1.0.0-1661263730" String picard_cloud_docker = "picard-cloud:2.26.10" String warp_tools_docker = "warp-tools:2.6.1" String star_merge_docker = "star-merge-npz:1.3.0" String ubuntu_docker = "ubuntu_16_0_4@sha256:025124e2f1cf4d29149958f17270596bffe13fc6acca6252977c572dd5ba01bf" String gcp_ubuntu_docker_prefix = "gcr.io/gcp-runtimes/" String acr_ubuntu_docker_prefix = "dsppipelinedev.azurecr.io/" String ubuntu_docker_prefix = if cloud_provider == "gcp" then gcp_ubuntu_docker_prefix else acr_ubuntu_docker_prefix String gcr_docker_prefix = "us.gcr.io/broad-gotc-prod/" String acr_docker_prefix = "dsppipelinedev.azurecr.io/" # choose docker prefix based on cloud provider String docker_prefix = if cloud_provider == "gcp" then gcr_docker_prefix else acr_docker_prefix # make sure either gcp or azr is supplied as cloud_provider input if ((cloud_provider != "gcp") && (cloud_provider != "azure")) { call utils.ErrorWithMessage as ErrorMessageIncorrectInput { input: message = "cloud_provider must be supplied with either 'gcp' or 'azure'." } } parameter_meta { r1_fastq: "Array of Read 1 FASTQ files; forward read; contains cell barcodes and molecule barcodes" r2_fastq: "Array of Read 2 FASTQ files; reverse read; contains cDNA fragment generated from captured mRNA" i1_fastq: "Optional array of i1 FASTQ files; index read used for demultiplexing of multiple samples on one flow cell" input_id: "Name of sample matching this file; inserted into read group header" read_structure: "String used to specify the UMI (M) and Barcode (C) positions in the Read 1 FASTQ" } call StarAlign.STARGenomeRefVersion as ReferenceCheck { input: tar_star_reference = tar_star_reference, ubuntu_docker_path = ubuntu_docker_prefix + ubuntu_docker } call Metrics.FastqMetricsSlideSeq as FastqMetrics { input: r1_fastq = r1_fastq, read_structure = read_structure, sample_id = input_id, whitelist = bead_locations } call FastqProcessing.FastqProcessingSlidSeq as SplitFastq { input: r1_fastq = r1_fastq, r2_fastq = r2_fastq, i1_fastq = i1_fastq, read_structure = read_structure, sample_id = input_id, whitelist = bead_locations } scatter(idx in range(length(SplitFastq.fastq_R1_output_array))) { call StarAlign.STARsoloFastqSlideSeq as STARsoloFastqSlideSeq { input: r1_fastq = [SplitFastq.fastq_R1_output_array[idx]], r2_fastq = [SplitFastq.fastq_R2_output_array[idx]], whitelist = bead_locations, tar_star_reference = tar_star_reference, output_bam_basename = output_bam_basename + "_" + idx, read_structure = read_structure, count_exons = count_exons } } call Merge.MergeSortBamFiles as MergeBam { input: bam_inputs = STARsoloFastqSlideSeq.bam_output, output_bam_filename = output_bam_basename + ".bam", sort_order = "coordinate", picard_cloud_docker_path = docker_prefix + picard_cloud_docker } call Metrics.CalculateGeneMetrics as GeneMetrics { input: bam_input = MergeBam.output_bam, original_gtf = annotations_gtf, input_id = input_id, warp_tools_docker_path = docker_prefix + warp_tools_docker } call Metrics.CalculateUMIsMetrics as UMIsMetrics { input: bam_input = MergeBam.output_bam, original_gtf = annotations_gtf, input_id = input_id } call Metrics.CalculateCellMetrics as CellMetrics { input: bam_input = MergeBam.output_bam, original_gtf = annotations_gtf, input_id = input_id, warp_tools_docker_path = docker_prefix + warp_tools_docker } call StarAlign.MergeStarOutput as MergeStarOutputs { input: barcodes = STARsoloFastqSlideSeq.barcodes, features = STARsoloFastqSlideSeq.features, matrix = STARsoloFastqSlideSeq.matrix, input_id = input_id, star_merge_docker_path = docker_prefix + star_merge_docker } if ( !count_exons ) { call H5adUtils.SlideseqH5adGeneration as SlideseqH5adGeneration{ input: input_id = input_id, annotation_file = annotations_gtf, cell_metrics = CellMetrics.cell_metrics, gene_metrics = GeneMetrics.gene_metrics, sparse_count_matrix = MergeStarOutputs.sparse_counts, cell_id = MergeStarOutputs.row_index, gene_id = MergeStarOutputs.col_index, add_emptydrops_data = "no", pipeline_version = "SlideSeq_v~{pipeline_version}", warp_tools_docker_path = docker_prefix + warp_tools_docker } } if (count_exons) { call StarAlign.MergeStarOutput as MergeStarOutputsExons { input: barcodes = STARsoloFastqSlideSeq.barcodes_sn_rna, features = STARsoloFastqSlideSeq.features_sn_rna, matrix = STARsoloFastqSlideSeq.matrix_sn_rna, input_id = input_id, star_merge_docker_path = docker_prefix + star_merge_docker } call H5adUtils.SingleNucleusSlideseqH5adOutput as SlideseqH5adGenerationWithExons{ input: input_id = input_id, annotation_file = annotations_gtf, cell_metrics = CellMetrics.cell_metrics, gene_metrics = GeneMetrics.gene_metrics, sparse_count_matrix = MergeStarOutputs.sparse_counts, cell_id = MergeStarOutputs.row_index, gene_id = MergeStarOutputs.col_index, sparse_count_matrix_exon = MergeStarOutputsExons.sparse_counts, cell_id_exon = MergeStarOutputsExons.row_index, gene_id_exon = MergeStarOutputsExons.col_index, pipeline_version = "SlideSeq_v~{pipeline_version}", warp_tools_docker_path = docker_prefix + warp_tools_docker } } File final_h5ad_output = select_first([SlideseqH5adGenerationWithExons.h5ad_output, SlideseqH5adGeneration.h5ad_output]) output { String pipeline_version_out = pipeline_version File genomic_reference_version = ReferenceCheck.genomic_ref_version File bam = MergeBam.output_bam # sparse count matrix File matrix = MergeStarOutputs.sparse_counts File matrix_row_index = MergeStarOutputs.row_index File matrix_col_index = MergeStarOutputs.col_index File cell_metrics = CellMetrics.cell_metrics File gene_metrics = GeneMetrics.gene_metrics File umi_metrics = UMIsMetrics.umi_metrics File fastq_barcode_distribution = FastqMetrics.barcode_distribution File fastq_umi_distribution = FastqMetrics.umi_distribution File fastq_reads_per_cell = FastqMetrics.numReads_perCell File fastq_reads_per_umi = FastqMetrics.numReads_perUMI # h5ad File? h5ad_output_file = final_h5ad_output } }