version 1.0

import "../../../tasks/wdl/FastqProcessing.wdl" as FastqProcessing
import "../../../tasks/wdl/StarAlign.wdl" as StarAlign
import "../../../tasks/wdl/Metrics.wdl" as Metrics
import "../../../tasks/wdl/RunEmptyDrops.wdl" as RunEmptyDrops
import "../../../tasks/wdl/CheckInputs.wdl" as OptimusInputChecks
import "../../../tasks/wdl/H5adUtils.wdl" as H5adUtils
import "../../../tasks/wdl/Utilities.wdl" as utils
import "https://raw.githubusercontent.com/aawdeh/CellBender/aa-cbwithoutcuda/wdl/cellbender_remove_background_azure.wdl" as CellBender_no_cuda
import "https://raw.githubusercontent.com/broadinstitute/CellBender/v0.3.0/wdl/cellbender_remove_background.wdl" as CellBender

# Optimus workflow.
#
# Call graph (as wired below):
#   1. checkOptimusInput  - validates inputs and yields the default whitelist / read structure.
#   2. ReferenceCheck     - reports the version of the STAR reference tarball.
#   3. STARsoloFastq      - alignment and counting from the R1/R2 FASTQs.
#   4. GeneMetrics / CellMetrics - metrics computed from the STARsolo BAM.
#   5. RunEmptyDrops      - only when counting_mode == "sc_rna".
#   6. OptimusH5adGeneration (count_exons == false) or
#      OptimusH5adGenerationWithExons (count_exons == true and counting_mode == "sn_rna").
#   7. CellBender (gcp) / CellBender_no_cuda (azure) - only when run_cellbender == true.
#
# Docker image paths are built as "<registry prefix for cloud_provider>" + "<image name>".
workflow Optimus {
  meta {
    description: "The optimus 3' pipeline processes 10x genomics sequencing data based on the v2 chemistry. It corrects cell barcodes and UMIs, aligns reads, marks duplicates, and returns data as alignments in BAM format and as counts in sparse matrix exchange format."
    allowNestedInputs: true
  }

  input {
    # Must be "gcp" or "azure"; any other value triggers ErrorMessageIncorrectInput below.
    String cloud_provider

    # Mode for counting either "sc_rna" or "sn_rna"
    String counting_mode = "sc_rna"

    # Sequencing data inputs
    Array[File] r1_fastq
    Array[File] r2_fastq
    Array[File]? i1_fastq
    String input_id
    # String for additional library aliquot ID
    String? gex_nhash_id
    String output_bam_basename = input_id
    String? input_name
    String? input_id_metadata_field
    String? input_name_metadata_field

    # organism reference parameters
    File tar_star_reference
    File annotations_gtf
    File? mt_genes
    String? soloMultiMappers = "Uniform"
    Int gex_expected_cells = 3000

    # CellBender
    Boolean run_cellbender = false
    Int cellbender_memory_GB = 32

    # Chemistry options include: 2 or 3
    Int tenx_chemistry_version
    # Whitelist is selected based on the input tenx_chemistry_version
    File whitelist = checkOptimusInput.whitelist_out

    # read_structure is based on v2 or v3 chemistry
    # NOTE(review): read_struct is not passed to any call in this workflow - confirm it is still needed.
    String read_struct = checkOptimusInput.read_struct_out

    # Emptydrops lower cutoff
    Int emptydrops_lower = 100

    # Set to true to override input checks and allow pipeline to proceed with invalid input
    Boolean force_no_check = false

    # Check that tenx_chemistry_version matches the length of the read 1 fastq;
    # Set to true if you expect that r1_read_length does not match length of UMIs/barcodes for 10x chemistry v2 (26 bp) or v3 (28 bp).
    Boolean ignore_r1_read_length = false

    # Set to Forward, Reverse, or Unstranded to account for stranded library preparations (per STARsolo documentation)
    String star_strand_mode = "Forward"

    # Set to true to count reads aligned to exonic regions in sn_rna mode
    Boolean count_exons = false

    # Set starsolo disk size as adjustable parameter
    # Int disk_starsolo

    # This pipeline does not set any preemptible variables and only relies on the task-level preemptible settings.
    # You can override the task-level preemptible settings by passing them as workflow inputs,
    # for example: `"Optimus.StarAlign.preemptible": 3` will let the StarAlign task, which by default disables the
    # usage of preemptible machines, attempt to request a preemptible instance up to 3 times.
  }

  # Version of this pipeline
  String pipeline_version = "8.0.5"

  # this is used to scatter matched [r1_fastq, r2_fastq, i1_fastq] arrays
  # NOTE(review): no scatter in this workflow references `indices` - confirm whether it can be dropped.
  Array[Int] indices = range(length(r1_fastq))

  # 10x parameters
  File gcp_whitelist_v2 = "gs://gcp-public-data--broad-references/RNA/resources/737k-august-2016.txt"
  File gcp_whitelist_v3 = "gs://gcp-public-data--broad-references/RNA/resources/3M-febrary-2018.txt"
  File azure_whitelist_v2 = "https://datasetpublicbroadref.blob.core.windows.net/dataset/RNA/resources/737k-august-2016.txt?sv=2020-04-08&si=prod&sr=c&sig=DQxmjB4D1lAfOW9AxIWbXwZx6ksbwjlNkixw597JnvQ%3D"
  File azure_whitelist_v3 = "https://datasetpublicbroadref.blob.core.windows.net/dataset/RNA/resources/3M-febrary-2018.txt?sv=2020-04-08&si=prod&sr=c&sig=DQxmjB4D1lAfOW9AxIWbXwZx6ksbwjlNkixw597JnvQ%3D"

  # Takes the first read1 FASTQ from the inputs to check for chemistry match
  File r1_single_fastq = r1_fastq[0]

  # docker images
  # NOTE(review): picard_cloud_docker, pytools_docker, star_docker, star_merge_docker and samtools_star
  # are declared but not passed to any call in this workflow.
  String picard_cloud_docker = "picard-cloud:2.26.10"
  String pytools_docker = "pytools:1.0.0-1661263730"
  String empty_drops_docker = "empty-drops:1.0.1-4.2"
  String star_docker = "star:1.0.1-2.7.11a-1692706072"
  String warp_tools_docker = "warp-tools:2.6.1"
  String star_merge_docker = "star-merge-npz:1.3.0"
  String samtools_star = "samtools-star:1.0.0-1.11-2.7.11a-1731516196"
  String samtools_star_python = "samtools-star-python:1.0.0"

  #TODO how do we handle these?
  String alpine_docker = "alpine-bash@sha256:965a718a07c700a5204c77e391961edee37477634ce2f9cf652a8e4c2db858ff"
  String gcp_alpine_docker_prefix = "bashell/"
  String acr_alpine_docker_prefix = "dsppipelinedev.azurecr.io/"
  String alpine_docker_prefix = if cloud_provider == "gcp" then gcp_alpine_docker_prefix else acr_alpine_docker_prefix

  String ubuntu_docker = "ubuntu_16_0_4@sha256:025124e2f1cf4d29149958f17270596bffe13fc6acca6252977c572dd5ba01bf"
  String gcp_ubuntu_docker_prefix = "gcr.io/gcp-runtimes/"
  String acr_ubuntu_docker_prefix = "dsppipelinedev.azurecr.io/"
  String ubuntu_docker_prefix = if cloud_provider == "gcp" then gcp_ubuntu_docker_prefix else acr_ubuntu_docker_prefix

  String gcr_docker_prefix = "us.gcr.io/broad-gotc-prod/"
  String acr_docker_prefix = "dsppipelinedev.azurecr.io/"

  # choose docker prefix based on cloud provider
  String docker_prefix = if cloud_provider == "gcp" then gcr_docker_prefix else acr_docker_prefix

  # make sure either gcp or azure is supplied as cloud_provider
  if ((cloud_provider != "gcp") && (cloud_provider != "azure")) {
    call utils.ErrorWithMessage as ErrorMessageIncorrectInput {
      input:
        message = "cloud_provider must be supplied with either 'gcp' or 'azure'."
    }
  }

  parameter_meta {
    r1_fastq: "forward read, contains cell barcodes and molecule barcodes"
    r2_fastq: "reverse read, contains cDNA fragment generated from captured mRNA"
    i1_fastq: "(optional) index read, for demultiplexing of multiple samples on one flow cell."
    input_id: "name of sample matching this file, inserted into read group header"
    input_id_metadata_field: "String that describes the metadata field containing the input_id"
    input_name: "User provided sample name or cell_names"
    input_name_metadata_field: "String that describes the metadata field containing the input_name"
    tar_star_reference: "star genome reference"
    annotations_gtf: "gtf containing annotations for gene tagging (must match star reference)"
    whitelist: "10x genomics cell barcode allowlist"
    tenx_chemistry_version: "10X Genomics v2 (10 bp UMI) or v3 chemistry (12bp UMI)"
    force_no_check: "Set to true to override input checks and allow pipeline to proceed with invalid input"
    star_strand_mode: "STAR mode for handling stranded reads. Options are 'Forward', 'Reverse, or 'Unstranded.' Default is Forward."
  }

  # Validate inputs; its whitelist_out / read_struct_out outputs are the defaults
  # for the `whitelist` and `read_struct` workflow inputs.
  call OptimusInputChecks.checkOptimusInput {
    input:
      force_no_check = force_no_check,
      counting_mode = counting_mode,
      count_exons = count_exons,
      gcp_whitelist_v2 = gcp_whitelist_v2,
      gcp_whitelist_v3 = gcp_whitelist_v3,
      azure_whitelist_v2 = azure_whitelist_v2,
      azure_whitelist_v3 = azure_whitelist_v3,
      tenx_chemistry_version = tenx_chemistry_version,
      r1_fastq = r1_single_fastq,
      ignore_r1_read_length = ignore_r1_read_length,
      cloud_provider = cloud_provider,
      alpine_docker_path = alpine_docker_prefix + alpine_docker
  }

  call StarAlign.STARGenomeRefVersion as ReferenceCheck {
    input:
      tar_star_reference = tar_star_reference,
      ubuntu_docker_path = ubuntu_docker_prefix + ubuntu_docker
  }

  call StarAlign.STARsoloFastq as STARsoloFastq {
    input:
      r1_fastq = r1_fastq,
      r2_fastq = r2_fastq,
      star_strand_mode = star_strand_mode,
      white_list = whitelist,
      tar_star_reference = tar_star_reference,
      chemistry = tenx_chemistry_version,
      counting_mode = counting_mode,
      count_exons = count_exons,
      input_id = input_id,
      output_bam_basename = output_bam_basename,
      soloMultiMappers = soloMultiMappers,
      samtools_star_docker_path = docker_prefix + samtools_star_python
  }

  call Metrics.CalculateGeneMetrics as GeneMetrics {
    input:
      bam_input = STARsoloFastq.bam_output,
      mt_genes = mt_genes,
      original_gtf = annotations_gtf,
      input_id = input_id,
      warp_tools_docker_path = docker_prefix + warp_tools_docker
  }

  call Metrics.CalculateCellMetrics as CellMetrics {
    input:
      bam_input = STARsoloFastq.bam_output,
      mt_genes = mt_genes,
      original_gtf = annotations_gtf,
      input_id = input_id,
      warp_tools_docker_path = docker_prefix + warp_tools_docker
  }

  # EmptyDrops runs only in single-cell mode; its result is therefore optional downstream.
  if (counting_mode == "sc_rna") {
    call RunEmptyDrops.RunEmptyDrops {
      input:
        sparse_count_matrix = STARsoloFastq.sparse_counts,
        row_index = STARsoloFastq.row_index,
        col_index = STARsoloFastq.col_index,
        emptydrops_lower = emptydrops_lower,
        empty_drops_docker_path = docker_prefix + empty_drops_docker
    }
  }

  if (!count_exons) {
    call H5adUtils.OptimusH5adGeneration {
      input:
        input_id = input_id,
        gex_nhash_id = gex_nhash_id,
        expected_cells = gex_expected_cells,
        input_name = input_name,
        input_id_metadata_field = input_id_metadata_field,
        input_name_metadata_field = input_name_metadata_field,
        annotation_file = annotations_gtf,
        library_metrics = STARsoloFastq.library_metrics,
        cellbarcodes = STARsoloFastq.outputbarcodes,
        cell_metrics = CellMetrics.cell_metrics,
        gene_metrics = GeneMetrics.gene_metrics,
        sparse_count_matrix = STARsoloFastq.sparse_counts,
        cell_id = STARsoloFastq.row_index,
        gene_id = STARsoloFastq.col_index,
        empty_drops_result = RunEmptyDrops.empty_drops_result,
        counting_mode = counting_mode,
        pipeline_version = "Optimus_v~{pipeline_version}",
        warp_tools_docker_path = docker_prefix + warp_tools_docker
    }
  }

  if (count_exons && counting_mode=="sn_rna") {
    call H5adUtils.SingleNucleusOptimusH5adOutput as OptimusH5adGenerationWithExons {
      input:
        input_id = input_id,
        gex_nhash_id = gex_nhash_id,
        expected_cells = gex_expected_cells,
        input_name = input_name,
        counting_mode = counting_mode,
        input_id_metadata_field = input_id_metadata_field,
        input_name_metadata_field = input_name_metadata_field,
        annotation_file = annotations_gtf,
        library_metrics = STARsoloFastq.library_metrics,
        cellbarcodes = STARsoloFastq.outputbarcodes,
        cell_metrics = CellMetrics.cell_metrics,
        gene_metrics = GeneMetrics.gene_metrics,
        sparse_count_matrix = STARsoloFastq.sparse_counts,
        cell_id = STARsoloFastq.row_index,
        gene_id = STARsoloFastq.col_index,
        # NOTE(review): the *_exon inputs receive the same STARsolo outputs as the
        # non-exon inputs above - confirm this is intended.
        sparse_count_matrix_exon = STARsoloFastq.sparse_counts,
        cell_id_exon = STARsoloFastq.row_index,
        gene_id_exon = STARsoloFastq.col_index,
        pipeline_version = "Optimus_v~{pipeline_version}",
        warp_tools_docker_path = docker_prefix + warp_tools_docker
    }
  }

  # Call CellBender
  # Exactly one of the two calls runs, depending on cloud_provider; both take the same inputs.
  if (run_cellbender) {
    if (cloud_provider == "gcp") {
      call CellBender.run_cellbender_remove_background_gpu as CellBender {
        input:
          sample_name = input_id,
          input_file_unfiltered = final_h5ad_output,
          hardware_boot_disk_size_GB = 20,
          hardware_cpu_count = 4,
          hardware_disk_size_GB = 50,
          hardware_gpu_type = "nvidia-tesla-t4",
          hardware_memory_GB = cellbender_memory_GB,
          hardware_preemptible_tries = 2,
          hardware_zones = "us-central1-a us-central1-c",
          nvidia_driver_version = "470.82.01"
      }
    }
    if (cloud_provider == "azure") {
      call CellBender_no_cuda.run_cellbender_remove_background_gpu as CellBender_no_cuda {
        input:
          sample_name = input_id,
          input_file_unfiltered = final_h5ad_output,
          hardware_boot_disk_size_GB = 20,
          hardware_cpu_count = 4,
          hardware_disk_size_GB = 50,
          hardware_gpu_type = "nvidia-tesla-t4",
          hardware_memory_GB = cellbender_memory_GB,
          hardware_preemptible_tries = 2,
          hardware_zones = "us-central1-a us-central1-c",
          nvidia_driver_version = "470.82.01"
      }
    }
  }

  # Exactly one of the two h5ad-generation calls is expected to have run; take whichever is defined.
  # select_first fails if neither ran (count_exons == true with a counting_mode other than "sn_rna").
  File final_h5ad_output = select_first([OptimusH5adGenerationWithExons.h5ad_output, OptimusH5adGeneration.h5ad_output])
  File final_library_metrics = select_first([OptimusH5adGenerationWithExons.library_metrics, OptimusH5adGeneration.library_metrics])

  output {
    # version of this pipeline
    String pipeline_version_out = pipeline_version
    File genomic_reference_version = ReferenceCheck.genomic_ref_version

    # Metrics outputs
    File cell_metrics = CellMetrics.cell_metrics
    File gene_metrics = GeneMetrics.gene_metrics
    File? cell_calls = RunEmptyDrops.empty_drops_result

    # Star outputs
    File library_metrics = final_library_metrics
    File bam = STARsoloFastq.bam_output
    File matrix = STARsoloFastq.sparse_counts
    File matrix_row_index = STARsoloFastq.row_index
    File matrix_col_index = STARsoloFastq.col_index
    File? aligner_metrics = STARsoloFastq.cell_reads_out
    File? mtx_files = STARsoloFastq.mtx_files
    File? filtered_mtx_files = STARsoloFastq.filtered_mtx_files
    File? multimappers_EM_matrix = STARsoloFastq.multimappers_EM_matrix
    File? multimappers_Uniform_matrix = STARsoloFastq.multimappers_Uniform_matrix
    File? multimappers_Rescue_matrix = STARsoloFastq.multimappers_Rescue_matrix
    File? multimappers_PropUnique_matrix = STARsoloFastq.multimappers_PropUnique_matrix

    # h5ad
    File h5ad_output_file = final_h5ad_output

    # cellbender outputs
    # Taken from whichever CellBender call matches cloud_provider, so Azure runs
    # (CellBender_no_cuda) expose their results too. Both sides are optional, so
    # every value below is null when run_cellbender is false.
    # NOTE(review): assumes the Azure task declares the same output names as the
    # v0.3.0 task - confirm against the imported cellbender_remove_background_azure.wdl.
    File? cell_barcodes_csv = if cloud_provider == "gcp" then CellBender.cell_csv else CellBender_no_cuda.cell_csv
    File? checkpoint_file = if cloud_provider == "gcp" then CellBender.ckpt_file else CellBender_no_cuda.ckpt_file
    Array[File]? h5_array = if cloud_provider == "gcp" then CellBender.h5_array else CellBender_no_cuda.h5_array
    Array[File]? html_report_array = if cloud_provider == "gcp" then CellBender.report_array else CellBender_no_cuda.report_array
    File? log = if cloud_provider == "gcp" then CellBender.log else CellBender_no_cuda.log
    Array[File]? metrics_csv_array = if cloud_provider == "gcp" then CellBender.metrics_array else CellBender_no_cuda.metrics_array
    String? output_directory = if cloud_provider == "gcp" then CellBender.output_dir else CellBender_no_cuda.output_dir
    File? summary_pdf = if cloud_provider == "gcp" then CellBender.pdf else CellBender_no_cuda.pdf
  }
}