version 1.0

## Low-pass imputation workflow built on GLIMPSE2.
##
## Two entry modes (at least one must be provided):
##   * input_vcf (+ index): a pre-made genotype-likelihood VCF that is imputed directly.
##   * crams (+ indices): low-pass CRAMs that are first genotyped at the reference-panel
##     sites with bcftools (mpileup -> call -> norm, merged across batches), then imputed.
##
## Per contig, the workflow phases against pre-chunked reference panels with
## GLIMPSE2_phase, ligates the chunks, and splits the result into a variant-only VCF
## and a hom-ref sites-only VCF, which are gathered genome-wide and indexed/md5'd.
workflow Glimpse2LowPassImputation {
    String pipeline_version = "0.0.6"
    String quota_consumed_version = "0.0.1"
    String input_qc_version = "1.0.0"

    input {
        Array[String] contigs

        # This is the path to a directory that contains sites vcf, sites table, and
        # reference chunks file. Should end with a "/".
        String reference_panel_prefix

        # Pre-made genotype-likelihood VCF mode.
        File? input_vcf
        File? input_vcf_index

        # CRAM-calling mode.
        Array[File]? crams
        Array[File]? cram_indices
        Array[String] sample_ids

        File fasta
        File fasta_index
        String output_basename
        File ref_dict

        Boolean impute_reference_only_variants = false
        Boolean call_indels = false

        # Batch size used when calling SplitIntoBatches to make variant calls from the crams.
        Int calling_batch_size = 100

        String gatk_docker = "us.gcr.io/broad-gatk/gatk:4.6.0.0"
        String glimpse_docker = "us.gcr.io/broad-dsde-methods/glimpse:kachulis_ck_bam_reader_retry_cf5822c"
    }

    # Sample count feeds the per-shard memory estimate in ComputeShardsAndMemoryPerShard.
    if (defined(input_vcf)) {
        call CountSamples {
            input:
                vcf = select_first([input_vcf])
        }
    }
    Int n_samples = select_first([CountSamples.nSamples, length(select_first([crams]))])

    if (defined(crams)) {
        if (length(select_first([crams])) > 1) {
            call SplitIntoBatches {
                input:
                    batch_size = calling_batch_size,
                    crams = select_first([crams]),
                    cram_indices = select_first([cram_indices]),
                    sample_ids = sample_ids
            }
        }
    }

    scatter (contig in contigs) {
        # Per-contig reference-panel resources laid out under reference_panel_prefix.
        File sites_vcf = reference_panel_prefix + "sites." + contig + ".vcf.gz"
        File sites_vcf_index = reference_panel_prefix + "sites." + contig + ".vcf.gz.tbi"
        File sites_table = reference_panel_prefix + "sites_table." + contig + ".gz"
        File sites_table_index = reference_panel_prefix + "sites_table." + contig + ".gz.tbi"
        File reference_chunks = reference_panel_prefix + "reference_chunks." + contig + ".txt"

        if (defined(crams)) {
            # SplitIntoBatches is skipped for a single CRAM; fall back to one batch
            # containing all inputs in that case.
            Array[Array[String]] crams_batches = select_first([SplitIntoBatches.crams_batches, [select_first([crams])]])
            Array[Array[String]] cram_indices_batches = select_first([SplitIntoBatches.cram_indices_batches, [select_first([cram_indices])]])
            Array[Array[String]] sample_ids_batches = select_first([SplitIntoBatches.sample_ids_batches, [sample_ids]])

            scatter (i in range(length(crams_batches))) {
                call BcftoolsMpileup {
                    input:
                        crams = crams_batches[i],
                        cram_indices = cram_indices_batches[i],
                        sample_ids = sample_ids_batches[i],
                        fasta = fasta,
                        fasta_index = fasta_index,
                        call_indels = call_indels,
                        sites_vcf = sites_vcf
                }

                call BcftoolsCall {
                    input:
                        mpileup_bcf = BcftoolsMpileup.output_bcf,
                        sites_table = sites_table,
                        sites_table_index = sites_table_index
                }

                call BcftoolsNorm {
                    input:
                        calls_bcf = BcftoolsCall.output_bcf
                }
            }

            if (length(BcftoolsNorm.output_vcf) > 1) {
                call BcftoolsMerge {
                    input:
                        vcfs = BcftoolsNorm.output_vcf,
                        vcf_indices = BcftoolsNorm.output_vcf_index,
                        output_basename = output_basename
                }
            }

            # Calls derived from the CRAMs: the merged VCF when there were multiple
            # batches, otherwise the single batch's normalized VCF.
            File called_vcf = select_first([BcftoolsMerge.merged_vcf, BcftoolsNorm.output_vcf[0]])
            File called_vcf_index = select_first([BcftoolsMerge.merged_vcf_index, BcftoolsNorm.output_vcf_index[0]])
        }

        # NOTE(review): these declarations were previously scoped inside the
        # `if (defined(crams))` block, which left GlimpsePhase with no input VCF
        # in the input_vcf-only mode even though input_vcf was listed as a
        # select_first fallback. They now resolve outside the conditional.
        File phase_input_vcf = select_first([called_vcf, input_vcf])
        File phase_input_vcf_index = select_first([called_vcf_index, input_vcf_index])

        ## this task is used to grab the reference chunk but does not affect memory usage of glimpsePhase.
        ## still tbd which method makes the most sense cost wise
        call ComputeShardsAndMemoryPerShard {
            input:
                reference_chunks_memory = reference_chunks,
                n_samples = n_samples
        }

        scatter (reference_chunk_index in range(length(ComputeShardsAndMemoryPerShard.reference_chunk_file_paths))) {
            call GlimpsePhase {
                input:
                    reference_chunk = ComputeShardsAndMemoryPerShard.reference_chunk_file_paths[reference_chunk_index],
                    input_vcf = phase_input_vcf,
                    input_vcf_index = phase_input_vcf_index,
                    impute_reference_only_variants = impute_reference_only_variants,
                    call_indels = call_indels,
                    sample_ids = sample_ids,
                    fasta = fasta,
                    fasta_index = fasta_index,
                    docker = glimpse_docker
            }
        }

        call GlimpseLigate {
            input:
                imputed_chunks = GlimpsePhase.imputed_vcf,
                imputed_chunks_indices = GlimpsePhase.imputed_vcf_index,
                output_basename = output_basename,
                ref_dict = ref_dict,
                docker = glimpse_docker
        }

        # Coverage metrics are only emitted by GlimpsePhase for some runs (File? output);
        # drop the missing ones.
        Array[File] contig_coverage_metrics = select_all(GlimpsePhase.coverage_metrics)

        call SelectVariantRecordsOnly {
            input:
                vcf = GlimpseLigate.imputed_vcf,
                vcf_index = GlimpseLigate.imputed_vcf_index,
                basename = output_basename + "." + contig + ".imputed.only_variants"
        }

        call CreateHomRefSitesOnlyVcf {
            input:
                vcf = GlimpseLigate.imputed_vcf,
                vcf_index = GlimpseLigate.imputed_vcf_index,
                basename = output_basename + "." + contig + ".imputed.only_hom_ref.sites_only"
        }
    }

    call GatherVcfsNoIndex {
        input:
            input_vcfs = SelectVariantRecordsOnly.output_vcf,
            output_vcf_basename = output_basename + ".imputed",
            gatk_docker = gatk_docker
    }

    call CreateVcfIndexAndMd5 {
        input:
            vcf_input = GatherVcfsNoIndex.output_vcf,
            gatk_docker = gatk_docker,
            preemptible = 0
    }

    call GatherVcfsNoIndex as GatherVcfsNoIndexHomRefOnly {
        input:
            input_vcfs = CreateHomRefSitesOnlyVcf.output_vcf,
            output_vcf_basename = output_basename + ".imputed.hom_ref_sites_only",
            gatk_docker = gatk_docker
    }

    call CreateVcfIndexAndMd5 as CreateVcfIndexAndMd5HomRefOnly {
        input:
            vcf_input = GatherVcfsNoIndexHomRefOnly.output_vcf,
            gatk_docker = gatk_docker,
            preemptible = 0
    }

    Array[File] genome_coverage_metrics = flatten(contig_coverage_metrics)
    if (length(genome_coverage_metrics) > 0) {
        call CombineCoverageMetrics {
            input:
                cov_metrics = genome_coverage_metrics,
                output_basename = output_basename
        }
    }

    call CollectQCMetrics {
        input:
            imputed_vcf = GatherVcfsNoIndex.output_vcf,
            output_basename = output_basename
    }

    output {
        File imputed_vcf = CreateVcfIndexAndMd5.output_vcf
        File imputed_vcf_index = CreateVcfIndexAndMd5.output_vcf_index
        File imputed_vcf_md5sum = CreateVcfIndexAndMd5.output_vcf_md5sum
        File imputed_hom_ref_sites_only_vcf = CreateVcfIndexAndMd5HomRefOnly.output_vcf
        # TODO(review): "inex" is a typo for "index"; retained so existing
        # configurations that reference this output name keep working.
        File imputed_hom_ref_sites_only_vcf_inex = CreateVcfIndexAndMd5HomRefOnly.output_vcf_index
        File imputed_hom_ref_sites_only_vcf_md5 = CreateVcfIndexAndMd5HomRefOnly.output_vcf_md5sum
        File qc_metrics = CollectQCMetrics.qc_metrics
        File? coverage_metrics = CombineCoverageMetrics.coverage_metrics
    }
}

# Splits the CRAM/index/sample-id triples into batches of batch_size for
# per-batch bcftools calling. Batches are emitted as parallel JSON arrays-of-arrays.
task SplitIntoBatches {
    input {
        Int batch_size
        Array[String] crams
        Array[String] cram_indices
        Array[String] sample_ids
    }

    command <<<
        set -euo pipefail

        # NOTE(review): this was previously `cat < script.py`, which reads a file
        # that does not exist yet and fails; it must be a heredoc writing script.py.
        cat <<'EOF' > script.py
import json

batch_size = ~{batch_size}
crams = ['~{sep="', '" crams}']
cram_indices = ['~{sep="', '" cram_indices}']
sample_ids = ['~{sep="', '" sample_ids}']

crams_batches = [crams[i:i + batch_size] for i in range(0, len(crams), batch_size)]
cram_indices_batches = [cram_indices[i:i + batch_size] for i in range(0, len(cram_indices), batch_size)]
sample_ids_batches = [sample_ids[i:i + batch_size] for i in range(0, len(sample_ids), batch_size)]

with open('crams.json', 'w') as json_file:
    json.dump(crams_batches, json_file)
with open('cram_indices.json', 'w') as json_file:
    json.dump(cram_indices_batches, json_file)
with open('sample_ids.json', 'w') as json_file:
    json.dump(sample_ids_batches, json_file)
EOF
        python3 script.py
    >>>

    runtime {
        docker: "us.gcr.io/broad-dsde-methods/python-data-slim:1.0"
        cpu: 1
        disks: "local-disk 10 HDD"
        memory: "1 GiB"
        preemptible: 3
        noAddress: true
    }

    output {
        Array[Array[String]] crams_batches = read_json('crams.json')
        Array[Array[String]] cram_indices_batches = read_json('cram_indices.json')
        Array[Array[String]] sample_ids_batches = read_json('sample_ids.json')
    }
}

# Reads the per-contig reference-chunks TSV (columns: contig, reference_shard,
# base_gb, slope_per_sample_gb), emits the shard paths to phase and a linear
# per-shard memory estimate (base + slope * n_samples, ceil'd and capped at 256 GB).
# NOTE(review): mem_gb_per_chunk is computed but not currently consumed by
# GlimpsePhase (see the comment at the call site in the workflow).
task ComputeShardsAndMemoryPerShard {
    input {
        File reference_chunks_memory
        Int n_samples
    }

    command <<<
        python3 << EOF
import pandas as pd
import numpy as np

df = pd.read_csv('~{reference_chunks_memory}', sep='\t', header=None,
                 names=['contig', 'reference_shard', 'base_gb', 'slope_per_sample_gb'])

# write out reference shards to process
df['reference_shard'].to_csv('reference_shard_file_paths.tsv', sep='\t', index=False, header=None)

# calculate memory usage and save to file
df['mem_gb'] = df['base_gb'] + df['slope_per_sample_gb'] * ~{n_samples}
df['mem_gb'] = df['mem_gb'].apply(lambda x: min(256, int(np.ceil(x))))  # cap at 256 GB
df['mem_gb'].to_csv('memory_per_chunk.tsv', sep='\t', index=False, header=None)
EOF
    >>>

    runtime {
        docker: "us.gcr.io/broad-dsde-methods/python-data-slim:1.0"
        noAddress: true
    }

    output {
        Array[String] reference_chunk_file_paths = read_lines("reference_shard_file_paths.tsv")
        Array[Int] mem_gb_per_chunk = read_lines("memory_per_chunk.tsv")
    }
}

# Runs bcftools mpileup over a batch of CRAMs, restricted to the reference-panel
# sites, emitting an uncompressed BCF of pileup likelihoods.
task BcftoolsMpileup {
    input {
        Array[File] crams
        # Not referenced in the command, but localized alongside the CRAMs.
        Array[File] cram_indices
        File fasta
        File fasta_index
        Boolean call_indels
        Array[String] sample_ids
        File sites_vcf

        Int seed = 12345
        Int mem_gb = 6
        Int cpu = 1
        Int preemptible = 0
        Int max_retries = 3
    }

    Int disk_size_gb = ceil(1.5*size(crams, "GiB") + size(fasta, "GiB") + size(sites_vcf, "GiB")) + 10

    command <<<
        set -xeuo pipefail

        # Build the "* <cram> <sample_id>" mapping passed to mpileup via -G so
        # output samples carry the requested names.
        crams=(~{sep=' ' crams})
        sample_ids=(~{sep=' ' sample_ids})
        for i in "${!crams[@]}"; do
            echo "* ${crams[$i]} ${sample_ids[$i]}" >> sample_name_mapping.txt
        done

        # -I skips indels unless call_indels is set; -T limits to the panel sites.
        bcftools mpileup -f ~{fasta} ~{if !call_indels then "-I" else ""} -G sample_name_mapping.txt --seed ~{seed} -E -a 'FORMAT/DP,FORMAT/AD' -T ~{sites_vcf} -Ou -o mpileup.bcf ~{sep=" " crams}
    >>>

    runtime {
        docker: "us.gcr.io/broad-dsde-methods/vcfeval_docker:v1.1"
        disks: "local-disk " + disk_size_gb + " HDD"
        memory: mem_gb + " GiB"
        cpu: cpu
        preemptible: preemptible
        maxRetries: max_retries
        noAddress: true
    }

    output {
        File output_bcf = "mpileup.bcf"
    }
}

# Calls genotypes from the mpileup BCF constrained to the panel alleles
# (-C alleles with the sites table as -T).
task BcftoolsCall {
    input {
        File mpileup_bcf
        File sites_table
        File sites_table_index

        Int mem_gb = 12
        Int cpu = 1
        Int preemptible = 3
        Int max_retries = 3
    }

    Int disk_size_gb = ceil(3*size(mpileup_bcf, "GiB") + size(sites_table, "GiB")) + 10

    command <<<
        set -xeuo pipefail

        bcftools call -Aim -C alleles -T ~{sites_table} -Ou ~{mpileup_bcf} -o calls.bcf
    >>>

    runtime {
        docker: "us.gcr.io/broad-dsde-methods/vcfeval_docker:v1.1"
        disks: "local-disk " + disk_size_gb + " SSD"
        memory: mem_gb + " GiB"
        cpu: cpu
        preemptible: preemptible
        maxRetries: max_retries
        noAddress: true
    }

    output {
        File output_bcf = "calls.bcf"
    }
}

# Splits multiallelic records into biallelics (norm -m -both), bgzips and indexes.
task BcftoolsNorm {
    input {
        File calls_bcf

        Int mem_gb = 6
        Int cpu = 1
        Int preemptible = 3
        Int max_retries = 3
    }

    Int disk_size_gb = ceil(3*size(calls_bcf, "GiB")) + 10

    command <<<
        set -xeuo pipefail

        bcftools norm -m -both -Oz -o normalized.vcf.gz ~{calls_bcf}
        bcftools index -t normalized.vcf.gz
    >>>

    runtime {
        docker: "us.gcr.io/broad-dsde-methods/vcfeval_docker:v1.1"
        disks: "local-disk " + disk_size_gb + " SSD"
        memory: mem_gb + " GiB"
        cpu: cpu
        preemptible: preemptible
        maxRetries: max_retries
        noAddress: true
    }

    output {
        File output_vcf = "normalized.vcf.gz"
        File output_vcf_index = "normalized.vcf.gz.tbi"
    }
}

# Merges the per-batch normalized VCFs (disjoint sample sets) into one VCF.
task BcftoolsMerge {
    input {
        Array[File] vcfs
        Array[File] vcf_indices
        String output_basename

        Int mem_gb = 6
        Int cpu = 1
        Int preemptible = 0
        Int max_retries = 3
    }

    Int disk_size_gb = ceil(3*size(vcfs, "GiB")) + 50

    command <<<
        set -euo pipefail

        bcftools merge -O z -o ~{output_basename}.bcftools.merged.vcf.gz ~{sep=" " vcfs}
        bcftools index -t ~{output_basename}.bcftools.merged.vcf.gz
    >>>

    runtime {
        docker: "us.gcr.io/broad-dsde-methods/bcftools:v1.3"
        disks: "local-disk " + disk_size_gb + " HDD"
        memory: mem_gb + " GiB"
        cpu: cpu
        preemptible: preemptible
        maxRetries: max_retries
        noAddress: true
    }

    output {
        File merged_vcf = "~{output_basename}.bcftools.merged.vcf.gz"
        File merged_vcf_index = "~{output_basename}.bcftools.merged.vcf.gz.tbi"
    }
}

# Runs GLIMPSE2_phase on one reference chunk. Input can be a GL VCF (--input-gl)
# and/or streamed CRAMs (--bam-list); CRAMs and the VCF are read directly from
# GCS (localization_optional) using an OAuth token. The high preemptible count is
# viable because progress is persisted via the checkpoint.bin checkpointFile.
task GlimpsePhase {
    input {
        File? input_vcf
        File? input_vcf_index
        Array[File]? crams
        Array[File]? cram_indices
        Array[String] sample_ids
        File? fasta
        File? fasta_index
        File reference_chunk

        Boolean impute_reference_only_variants
        Boolean call_indels
        Int? n_burnin
        Int? n_main
        Int? effective_population_size

        Int mem_gb = 16
        Int cpu = 4
        Int disk_size_gb = ceil(2.2 * size(input_vcf, "GiB") + size(reference_chunk, "GiB") + 0.003 * length(select_first([crams, []])) + 10)
        Int preemptible = 30
        Int max_retries = 3
        String docker
    }

    parameter_meta {
        crams: {
            localization_optional: true
        }
        cram_indices: {
            localization_optional: true
        }
        input_vcf: {
            localization_optional: true
        }
        input_vcf_index: {
            localization_optional: true
        }
    }

    String bam_file_list_input = if defined(crams) then "--bam-list crams.list" else ""

    command <<<
        set -euo pipefail

        export GCS_OAUTH_TOKEN=$(/root/google-cloud-sdk/bin/gcloud auth application-default print-access-token)

        cram_paths=( ~{sep=" " crams} )
        cram_index_paths=( ~{sep=" " cram_indices} )
        sample_ids=( ~{sep=" " sample_ids} )

        # htslib keys remote files by basename, so duplicate basenames collide.
        duplicate_cram_filenames=$(printf "%s\n" "${cram_paths[@]}" | xargs -I {} basename {} | sort | uniq -d)
        if [ ! -z "$duplicate_cram_filenames" ]; then
            echo "ERROR: The input CRAMs contain multiple files with the same basename, which leads to an error due to the way that htslib is implemented. Duplicate filenames:"
            printf "%s\n" "${duplicate_cram_filenames[@]}"
            exit 1
        fi

        # Build the bam list, pairing each CRAM with its index (##idx## syntax)
        # when indices were supplied.
        if ~{if defined(cram_indices) then "true" else "false"}; then
            for i in "${!cram_paths[@]}" ; do
                echo -e "${cram_paths[$i]}##idx##${cram_index_paths[$i]} ${sample_ids[$i]}" >> crams.list
            done
        else
            for i in "${!cram_paths[@]}"; do
                echo -e "${cram_paths[$i]} ${sample_ids[$i]}" >> crams.list
            done
        fi

        cmd="/bin/GLIMPSE2_phase \
        ~{"--input-gl " + input_vcf} \
        --reference ~{reference_chunk} \
        --output phase_output.bcf \
        --threads ~{cpu} \
        ~{if impute_reference_only_variants then "--impute-reference-only-variants" else ""} ~{if call_indels then "--call-indels" else ""} \
        ~{"--burnin " + n_burnin} ~{"--main " + n_main} \
        ~{"--ne " + effective_population_size} \
        ~{bam_file_list_input} \
        ~{"--fasta " + fasta} \
        --checkpoint-file-out checkpoint.bin"

        # Resume from a restored checkpoint after preemption, if present.
        if [ -s "checkpoint.bin" ]; then
            cmd="$cmd --checkpoint-file-in checkpoint.bin"
        fi

        #check for read error which corresponds exactly to end of cram/bam block.
        #This currently triggers a warning message from htslib, but doesn't return any error.
        #We need to make sure that stderr is maintained since cromwell looks for oom strings
        #in stderr
        eval $cmd 2> >(tee glimpse_stderr.log >&2)

        if grep -q "EOF marker is absent" glimpse_stderr.log; then
            echo "An input file appears to be truncated. This may be either a truly truncated file which needs to be fixed, or a networking error which can just be retried."
            exit 1
        fi
    >>>

    runtime {
        docker: docker
        disks: "local-disk " + disk_size_gb + " SSD"
        memory: mem_gb + " GiB"
        cpu: cpu
        preemptible: preemptible
        maxRetries: max_retries
        checkpointFile: "checkpoint.bin"
        noAddress: true
    }

    output {
        File imputed_vcf = "phase_output.bcf"
        File imputed_vcf_index = "phase_output.bcf.csi"
        File? coverage_metrics = "phase_output_stats_coverage.txt.gz"
    }
}

# Ligates the per-chunk phased BCFs into a single per-contig VCF and rewrites the
# header with the canonical sequence dictionary from ref_dict.
task GlimpseLigate {
    input {
        Array[File] imputed_chunks
        Array[File] imputed_chunks_indices
        String output_basename
        File ref_dict

        Int mem_gb = 4
        Int cpu = 2
        Int disk_size_gb = ceil(3 * size(imputed_chunks, "GiB") + 100)
        Int preemptible = 0
        Int max_retries = 3
        String docker
    }

    command <<<
        set -xeuo pipefail

        NPROC=$(nproc)
        echo "nproc reported ${NPROC} CPUs, using that number as the threads argument for GLIMPSE."

        /bin/GLIMPSE2_ligate --input ~{write_lines(imputed_chunks)} --output ligated.vcf.gz --threads ${NPROC}

        # Set correct reference dictionary
        bcftools view -h --no-version ligated.vcf.gz > old_header.vcf
        java -jar /picard.jar UpdateVcfSequenceDictionary -I old_header.vcf --SD ~{ref_dict} -O new_header.vcf
        bcftools reheader -h new_header.vcf -o ~{output_basename}.imputed.vcf.gz ligated.vcf.gz
        tabix ~{output_basename}.imputed.vcf.gz
    >>>

    runtime {
        docker: docker
        disks: "local-disk " + disk_size_gb + " HDD"
        memory: mem_gb + " GiB"
        cpu: cpu
        preemptible: preemptible
        maxRetries: max_retries
        noAddress: true
    }

    output {
        File imputed_vcf = "~{output_basename}.imputed.vcf.gz"
        File imputed_vcf_index = "~{output_basename}.imputed.vcf.gz.tbi"
    }
}

# Computes per-sample QC metrics over the gathered imputed VCF with Hail's
# sample_qc and writes them as a flat TSV (one row per sample).
task CollectQCMetrics {
    input {
        File imputed_vcf
        String output_basename

        Int preemptible = 0
        String docker = "hailgenetics/hail:0.2.126-py3.11"
        Int cpu = 4
        Int mem_gb = 8
    }

    parameter_meta {
        imputed_vcf: {
            localization_optional: true
        }
    }

    Int disk_size_gb = ceil(2*size(imputed_vcf, "GiB") + 50)

    command <<<
        set -euo pipefail

        cat <<'EOF' > script.py
import hail as hl
import pandas as pd

# Calculate metrics
hl.init(default_reference='GRCh38', idempotent=True)
vcf = hl.import_vcf('~{imputed_vcf}', force_bgz=True)
qc = hl.sample_qc(vcf)
qc_pd = qc.cols().flatten() \
    .rename({'sample_qc.' + col: col for col in list(qc['sample_qc'])}) \
    .rename({'s': 'sample_id'}) \
    .to_pandas()
qc_pd.to_csv('~{output_basename}.qc_metrics.tsv', sep='\t', index=False, float_format='%.4f')
EOF
        python3 script.py
    >>>

    runtime {
        docker: docker
        disks: "local-disk " + disk_size_gb + " HDD"
        memory: mem_gb + " GiB"
        cpu: cpu
        preemptible: preemptible
        noAddress: true
    }

    output {
        File qc_metrics = "~{output_basename}.qc_metrics.tsv"
    }
}

# Counts the samples in a VCF (bcftools query -l | wc -l).
task CountSamples {
    input {
        File vcf

        String bcftools_docker = "us.gcr.io/broad-gotc-prod/imputation-bcf-vcf:1.0.7-1.10.2-0.1.16-1669908889"
        Int cpu = 1
        Int memory_mb = 3000
        Int disk_size_gb = 10 + ceil(size(vcf, "GiB"))
    }

    command <<<
        # pipefail so a failing bcftools does not silently report 0 samples
        set -e -o pipefail

        bcftools query -l ~{vcf} | wc -l
    >>>

    runtime {
        docker: bcftools_docker
        disks: "local-disk ${disk_size_gb} HDD"
        memory: "${memory_mb} MiB"
        cpu: cpu
        noAddress: true
    }

    output {
        Int nSamples = read_int(stdout())
    }
}

# Concatenates the per-chunk GLIMPSE coverage metrics into a single
# machine-readable table, prefixing each row with its chunk index.
task CombineCoverageMetrics {
    input {
        Array[File] cov_metrics
        String output_basename
    }

    command <<<
        set -euo pipefail

        cov_files=( ~{sep=" " cov_metrics} )

        for i in "${!cov_files[@]}"; do
            # Keep the header only from the first file (skip 1 line); skip 2 for the rest.
            if [ $i -eq 0 ]; then
                n_skip=1
                echo 'Chunk' > chunk_col.txt
            else
                n_skip=2
            fi

            # glimpse coverage metrics are formatted to be human readable in a command line, not machine readable or consistent. ie, number of tabs
            # are variable between columns depending on length of sample names, odd things like that. We want these to be machine readable tables,
            # so need to fix this.
            zcat ${cov_files[$i]} | tail -n +$((n_skip + 1)) | sed s/%//g | sed s/"No data"/"No data pct"/g | sed s/\\t\\t/\\t/g >> cov_file.txt
            n_lines_cov=$(< cov_file.txt wc -l)
            n_lines_chunk=$(< chunk_col.txt wc -l)
            n_lines_out=$((n_lines_cov-n_lines_chunk))
            echo 'n_lines_out=' ${n_lines_out}
            echo ${cov_files[$i]}
            # Repeat the chunk index once per data row appended above.
            { yes ${i} || :; } | head -n ${n_lines_out} >> chunk_col.txt
        done

        paste chunk_col.txt cov_file.txt > ~{output_basename}.coverage_metrics.txt
    >>>

    runtime {
        docker: "us.gcr.io/broad-dsde-methods/ubuntu:20.04"
        noAddress: true
    }

    output {
        File coverage_metrics = "~{output_basename}.coverage_metrics.txt"
    }
}

# Concatenates per-contig VCFs with GATK GatherVcfs (no index is produced;
# CreateVcfIndexAndMd5 handles indexing downstream).
task GatherVcfsNoIndex {
    input {
        Array[File] input_vcfs
        String output_vcf_basename

        String gatk_docker = "us.gcr.io/broad-gatk/gatk:4.6.1.0"
        Int cpu = 2
        Int memory_mb = 10000
        Int disk_size_gb = ceil(3*size(input_vcfs, "GiB")) + 10
    }

    # Leave headroom below the container limit for non-heap JVM overhead.
    Int command_mem = memory_mb - 1500
    Int max_heap = memory_mb - 1000

    command <<<
        set -e -o pipefail

        gatk --java-options "-Xms~{command_mem}m -Xmx~{max_heap}m" \
            GatherVcfs \
            -I ~{sep=' -I ' input_vcfs} \
            --REORDER_INPUT_BY_FIRST_VARIANT \
            -O ~{output_vcf_basename}.vcf.gz
    >>>

    runtime {
        docker: gatk_docker
        disks: "local-disk ${disk_size_gb} SSD"
        memory: "${memory_mb} MiB"
        cpu: cpu
        maxRetries: 1
        noAddress: true
    }

    output {
        File output_vcf = "~{output_vcf_basename}.vcf.gz"
    }
}

# Produces a tabix index and an md5 checksum for a bgzipped VCF.
task CreateVcfIndexAndMd5 {
    input {
        File vcf_input

        Int disk_size_gb = ceil(1.1*size(vcf_input, "GiB")) + 10
        Int cpu = 1
        Int memory_mb = 6000
        String gatk_docker = "us.gcr.io/broad-gatk/gatk:4.5.0.0"
        Int preemptible = 3
    }

    String vcf_basename = basename(vcf_input, ".vcf.gz")

    command <<<
        set -e -o pipefail

        # Symlink rather than copy so disk only needs to hold the index and md5.
        ln -sf ~{vcf_input} ~{vcf_basename}.vcf.gz
        bcftools index -t ~{vcf_basename}.vcf.gz
        md5sum ~{vcf_basename}.vcf.gz | awk '{ print $1 }' > ~{vcf_basename}.md5sum
    >>>

    runtime {
        docker: gatk_docker
        disks: "local-disk ${disk_size_gb} SSD"
        memory: "${memory_mb} MiB"
        cpu: cpu
        preemptible: preemptible
        maxRetries: 1
        noAddress: true
    }

    output {
        File output_vcf = "~{vcf_basename}.vcf.gz"
        File output_vcf_index = "~{vcf_basename}.vcf.gz.tbi"
        File output_vcf_md5sum = "~{vcf_basename}.md5sum"
    }
}

# Keeps only records where at least one sample carries an alt allele
# (i.e. drops sites that are hom-ref across all samples).
task SelectVariantRecordsOnly {
    input {
        File vcf
        File vcf_index
        String basename

        Int disk_size_gb = ceil(2*size(vcf, "GiB")) + 10
        Int cpu = 1
        Int memory_mb = 3000
        String gatk_docker = "us.gcr.io/broad-gatk/gatk:4.6.1.0"
    }

    command {
        set -e -o pipefail

        # keep alt sites (i.e. remove hom ref sites)
        bcftools view -i 'GT[*]="alt"' -Oz -o ~{basename}.vcf.gz ~{vcf}
    }

    runtime {
        docker: gatk_docker
        disks: "local-disk ${disk_size_gb} SSD"
        memory: "${memory_mb} MiB"
        cpu: cpu
        maxRetries: 1
        preemptible: 3
        noAddress: true
    }

    output {
        File output_vcf = "~{basename}.vcf.gz"
    }
}

# Builds a sites-only VCF (first 8 columns, no genotypes) containing only the
# records where no sample carries an alt allele — the complement of
# SelectVariantRecordsOnly.
task CreateHomRefSitesOnlyVcf {
    input {
        File vcf
        File vcf_index
        String basename

        Int disk_size_gb = ceil(2*size(vcf, "GiB")) + 10
        Int cpu = 1
        Int memory_mb = 6000
        String gatk_docker = "us.gcr.io/broad-gatk/gatk:4.6.1.0"
    }

    command {
        set -e -o pipefail

        # create header with only first 8 columns and store that
        bcftools view -h ~{vcf} | grep "^##" > ~{basename}.vcf
        bcftools view -h ~{vcf} | grep -v "^##" | cut -f1-8 >> ~{basename}.vcf

        # append first 8 columns of hom ref sites to previously stored header
        bcftools query -e 'GT[*]="alt"' -f '%CHROM\t%POS\t%ID\t%REF\t%ALT\t%QUAL\t%FILTER\t%INFO\n' ~{vcf} >> ~{basename}.vcf
        bgzip ~{basename}.vcf
    }

    runtime {
        docker: gatk_docker
        disks: "local-disk ${disk_size_gb} SSD"
        memory: "${memory_mb} MiB"
        cpu: cpu
        maxRetries: 1
        preemptible: 3
        noAddress: true
    }

    output {
        File output_vcf = "~{basename}.vcf.gz"
    }
}