version 1.0

import "../../pipelines/wdl/arrays/imputation_beagle/input_qc/ArrayImputationQC.wdl" as ArrayImputationQC
import "../../verification/VerifyArrayImputationQC.wdl" as VerifyArrayImputationQC
import "../../tasks/wdl/Utilities.wdl" as Utilities
import "../../tasks/wdl/TerraCopyFilesFromCloudToCloud.wdl" as Copy

# Test wrapper around ArrayImputationQC.InputQC.
#
# Runs the InputQC pipeline, writes its `passes_qc` / `qc_messages` outputs to a TSV file,
# copies that file to `results_path`, and then either
#   * copies it to `truth_path` as well (update_truth == true), or
#   * verifies it against the existing truth file (update_truth == false).
workflow TestArrayImputationQC {

  input {
    Int chunkLength = 25000000
    Int chunkOverlaps = 5000000 # this is the padding that will be added to the beginning and end of each chunk to reduce edge effects
    # NOTE(review): sample_chunk_size is declared here but is not forwarded to InputQC below.
    # It is kept so existing test input files that set it keep working - confirm whether it should be passed through.
    Int sample_chunk_size = 1000 # this is the number of samples that will be processed in parallel in each chunked scatter
    Float min_dr2_for_inclusion = 0.0 # minimum imputation quality (DR2) for a variant to be included in the output VCF

    File multi_sample_vcf

    File ref_dict # for reheadering / adding contig lengths in the header of the output VCF, and calculating contig lengths
    Array[String] contigs # list of possible contigs that will be processed. note the workflow will not error out if any of these contigs are missing
    String reference_panel_path_prefix # path + file prefix to the bucket where the reference panel files are stored for all contigs
    String genetic_maps_path # path to the bucket where genetic maps are stored for all contigs
    String output_basename # the basename for intermediate and output files

    # These values will be determined and injected into the inputs by the scala test framework
    String truth_path
    String results_path
    Boolean update_truth
  }

  meta {
    allowNestedInputs: true
  }

  # Run the pipeline under test
  call ArrayImputationQC.InputQC {
    input:
      chunkLength = chunkLength,
      chunkOverlaps = chunkOverlaps,
      multi_sample_vcf = multi_sample_vcf,
      ref_dict = ref_dict,
      contigs = contigs,
      reference_panel_path_prefix = reference_panel_path_prefix,
      genetic_maps_path = genetic_maps_path,
      output_basename = output_basename,
      min_dr2_for_inclusion = min_dr2_for_inclusion
  }

  # Write pipeline outputs into a tsv file so we can compare to truth
  call WriteMapToTsv {
    input:
      input_map = {
        "passes_qc": InputQC.passes_qc,
        "qc_messages": InputQC.qc_messages
      }
  }

  # Copy results of pipeline to test results bucket
  call Copy.TerraCopyFilesFromCloudToCloud as CopyToTestResults {
    input:
      files_to_copy = [WriteMapToTsv.tsv_file],
      destination_cloud_path = results_path
  }

  # If updating truth then copy output to truth bucket
  if (update_truth) {
    call Copy.TerraCopyFilesFromCloudToCloud as CopyToTruth {
      input:
        files_to_copy = [WriteMapToTsv.tsv_file],
        destination_cloud_path = truth_path
    }
  }

  # Only compare results to truth when not updating truth.
  # This is achieved by passing each desired file/array[files] to GetValidationInputs
  if (!update_truth) {
    call Utilities.GetValidationInputs as GetOutputs {
      input:
        input_file = WriteMapToTsv.tsv_file,
        results_path = results_path,
        truth_path = truth_path
    }

    # Referencing CopyToTestResults.done makes verification wait for the copy to the results bucket
    call VerifyArrayImputationQC.VerifyArrayImputationQC as Verify {
      input:
        truth_outputs = GetOutputs.truth_file,
        test_outputs = GetOutputs.results_file,
        done = CopyToTestResults.done
    }
  }
}

# Write a tsv file from a map of strings.
#
# Inputs:
#   input_map - key/value pairs to serialize
# Outputs:
#   tsv_file  - "output.tsv", a copy of the file produced by write_map(input_map)
task WriteMapToTsv {
  input {
    Map[String, String] input_map
  }

  command <<<
    # write_map() serializes the map to a file; copy it to a stable output name
    cp ~{write_map(input_map)} output.tsv
  >>>

  runtime {
    docker: "us.gcr.io/broad-dsp-gcr-public/base/python:3.9-debian"
    memory: "2 GiB"
  }

  output {
    File tsv_file = "output.tsv"
  }
}