version 1.0


import "../../pipelines/wdl/arrays/imputation_beagle/ImputationBeagle.wdl" as ImputationBeagle
import "../../verification/VerifyImputationBeagle.wdl" as VerifyImputationBeagle
import "../../tasks/wdl/Utilities.wdl" as Utilities
import "../../tasks/wdl/TerraCopyFilesFromCloudToCloud.wdl" as Copy

workflow TestImputationBeagle {

    input {
      Int chunkLength = 25000000
      Int chunkOverlaps = 5000000 # this is the padding that will be added to the beginning and end of each chunk to reduce edge effects
      Int sample_chunk_size = 1000 # this is the number of samples that will be processed in parallel in each chunked scatter
      Float min_dr2_for_inclusion = 0.0 # minimum imputation quality (DR2) for a variant to be included in the output VCF
      
      File multi_sample_vcf
      
      File ref_dict # for reheadering / adding contig lengths in the header of the ouptut VCF, and calculating contig lengths
      Array[String] contigs # list of possible contigs that will be processed. note the workflow will not error out if any of these contigs are missing
      String reference_panel_path_prefix # path + file prefix to the bucket where the reference panel files are stored for all contigs
      String genetic_maps_path # path to the bucket where genetic maps are stored for all contigs
      String output_basename # the basename for intermediate and output files

      # These values will be determined and injected into the inputs by the scala test framework
      String truth_path
      String results_path
      Boolean update_truth
    }

    meta {
      allowNestedInputs: true
    }
  
    call ImputationBeagle.ImputationBeagle {
      input:
        chunkLength = chunkLength,
        chunkOverlaps = chunkOverlaps,
        sample_chunk_size = sample_chunk_size,
        min_dr2_for_inclusion = min_dr2_for_inclusion,
        multi_sample_vcf = multi_sample_vcf,
        ref_dict = ref_dict,
        contigs = contigs,
        reference_panel_path_prefix = reference_panel_path_prefix,
        genetic_maps_path = genetic_maps_path,
        output_basename = output_basename,
    }

    
    # Collect all of the pipeline outputs into single Array[String]
    Array[String] pipeline_outputs = flatten([
                                    [ # File outputs
                                    ImputationBeagle.imputed_multi_sample_vcf,
                                    ImputationBeagle.imputed_multi_sample_vcf_index,
                                    ]
    ])

    
    # Collect all of the pipeline metrics into single Array[String]
    Array[String] pipeline_metrics = flatten([
                                    [ # File outputs
                                    ImputationBeagle.chunks_info,
                                    ImputationBeagle.contigs_info,
                                    ]
    ])

    # Copy results of pipeline to test results bucket
    call Copy.TerraCopyFilesFromCloudToCloud as CopyToTestResults {
      input:
        files_to_copy             = flatten([pipeline_outputs, pipeline_metrics]),
        destination_cloud_path    = results_path
    }
  
    # If updating truth then copy output to truth bucket
    if (update_truth){
      call Copy.TerraCopyFilesFromCloudToCloud as CopyToTruth {
        input: 
          files_to_copy             = flatten([pipeline_outputs, pipeline_metrics]),
          destination_cloud_path    = truth_path
      }
    }

    # This is achieved by passing each desired file/array[files] to GetValidationInputs
    if (!update_truth){
        call Utilities.GetValidationInputs as GetMetrics {
          input:
            input_files = pipeline_metrics,
            results_path = results_path,
            truth_path = truth_path
        }
        call Utilities.GetValidationInputs as GetVcf {
          input:
            input_file = ImputationBeagle.imputed_multi_sample_vcf,
            results_path = results_path,
            truth_path = truth_path
        }
        call Utilities.GetValidationInputs as GetVcfIndex {
          input:
            input_file = ImputationBeagle.imputed_multi_sample_vcf_index,
            results_path = results_path,
            truth_path = truth_path
        }


      call VerifyImputationBeagle.VerifyImputationBeagle as Verify {
        input:
          truth_metrics = GetMetrics.truth_files, 
          test_metrics = GetMetrics.results_files,
          truth_vcf = GetVcf.truth_file, 
          test_vcf = GetVcf.results_file,
          truth_vcf_index = GetVcfIndex.truth_file, 
          test_vcf_index = GetVcfIndex.results_file,
          done = CopyToTestResults.done
      }
    }
}