version 1.0

import "Structs.wdl"
import "ShardedAnnotateVcf.wdl" as sharded_annotate_vcf
import "TasksMakeCohortVcf.wdl" as MiniTasks

# Annotates a VCF contig-by-contig and merges the results.
#
# For each contig named in `contig_list`, the ShardedAnnotateVcf sub-workflow is
# invoked on the input VCF; it returns an array of annotated shard VCFs (plus
# indexes) for that contig. The per-contig arrays are then flattened into a
# single list and handed to ConcatVcfs, whose result is exposed as the workflow
# output with file prefix "<prefix>.annotated".
workflow AnnotateVcf {

  input {
    # --- Core inputs ---
    # GATK-SV VCF for annotation. Its index is not a separate input: the path
    # "<vcf>.tbi" is passed to the sub-workflow, so the .tbi must sit at the
    # same path as the VCF.
    File vcf
    # Ordered list of contigs to annotate that are present in the input VCF
    # (read one contig per line).
    File contig_list
    String prefix

    # --- Functional annotation ---
    # Provide at least one of protein_coding_gtf or noncoding_bed to perform
    # functional annotation.
    File? protein_coding_gtf
    File? noncoding_bed
    Int? promoter_window
    Int? max_breakend_as_cnv_length
    String? svannotate_additional_args

    # --- Allele-frequency annotation ---
    # Two-column file with sample ID & pop assignment. "." for pop will ignore
    # the sample.
    File? sample_pop_assignments
    # List of samples to be retained from the output vcf.
    File? sample_keep_list
    # Used for M/F AF calculations.
    File? ped_file
    File? par_bed
    File? allosomes_list
    Int   sv_per_shard

    # --- External allele-frequency annotation ---
    # File with external allele frequencies.
    File? external_af_ref_bed
    # Prefix name for the external AF call set (required if ref_bed set).
    String? external_af_ref_prefix
    # Populations to annotate external AF for (required if ref_bed set).
    Array[String]? external_af_population

    # --- Docker images ---
    String sv_pipeline_docker
    String sv_base_mini_docker
    String gatk_docker

    # --- Runtime overrides forwarded to ShardedAnnotateVcf ---
    RuntimeAttr? runtime_attr_svannotate
    RuntimeAttr? runtime_attr_scatter_vcf
    RuntimeAttr? runtime_attr_subset_vcf_by_samples_list
    RuntimeAttr? runtime_attr_compute_AFs
    RuntimeAttr? runtime_attr_modify_vcf
    RuntimeAttr? runtime_attr_split_ref_bed
    RuntimeAttr? runtime_attr_split_query_vcf
    RuntimeAttr? runtime_attr_bedtools_closest
    RuntimeAttr? runtime_attr_select_matched_svs

    # --- Runtime override forwarded to ConcatVcfs ---
    RuntimeAttr? runtime_attr_concat

    # --- Declared but not referenced by any call in this workflow ---
    RuntimeAttr? runtime_attr_preconcat
    RuntimeAttr? runtime_attr_fix_header
  }

  # One entry per line of contig_list; drives the scatter below.
  Array[String] contig_names = read_lines(contig_list)

  scatter (contig_name in contig_names) {
    call sharded_annotate_vcf.ShardedAnnotateVcf {
      input:
        # What to annotate
        vcf = vcf,
        vcf_idx = vcf + ".tbi",
        contig = contig_name,
        prefix = prefix,
        sv_per_shard = sv_per_shard,

        # Functional annotation settings
        protein_coding_gtf = protein_coding_gtf,
        noncoding_bed = noncoding_bed,
        promoter_window = promoter_window,
        max_breakend_as_cnv_length = max_breakend_as_cnv_length,
        svannotate_additional_args = svannotate_additional_args,

        # Allele-frequency settings
        sample_pop_assignments = sample_pop_assignments,
        sample_keep_list = sample_keep_list,
        ped_file = ped_file,
        par_bed = par_bed,
        allosomes_list = allosomes_list,

        # External AF reference; note the sub-workflow uses shorter names
        ref_bed = external_af_ref_bed,
        ref_prefix = external_af_ref_prefix,
        population = external_af_population,

        # Docker images
        sv_pipeline_docker = sv_pipeline_docker,
        sv_base_mini_docker = sv_base_mini_docker,
        gatk_docker = gatk_docker,

        # Per-task runtime overrides
        runtime_attr_svannotate = runtime_attr_svannotate,
        runtime_attr_scatter_vcf = runtime_attr_scatter_vcf,
        runtime_attr_subset_vcf_by_samples_list = runtime_attr_subset_vcf_by_samples_list,
        runtime_attr_compute_AFs = runtime_attr_compute_AFs,
        runtime_attr_modify_vcf = runtime_attr_modify_vcf,
        runtime_attr_split_ref_bed = runtime_attr_split_ref_bed,
        runtime_attr_split_query_vcf = runtime_attr_split_query_vcf,
        runtime_attr_bedtools_closest = runtime_attr_bedtools_closest,
        runtime_attr_select_matched_svs = runtime_attr_select_matched_svs
    }
  }

  # Outside the scatter, ShardedAnnotateVcf.sharded_annotated_vcf is an
  # Array[Array[File]] holding one inner Array[File] of shards per contig (and
  # likewise for the indexes). Flatten both into single-level lists for the
  # concatenation step.
  Array[File] annotated_shard_vcfs = flatten(ShardedAnnotateVcf.sharded_annotated_vcf)
  Array[File] annotated_shard_vcf_idxs = flatten(ShardedAnnotateVcf.sharded_annotated_vcf_idx)

  # Merge every annotated shard into the final VCF.
  # NOTE(review): the exact effect of allow_overlaps is defined by ConcatVcfs in
  # TasksMakeCohortVcf.wdl, which is not visible here.
  call MiniTasks.ConcatVcfs {
    input:
      vcfs = annotated_shard_vcfs,
      vcfs_idx = annotated_shard_vcf_idxs,
      allow_overlaps = true,
      outfile_prefix = "~{prefix}.annotated",
      sv_base_mini_docker = sv_base_mini_docker,
      runtime_attr_override = runtime_attr_concat
  }

  output {
    # Concatenated, annotated VCF and its index as produced by ConcatVcfs.
    File annotated_vcf = ConcatVcfs.concat_vcf
    File annotated_vcf_index = ConcatVcfs.concat_vcf_idx
  }
}