version 1.0

# Single-step workflow: exposes the PharmCAT pipeline options as workflow inputs,
# forwards each of them unchanged to `pharmcat_pipeline_task`, and re-exports the
# task's `results` array as the workflow output.
workflow pharmcat_pipeline {
  meta {
    author: "ClinPGx"
    email: "pharmcat@clinpgx.org"
    description: "This workflow runs a VCF file through the PharmCAT pipeline."
  }

  parameter_meta {
    # description for this is intentionally different from pipeline script because it's hard to
    # support a file of files on cloud services and directories aren't supported
    vcf_file: "A VCF file (can be gzipped or bgzipped)."
    sample_ids: "A comma-separated list of sample IDs.  Only applicable if you have multiple samples and only want to work on specific ones."
    sample_file: "A file containing a list of sample IDs, one sample ID per line.  Only applicable if you have multiple samples and only want to work on specific ones."

    missing_to_ref: 'Assume genotypes at absent or unspecified PGx sites are "0/0".  DANGEROUS!'
    absent_to_ref: 'Assume genotypes at absent PGx sites are "0/0".  DANGEROUS!'
    unspecified_to_ref: 'Assume unspecified genotypes ("./.") are "0/0" when every sample is "./.". DANGEROUS!'
    no_gvcf_check: "Bypass check if VCF file is in gVCF format."
    # not including retain_specific_regions and reference_regions

    run_matcher: "Run named allele matcher independently."
    matcher_all_results: "Return all possible diplotypes, not just top hits."
    matcher_save_html: "Save named allele matcher results as HTML."
    research_mode: "Comma-separated list of research features to enable: [cyp2d6, combinations]"

    run_phenotyper: "Run phenotyper independently."

    run_reporter: "Run reporter independently."
    reporter_sources: "Comma-separated list of sources to limit recommendations to: [CPIC, DPWG, FDA]"
    reporter_extended: "Write an extended report (includes all possible genes and drugs, even if no data is available)"
    reporter_save_html: "Save reporter results as HTML (the default if no format is specified)."
    reporter_save_json: "Save reporter results as JSON."
    reporter_save_calls_only_tsv: "Save call results only as TSV."

    base_filename: "Prefix for output files.  Defaults to the same base name as the input."
    delete_intermediate_files: "Delete intermediate PharmCAT files.  Defaults to saving all files."

    max_concurrent_processes: "The maximum number of processes to use when concurrent mode is enabled."
    max_memory: "The maximum memory PharmCAT should use (e.g. '64G')."
  }


  # Only `vcf_file` is required. Every other input has a default (or is optional),
  # and the empty-string / false defaults cause the task to omit the matching
  # command-line flag.
  input {
    File vcf_file
    String sample_ids = ""
    File? sample_file
    Boolean missing_to_ref = false
    Boolean absent_to_ref = false
    Boolean unspecified_to_ref = false
    Boolean no_gvcf_check = false
    Boolean run_matcher = false
    Boolean matcher_all_results = false
    Boolean matcher_save_html = false
    String research_mode = ""
    Boolean run_phenotyper = false
    Boolean run_reporter = false
    String reporter_sources = ""
    Boolean reporter_extended = false
    Boolean reporter_save_html = false
    Boolean reporter_save_json = false
    Boolean reporter_save_calls_only_tsv = false
    String base_filename = ""
    Boolean delete_intermediate_files = false
    Int max_concurrent_processes = 1
    String max_memory = "4G"
  }

  # One-to-one pass-through of the workflow inputs to the task inputs of the same name.
  call pharmcat_pipeline_task {
    input:
      vcf_file = vcf_file,
      sample_ids = sample_ids,
      sample_file = sample_file,
      missing_to_ref = missing_to_ref,
      absent_to_ref = absent_to_ref,
      unspecified_to_ref = unspecified_to_ref,
      no_gvcf_check = no_gvcf_check,
      run_matcher = run_matcher,
      matcher_all_results = matcher_all_results,
      matcher_save_html = matcher_save_html,
      research_mode = research_mode,
      run_phenotyper = run_phenotyper,
      run_reporter = run_reporter,
      reporter_sources = reporter_sources,
      reporter_extended = reporter_extended,
      reporter_save_html = reporter_save_html,
      reporter_save_json = reporter_save_json,
      reporter_save_calls_only_tsv = reporter_save_calls_only_tsv,
      base_filename = base_filename,
      delete_intermediate_files = delete_intermediate_files,
      max_concurrent_processes = max_concurrent_processes,
      max_memory = max_memory
  }

  output {
    # Files collected by the task's glob over its working `data/` directory.
    Array[File] results = pharmcat_pipeline_task.results
  }
}


# Runs the `pharmcat_pipeline` command inside the PharmCAT Docker image.
#
# The input VCF is copied into a local `data/` directory and the pipeline is
# invoked on that copy. Each Boolean input adds its command-line flag only when
# true, and each String input adds its flag only when non-empty.
# `max_concurrent_processes` and `max_memory` are passed to the tool (-cp / -cm)
# and are also used as the task's `cpu` / `memory` runtime requests.
task pharmcat_pipeline_task {
  meta {
    author: "ClinPGx"
    email: "pharmcat@clinpgx.org"
    description: "This task runs a VCF file through the PharmCAT pipeline."
  }

  input {
    File vcf_file
    String sample_ids = ""
    File? sample_file
    Boolean missing_to_ref = false
    Boolean absent_to_ref = false
    Boolean unspecified_to_ref = false
    Boolean no_gvcf_check = false
    Boolean run_matcher = false
    Boolean matcher_all_results = false
    Boolean matcher_save_html = false
    String research_mode = ""
    Boolean run_phenotyper = false
    Boolean run_reporter = false
    String reporter_sources = ""
    Boolean reporter_extended = false
    Boolean reporter_save_html = false
    Boolean reporter_save_json = false
    Boolean reporter_save_calls_only_tsv = false
    String base_filename = ""
    Boolean delete_intermediate_files = false
    Int max_concurrent_processes = 1
    String max_memory = "4G"
  }

  # The VCF path is quoted so a localized path containing spaces or glob
  # characters is not word-split by the shell.
  # NOTE(review): the option values (sample_ids, research_mode, reporter_sources,
  # base_filename, sample_file) are still interpolated unquoted; values containing
  # whitespace or shell metacharacters would be split/expanded - confirm callers
  # only pass simple tokens.
  command <<<
    set -x -e -o pipefail
    mkdir -p data
    cp "~{vcf_file}" data/

    pharmcat_pipeline "data/$(basename "~{vcf_file}")" \
    ~{if sample_ids != "" then '-s ' + sample_ids else ''} \
    ~{if defined(sample_file) then '-S ' + sample_file else ''} \
    ~{if missing_to_ref then '-0' else ''} \
    ~{if absent_to_ref then '--absent-to-ref' else ''} \
    ~{if unspecified_to_ref then '--unspecified-to-ref' else ''} \
    ~{if no_gvcf_check then '-G' else ''} \
    ~{if run_matcher then '-matcher' else ''} \
    ~{if matcher_all_results then '-ma' else ''} \
    ~{if matcher_save_html then '-matcherHtml' else ''} \
    ~{if research_mode != "" then '-research ' + research_mode else ''} \
    ~{if run_phenotyper then '-phenotyper' else ''} \
    ~{if run_reporter then '-reporter' else ''} \
    ~{if reporter_sources != "" then '-rs ' + reporter_sources else ''} \
    ~{if reporter_extended then '-re' else ''} \
    ~{if reporter_save_html then '-reporterHtml' else ''} \
    ~{if reporter_save_json then '-reporterJson' else ''} \
    ~{if reporter_save_calls_only_tsv then '-reporterCallsOnlyTsv' else ''} \
    ~{if base_filename != "" then '-bf ' + base_filename else ''} \
    ~{if delete_intermediate_files then '-del' else ''} \
    -cp ~{max_concurrent_processes} -cm ~{max_memory}
  >>>

  output {
    # Everything left in data/ after the run. The copy of the input VCF made by
    # the command lives in data/ as well, so it is matched by this glob.
    # NOTE(review): presumably pharmcat_pipeline writes its outputs next to its
    # input file (no output directory is passed) - confirm against the tool docs.
    Array[File] results = glob("data/*")
  }

  runtime {
    docker: "pgkb/pharmcat:3.2.0"
    memory: max_memory
    cpu: max_concurrent_processes
  }
}