
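# Once snakemake is installed, run this workflow on a folder of per-OG FASTA files, e.g.:
#   snakemake --cores 4 --use-conda -s <path/to/this/Snakefile> --config folder=<input_folder>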
import snakemake.utils
snakemake.utils.min_version("7.8.0")
# Directory containing this Snakefile, as reported by Snakemake.
snake_dir = workflow.basedir

# Everything above the Snakefile's own directory, kept with a trailing '/'
# because the paths below are assembled by plain string concatenation.
rootdir = ''.join(part + '/' for part in snake_dir.split('/')[:-1])

# Load the default settings before `config` is read for the first time, so
# keys that only live in the YAML file are available below. Values passed on
# the command line with --config still take precedence over the file.
configfile: rootdir + "workflow/config/config_vars.yaml"

# Glob wildcards for all OGs: one "<og>.fasta" per OG in the input folder.
OGs = glob_wildcards(config['folder'] + "/{og}.fasta")

print(rootdir)
# glob_wildcards returns a namedtuple; the matched names are in its `og` field.
# (The original printed an undefined name `folders`, which raised NameError.)
print(OGs.og[:100], '...')

# Names of the distance-matrix / tree variants built for every OG.
mattypes = ['foldtree', 'alntmscore', 'lddt']

# remote homologues search parameters
foldseekpath = config["foldseek_path"]
if foldseekpath == 'provided':
	# Use the foldseek binary located under rootdir.
	foldseekpath = rootdir + "foldseek/foldseek"

# Default target: per-OG pLDDT summaries, per-OG rooted trees for every matrix
# type, and the ASTRAL tree built from all of them.
# The per-OG rules all write "{folder}/{og}_..." files, so the targets must
# carry the same "{og}_" prefix; every target is wrapped in expand() because a
# target rule may not contain unresolved wildcards.
rule all:
	input:
		expand("{folder}/{og}_plddt.json", folder=config["folder"], og=OGs.og),
		expand(
			"{folder}/{og}_{mattype}_struct_tree.PP.nwk.rooted.final",
			folder=config["folder"],
			og=OGs.og,
			mattype=mattypes,
		),
		expand("{folder}/astral_tree_foldtree.nwk", folder=config["folder"]),

# Run ASTRAL on the concatenated per-OG trees to produce one summary tree.
rule Astral_foldtree:
	conda:
		"astral"
	input:
		"{folder}/concatenated_tree.nwk"
	output:
		"{folder}/astral_tree_foldtree.nwk"
	log:
		"{folder}/logs/astral_foldtree.log"
	shell:
		# Inside a shell template, dictionary keys are written WITHOUT quotes:
		# {config["astral_path"]} would look up the key '"astral_path"'
		# (quotes included) and fail. {input}/{output} expand to the paths
		# declared above; stderr is captured in the declared log file.
		'java -jar {config[astral_path]} -i {input} -o {output} 2> {log}'

# Gather the final rooted tree of every OG and matrix type into a single file
# (the concatenation itself is done by src/concatenate_trees.py).
rule concatenate_trees:
	conda:
		#"config/fold_tree.yaml"
		"foldtree"
	input:
		# expand() needs a value for every placeholder it is asked to fill;
		# the original call gave none for {og}. allow_missing=True leaves
		# {folder} untouched so it is bound by this rule's output wildcard.
		expand(
			"{folder}/{og}_{mattype}_struct_tree.PP.nwk.rooted.final",
			og=OGs.og,
			mattype=mattypes,
			allow_missing=True,
		)
	output:
		"{folder}/concatenated_tree.nwk"
	log:
		"{folder}/logs/concatenate_trees.log"
	script:
		'../src/concatenate_trees.py'
	

# Post-process the rooted tree into the final per-OG tree
# (done by src/process_madroot.py).
rule mad_root_post:
	conda:
		#"config/fold_tree.yaml"
		"foldtree"
	input:
		# Depend on the ".rooted" file produced by rule mad_root_struct. With
		# the unrooted ".PP.nwk" as input (as before) nothing in the workflow
		# ever requested the ".rooted" file, so the rooting step never ran.
		# NOTE(review): confirm process_madroot.py reads snakemake.input[0]
		# as the rooted tree.
		"{folder}/{og}_{mattype}_struct_tree.PP.nwk.rooted"
	output:
		"{folder}/{og}_{mattype}_struct_tree.PP.nwk.rooted.final"
	log:
		"{folder}/logs/{og}_{mattype}_struct_madroot_post.log"
	script:
		'../src/process_madroot.py'
	
# Root one post-processed structural tree with the `mad` binary under rootdir.
rule mad_root_struct:
	input:
		"{folder}/{og}_{mattype}_struct_tree.PP.nwk"
	output:
		"{folder}/{og}_{mattype}_struct_tree.PP.nwk.rooted"
	log:
		"{folder}/logs/{og}_{mattype}_struct_madroot.log"
	conda:
		# Named conda environment (alternative env file: "config/fold_tree.yaml").
		"foldtree"
	shell:
		# Only the input path is passed on the command line; the declared
		# output is presumably written by mad itself as "<input>.rooted".
		rootdir + "madroot/mad {wildcards.folder}/{wildcards.og}_{wildcards.mattype}_struct_tree.PP.nwk"

# Turn the raw tree of one OG / matrix type into its ".PP.nwk" version
# (the transformation is implemented in src/postprocess.py).
rule postprocess:
	input:
		"{folder}/{og}_{mattype}_struct_tree.nwk"
	output:
		"{folder}/{og}_{mattype}_struct_tree.PP.nwk"
	log:
		"{folder}/logs/{og}_{mattype}_struct_postprocess.log"
	conda:
		# Named conda environment (alternative env file: "config/fold_tree.yaml").
		"foldtree"
	script:
		"../src/postprocess.py"

# Build a tree from the distance matrix of one OG / matrix type with quicktree.
rule quicktree:
	conda:
		#"config/fold_tree.yaml"
		"foldtree"
	input:
		"{folder}/{og}_{mattype}_fastmemat.txt"
	output:
		"{folder}/{og}_{mattype}_struct_tree.nwk"
	log:
		"{folder}/logs/{og}_{mattype}_quicktree.log"
	shell:
		# {input}/{output} expand to exactly the paths declared above. The
		# original template used a bare {og} (unknown name in a shell
		# template) and redirected to a path without the "{og}_" prefix, so
		# the declared output was never created.
		'quicktree -i m {input} > {output} 2> {log}'

# Convert the foldseek all-vs-all table of one OG into one distance matrix per
# matrix type (done by src/foldseekres2distmat_simple.py).
# All paths carry the "{og}_" prefix so that this rule consumes the output of
# foldseek_allvall_1 and produces the inputs requested by quicktree.
rule foldseek2distmat:
	conda:
		#"config/fold_tree.yaml"
		"foldtree"
	input:
		"{folder}/{og}_allvall_1.csv"
	output:
		# Order kept as in the original rule; the script presumably addresses
		# the outputs by position - TODO confirm.
		"{folder}/{og}_foldtree_fastmemat.txt",
		"{folder}/{og}_alntmscore_fastmemat.txt",
		"{folder}/{og}_lddt_fastmemat.txt",
	log:
		# Log files must use the same wildcards as the outputs.
		"{folder}/logs/{og}_foldseek2distmat.log"
	script:
		"../src/foldseekres2distmat_simple.py"

# Run an exhaustive foldseek all-vs-all search over the structures of one OG.
rule foldseek_allvall_1:
	conda:
		#"config/fold_tree.yaml"
		"foldtree"
	input:
		# The command reads the "{og}_structs/" directory rather than this
		# file; the file is declared so the rule runs after dl_ids_structs.
		"{folder}/{og}_finalset.csv"
	output:
		"{folder}/{og}_allvall_1.csv"
	log:
		# Log files must use the same wildcards as the outputs, so the log
		# path needs {og} as well.
		"{folder}/logs/{og}_foldseekallvall.log"
	shell:
		# Wildcards must be referenced as {wildcards.og}; a bare {og} is an
		# unknown name inside a shell template.
		foldseekpath + " easy-search {wildcards.folder}/{wildcards.og}_structs/ {wildcards.folder}/{wildcards.og}_structs/ {wildcards.folder}/{wildcards.og}_allvall_1.csv {wildcards.folder}/{wildcards.og}_tmp --format-output 'query,target,fident,alnlen,mismatch,gapopen,qstart,qend,tstart,tend,evalue,bits,lddt,lddtfull,alntmscore' --exhaustive-search --alignment-type 2 -e inf"

# Build the sequence dataset of one OG from its identifier list
# (implemented in src/dl_sequences_ogs.py).
rule dl_ids_sequences:
	input:
		ids = "{folder}/{og}_identifiers.txt"
	output:
		"{folder}/{og}_sequence_dataset.csv"
	params:
		custom_structs = False
	log:
		"{folder}/logs/{og}_dlsequences.log"
	conda:
		# Named conda environment (alternative env file: "config/fold_tree.yaml").
		"foldtree"
	script:
		"../src/dl_sequences_ogs.py"

# Write the pLDDT JSON of one OG from its final structure set
# (implemented in src/grabplddt.py).
rule plddt:
	input:
		"{folder}/{og}_finalset.csv"
	output:
		"{folder}/{og}_plddt.json"
	log:
		"{folder}/logs/{og}_plddt.log"
	conda:
		# Named conda environment (alternative env file: "config/fold_tree.yaml").
		"foldtree"
	script:
		"../src/grabplddt.py"

# Produce the sequences file and the final structure set of one OG from its
# sequence dataset (implemented in src/dl_structs.py). Filtering thresholds and
# the custom-structure switch are taken from the workflow config.
rule dl_ids_structs:
	input:
		# Must match the output of dl_ids_sequences, which is written per OG
		# as "{og}_sequence_dataset.csv"; without the prefix no rule could
		# provide this input.
		"{folder}/{og}_sequence_dataset.csv",
	output:
		"{folder}/{og}_sequences.fst",
		"{folder}/{og}_finalset.csv",
	conda:
		#"config/fold_tree.yaml"
		"foldtree"
	log:
		"{folder}/logs/{og}_dlstructs.log",
	params:
		filtervar=config["filter"],
		filtervar_min=config["filter_min"],
		filtervar_avg=config["filter_avg"],
		custom_structs=config["custom_structs"],
	script:
		"../src/dl_structs.py"
