
# Once snakemake is installed, run this workflow to build and test the structural trees.
# The target folder(s) are read from the `folder` key of the config file.
#import snakemake.utils
#snakemake.utils.min_version("7.8.0")

# Directory that holds this Snakefile, as reported by snakemake.
snake_dir = workflow.basedir

# Parent of snake_dir with a trailing '/': everything up to and including
# the last '/' in the path (empty string when the path contains no '/').
rootdir = snake_dir[:snake_dir.rfind('/') + 1]
print(rootdir)

# Distance-matrix flavours; rule all requests one final tree per entry.
mattypes = ['foldtree', 'alntmscore', 'lddt']

configfile: rootdir+ "workflow/config/config_vars.yaml"

# Foldseek executable: the configured path, or the binary located at
# <rootdir>foldseek/foldseek when the config value is the keyword 'provided'.
foldseekpath = config["foldseek_path"]
foldseekpath = rootdir + "foldseek/foldseek" if foldseekpath == 'provided' else foldseekpath

# Default target: for every configured folder, the pLDDT summary plus one
# final tree file per entry of mattypes.
rule all:
	input:
		# Disabled target: treescore / RF-distance files for all alignment types.
		#expand( "{folder}/RFdistances_{exp}_.json" , folder = folders , exp = exp) ,
		expand(
			"{folder}/plddt.json",
			folder=config["folder"],
		),
		expand(
			"{folder}/{mattype}_struct_tree.PP.nwk.rooted.final",
			folder=config["folder"],
			mattype=mattypes,
		),

# Turn the MAD-rooted tree into the final tree file requested by rule all.
#
# The input is the ".rooted" file declared as the output of mad_root_struct.
# Previously this rule consumed the unrooted ".PP.nwk" tree directly, so no
# target ever required the ".rooted" file and the rooting step was never
# scheduled.
rule mad_root_post:
	conda:
		"envs/trees.yaml"
	input:
		"{folder}/{mattype}_struct_tree.PP.nwk.rooted"
	output:
		"{folder}/{mattype}_struct_tree.PP.nwk.rooted.final"
	log:
		"{folder}/logs/{mattype}_struct_madroot_post.log"
	script:
		'../process_madroot.py'
	
# Root the post-processed tree with the mad executable found under
# <rootdir>madroot/.
#
# The command only receives the input path; the declared output relies on
# mad writing its result next to the input as "<input>.rooted"
# (NOTE(review): confirm against the mad version shipped in madroot/).
# NOTE(review): this rule's conda env lives under "config/" while the other
# rules use "envs/" -- confirm the path is intentional.
rule mad_root_struct:
	conda:
		"config/foldtree.yaml"
	input:
		"{folder}/{mattype}_struct_tree.PP.nwk"
	output:
		"{folder}/{mattype}_struct_tree.PP.nwk.rooted"
	log:
		"{folder}/logs/{mattype}_struct_madroot.log"
	shell:
		# Use the declared input instead of rebuilding its path from wildcards,
		# and capture stdout/stderr in the declared log file, which was
		# previously never written.
		rootdir+'madroot/mad {input} > {log} 2>&1'

# Run postprocess.py on the raw tree to produce the ".PP.nwk" tree.
rule postprocess:
	input:
		"{folder}/{mattype}_struct_tree.nwk"
	output:
		"{folder}/{mattype}_struct_tree.PP.nwk"
	log:
		"{folder}/logs/{mattype}_struct_postprocess.log"
	conda:
		"envs/trees.yaml"
	script:
		# Path is relative to this Snakefile.
		'../postprocess.py'

# Build a tree from a distance matrix with quicktree.
#
# "-i m" is passed so quicktree reads the input as a matrix; the tree is
# taken from quicktree's stdout.
rule quicktree:
	conda:
		"envs/trees.yaml"
	input:
		"{folder}/{mattype}_fastmemat.txt"
	output:
		"{folder}/{mattype}_struct_tree.nwk"
	log:
		"{folder}/logs/{mattype}_quicktree.log"
	shell:
		# Refer to the declared input/output rather than duplicating their paths,
		# and send stderr to the declared log file, which was previously unused.
		'quicktree -i m {input} > {output} 2> {log}'

# Convert the foldseek all-vs-all table into three distance-matrix files,
# one per entry of mattypes, via foldseekres2distmat_simple.py.
rule foldseek2distmat:
	input:
		"{folder}/allvall_1.csv"
	output:
		# Order is kept as-is: the script may address outputs by position.
		"{folder}/foldtree_fastmemat.txt",
		"{folder}/alntmscore_fastmemat.txt",
		"{folder}/lddt_fastmemat.txt"
	params:
		fmt=None
	log:
		"{folder}/logs/foldseek2distmat.log"
	conda:
		"envs/foldseek.yaml"
	script:
		"../foldseekres2distmat_simple.py"

# All-vs-all foldseek easy-search of <folder>/structs/ against itself.
#
# finalset.csv is the declared input, but the command itself reads the
# structs/ directory. Results go to the declared output, and foldseek's
# scratch files go to <folder>/tmp.
# NOTE(review): no `threads:` directive is declared, so snakemake schedules
# this job with its default thread count even though foldseek is started
# with config['foldseek_cores'] threads -- consider declaring threads.
rule foldseek_allvall_1:
	conda:
		"envs/foldseek.yaml"
	input:
		"{folder}/finalset.csv"
	output:
		"{folder}/allvall_1.csv"
	log:
		"{folder}/logs/foldseekallvall.log"
	shell:
		# Write to the declared output instead of a hand-built copy of its path,
		# and capture stdout/stderr in the declared log file, which was
		# previously never written.
		foldseekpath + " easy-search {wildcards.folder}/structs/ {wildcards.folder}/structs/ {output} {wildcards.folder}/tmp --format-output 'query,target,fident,alnlen,mismatch,gapopen,qstart,qend,tstart,tend,evalue,bits,lddt,lddtfull,alntmscore' --exhaustive-search --alignment-type 2 -e inf --threads " + str(config['foldseek_cores']) + " > {log} 2>&1"

# Run dl_sequences.py on the identifier list to produce sequence_dataset.csv.
# The cath / custom_structs / clean_folder switches are forwarded from the
# config as params.
rule dl_ids_sequences:
	input:
		ids="{folder}/identifiers.txt"
	output:
		"{folder}/sequence_dataset.csv"
	params:
		cath=config["cath"],
		custom_structs=config["custom_structs"],
		clean_folder=config["clean_folder"]
	log:
		"{folder}/logs/dlsequences.log"
	conda:
		"envs/seqs.yaml"
	script:
		"../dl_sequences.py"

# Run grabplddt.py on the final structure set to produce plddt.json.
rule plddt:
	input:
		"{folder}/finalset.csv"
	output:
		"{folder}/plddt.json"
	log:
		"{folder}/logs/plddt.log"
	conda:
		"envs/seqs.yaml"
	script:
		'../grabplddt.py'

# Run dl_structs.py on the sequence dataset to produce finalset.csv.
# The filter settings and the custom_structs / clean_folder switches are
# forwarded from the config as params.
rule dl_ids_structs:
	input:
		"{folder}/sequence_dataset.csv"
	output:
		# Disabled output:
		#"{folder}/sequences.fst",
		"{folder}/finalset.csv"
	params:
		filtervar=config["filter"],
		# NOTE(review): cath is hard-coded to False here, whereas
		# dl_ids_sequences reads config["cath"] -- confirm this is intended.
		cath=False,
		filtervar_min=config["filter_min"],
		filtervar_avg=config["filter_avg"],
		custom_structs=config["custom_structs"],
		clean_folder=config["clean_folder"]
	log:
		"{folder}/logs/dlstructs.log"
	conda:
		"envs/seqs.yaml"
	script:
		"../dl_structs.py"