# Starting from an initial structure tree, use molecular dynamics to simulate
# frames, then subsample those frames to build bootstrap replicates for the tree.


import sys

import snakemake.utils

# Refuse to run on Snakemake releases older than 7.8.0.
snakemake.utils.min_version("7.8.0")
snake_dir = workflow.basedir

# Parent directory of snake_dir, with a trailing '/': everything up to and
# including the last '/' (empty string when snake_dir contains no '/').
rootdir = snake_dir[:snake_dir.rfind('/') + 1]
# Diagnostics go to stderr so they do not end up in stdout-based outputs
# such as `snakemake --dag | dot`.
print(rootdir, file=sys.stderr)
# Distance-matrix flavours; used as the {mattype} value when building targets.
mattypes = ['foldtree', 'alntmscore', 'lddt']

configfile: rootdir+ "workflow/config/config_vars.yaml"

# Foldseek executable: the config value is used verbatim unless it is the
# sentinel 'provided', which selects the copy under rootdir.
foldseekpath = config["foldseek_path"]
if foldseekpath == 'provided':
	foldseekpath = rootdir + "foldseek/foldseek"

rule all:
	# Default target: one pLDDT summary per configured folder plus the final
	# rooted tree for every distance-matrix type.
	input:
	#get all treescore and rf distance files for all alntypes
		#expand( "{folder}/RFdistances_{exp}_.json" , folder = folders , exp = exp) ,
		expand( "{folder}/plddt.json" , folder = config["folder"] ) ,
		# expand() fills placeholders from its keyword arguments only, so the
		# pattern must use {folder}/{mattype}; "{wildcards.x}" cannot be resolved
		# here and raises a WildcardError.
		# NOTE(review): per-frame targets would need an explicit list of frames
		# passed to expand() - confirm whether that is the intent.
		expand(
			"{folder}/{mattype}_struct_tree.PP.nwk.rooted.final",
			folder = config["folder"],
			mattype = mattypes,
		),


rule split_frames:
    # Runs ../src/split_frames.py on one PDB file to produce the copy of that
    # structure under a per-frame sub-directory.
    # NOTE(review): downstream rules read structures from
    # {folder}/{frame}/structs/ - confirm this output layout is the intended one.
    conda:
        'foldtree'
    input:
        "{folder}/structs/{pdbid}.pdb"
    output:
        # Output patterns define wildcards by bare name; "{wildcards.folder}" is
        # only valid inside shell/params strings and left `folder` undefined.
        "{folder}/structs/{frame}/{pdbid}.pdb"
    log:
        # Snakemake requires log files to carry the same wildcards as the
        # output, which also keeps parallel jobs from sharing one log file.
        "{folder}/logs/split_frames_{frame}_{pdbid}.log"
    script:
        "../src/split_frames.py"

rule simulate_frames:
    # Runs ../src/simulate_frames.py on one PDB file and produces the matching
    # .MD.pdb file next to it.
    conda:
        'foldtree'
    input:
        "{folder}/structs/{pdbid}.pdb"
    output:
        "{folder}/structs/{pdbid}.MD.pdb"
    log:
        # Snakemake requires log files to carry the same wildcards as the
        # output; one log per structure also avoids jobs overwriting each other.
        "{folder}/logs/simulate_frames_{pdbid}.log"
    script:
        "../src/simulate_frames.py"

rule mad_root_post:
	# Runs ../src/process_madroot.py to turn the rooted tree into the
	# ".rooted.final" file.
	conda:
		#"config/fold_tree.yaml"
		"foldtree"
	input:
		# Depend on the ".rooted" file written by mad_root_struct; with the
		# unrooted ".PP.nwk" as input the rooting rule was never scheduled.
		# NOTE(review): assumes the script reads snakemake.input[0] - confirm.
		"{folder}/{mattype}_struct_tree.PP.nwk.rooted"
	output:
		"{folder}/{mattype}_struct_tree.PP.nwk.rooted.final"
	log:
		"{folder}/logs/{mattype}_struct_madroot_post.log"
	script:
		'../src/process_madroot.py'
	
rule mad_root_struct:
	# Runs the mad binary under rootdir on the post-processed tree.
	# presumably mad writes "<input>.rooted" next to its input, which is the
	# declared output - verify against the mad tool.
	conda:
		#"config/fold_tree.yaml"
		"foldtree"
	input:
		"{folder}/{mattype}_struct_tree.PP.nwk"
	output:
		"{folder}/{mattype}_struct_tree.PP.nwk.rooted"
	log:
		"{folder}/logs/{mattype}_struct_madroot.log"
	params:
		mad = rootdir + 'madroot/mad'
	shell:
		# {input} expands to the same path the command previously spelled out by
		# hand; the tool's messages are captured in the declared log file.
		'{params.mad} {input} > {log} 2>&1'

rule postprocess:
	# Runs ../src/postprocess.py on the raw structure tree to produce the
	# ".PP.nwk" tree consumed by the rooting rules.
	input:
		"{folder}/{mattype}_struct_tree.nwk"
	output:
		"{folder}/{mattype}_struct_tree.PP.nwk"
	log:
		"{folder}/logs/{mattype}_struct_postprocess.log"
	conda:
		# named environment; env-file alternative: "config/fold_tree.yaml"
		"foldtree"
	script:
		"../src/postprocess.py"

rule quicktree:
	# Runs quicktree on a distance matrix (-i m) and stores its stdout as the
	# Newick tree.
	conda:
		#"config/fold_tree.yaml"
		"foldtree"
	input:
		"{folder}/{mattype}_fastmemat.txt"
	output:
		"{folder}/{mattype}_struct_tree.nwk"
	log:
		"{folder}/logs/{mattype}_quicktree.log"
	shell:
		# {input}/{output} expand to the same paths the command previously
		# spelled out by hand; stderr is captured in the declared log file.
		'quicktree -i m {input} > {output} 2> {log}'



rule foldseek2distmat:
	# Runs ../src/foldseekres2distmat_simple.py on the all-vs-all table and
	# produces one "*_fastmemat.txt" file per distance flavour.
	input:
		"{folder}/{frame}/allvall_1.csv"
	output:
		# Three positional outputs, in this order: foldtree, alntmscore, lddt.
		"{folder}/{frame}/foldtree_fastmemat.txt",
		"{folder}/{frame}/alntmscore_fastmemat.txt",
		"{folder}/{frame}/lddt_fastmemat.txt",
	log:
		"{folder}/{frame}/logs/foldseek2distmat.log"
	conda:
		# named environment; env-file alternative: "config/fold_tree.yaml"
		"foldtree"
	script:
		'../src/foldseekres2distmat_simple.py'

rule foldseek_allvall_1:
	# Runs `foldseek easy-search` with the frame's structs/ directory as both
	# query and target, writing the tabular result to allvall_1.csv.
	conda:
		#"config/fold_tree.yaml"
		"foldtree"
	input:
		"{folder}/{frame}/finalset.csv"
	output:
		"{folder}/{frame}/allvall_1.csv"
	log:
		"{folder}/{frame}/logs/foldseekallvall.log"
	params:
		foldseek = foldseekpath
	threads:
		# Declare the cores foldseek uses so the scheduler does not
		# oversubscribe; Snakemake caps this at the cores it was given.
		int(config['foldseek_cores'])
	shell:
		# {output} expands to the same path the command previously spelled out
		# by hand; foldseek's messages are captured in the declared log file.
		"{params.foldseek} easy-search"
		" {wildcards.folder}/{wildcards.frame}/structs/"
		" {wildcards.folder}/{wildcards.frame}/structs/"
		" {output}"
		" {wildcards.folder}/{wildcards.frame}/tmp"
		" --format-output 'query,target,fident,alnlen,mismatch,gapopen,qstart,qend,tstart,tend,evalue,bits,lddt,lddtfull,alntmscore'"
		" --exhaustive-search --alignment-type 2 -e inf --threads {threads}"
		" > {log} 2>&1"


