#!/usr/bin/perl

#This program runs as a standalone tool on any Mac system.
#Usage: perl subnucpred.pl Input_fasta_file Threshold
#More than one protein sequence can be submitted at a time, in FASTA format only.
#Results are written to the file named Prediction.

# Copy the FASTA file named by the first command-line argument into
# input_seq.fasta in the current directory.  Each line is chomped and
# re-terminated, so the copy always ends with a newline.
#
# The input is opened with the three-argument form of open so that the
# user-supplied name is always treated as a plain file to read; with the
# two-argument form a name such as ">file" or "cmd |" would be interpreted
# as a mode or a pipe.
my $fasta_path = $ARGV[0];
die "Usage: perl subnucpred.pl Input_fasta_file Threshold\n"
    unless defined $fasta_path && length $fasta_path;

open(my $seq_fh, '<', $fasta_path) or die "$!";
open(my $copy_fh, '>', 'input_seq.fasta') or die "$!";
while (my $line = <$seq_fh>) {
    chomp $line;
    print {$copy_fh} "$line\n";
}
# Buffered write errors (e.g. a full disk) only surface at close time.
close($copy_fh) or die "$!";
close($seq_fh);

# External pipeline.  Every step is a shell command run in the current
# directory; the steps communicate through intermediate files.  Exit
# statuses are not inspected.
#
# Preparation and Pfam domain search:
#   input.fasta            input_seq.fasta with carriage returns removed
#   total_seq              number of lines containing '>' in input.fasta
#   protein_id             3rd '|'-separated field of each '>' line, cut at
#                          the first space
#   hmm_out                hmmscan --domtblout table (E-value cutoff 1e-5,
#                          database Pfam/Pfam-A.hmm)
#   hmm_out_domain, hmm_out_domain_evalue, domain_id
#                          produced by the helper scripts hmm.pl, evalue.pl
#                          and id_compare.pl (their logic is not visible
#                          here) combined with grep/cut/tr/sort
#   aacomp                 output of aaseqformat.pl with "+1" prefixed to
#                          every line
for my $command (
    "/usr/bin/tr -d '\r' <input_seq.fasta >input.fasta",
    #system "/usr/bin/perl $infut_file/fasta.pl $dir/input_fi.fasta |/usr/bin/head -50 >$dir/input.fasta";
    "/bin/grep -c '>' input.fasta >total_seq",
    "/bin/grep '>' input.fasta |/usr/bin/cut -d '|' -f3 |/usr/bin/cut -d ' ' -f1 >protein_id",
    "/usr/local/bin/hmmscan --domtblout hmm_out -E 1e-5 Pfam/Pfam-A.hmm input.fasta >/dev/null",
    "/usr/bin/perl hmm.pl hmm_out |/bin/grep -v '#' >hmm_out_domain",
    "/usr/bin/perl evalue.pl hmm_out_domain |/usr/bin/cut -d ' ' -f1,2,3 |/usr/bin/tr '|' '#' >hmm_out_domain_evalue",
    "/usr/bin/perl id_compare.pl hmm_out_domain_evalue protein_id |/usr/bin/cut -d ' ' -f1,2 |/usr/bin/sort -u >domain_id",
    "/usr/bin/perl aaseqformat.pl input.fasta |/bin/sed -e 's/^/+1/' >aacomp",
) {
    system($command);
}

# One svm_classify run per model in Models/; each writes svm_score_<model>.
# The order of this list is also the column order of the pasted score
# table below.
my @svm_models = qw(cent chromo nspeckle nucleolus other nenvelop nmatrix npc nplasm pml tel);
for my $model (@svm_models) {
    system("/usr/local/bin/svm_classify aacomp Models/model_$model svm_score_$model >/dev/null");
}

# Paste id, aacomp line and the eleven score files side by side, turn the
# tab separators into '#', and hand the table to domain.pl together with
# domain_id to produce final_pred.
my $score_files = join(' ', map { "svm_score_$_" } @svm_models);
system("/usr/bin/paste protein_id aacomp $score_files |/usr/bin/tr '\t' '#' >final");
system("/usr/bin/perl domain.pl final domain_id >final_pred");
#$total_seq=`head -1 total_seq`;chomp($total_seq);

# Build Hybrid_result from final_pred.
#
# Each final_pred record is a '#'-separated line.  As used below: field 0
# is the protein id, fields 2..12 are the eleven SVM scores (in the order
# of @svm_labels), and every field from index 13 onwards is treated as a
# domain identifier.  (final_pred is written by domain.pl, which is not
# visible here - the layout is inferred from how the fields are used.)
#
# nuclear_unique_domain is a '#'-separated table whose first field is a
# domain identifier and whose second field is a location key.
#
# For every record:
#   * if any of its domains is listed in nuclear_unique_domain under one of
#     the keys in @domain_classes, one line
#         id#LOCATION#Domain_based_prediction
#     is appended per matching location, in the order of @domain_classes;
#   * otherwise a single line with all eleven label/score pairs is appended.
open(my $final_pred_fh, '<', 'final_pred') or die "$!";
my @records = <$final_pred_fh>;
close $final_pred_fh;

# Strip the line terminators before splitting.  Without this the last
# field of each record keeps its "\n": the last domain id could never
# match the table, and a record without domains wrote a score line that
# ended in two newlines (i.e. an extra blank line in Hybrid_result).
chomp @records;

# [ key used in nuclear_unique_domain, label written to Hybrid_result ]
my @domain_classes = (
    [ 'CENTROMERE', 'CENTROMERE' ],
    [ 'CHROMOSOME', 'CHROMOSOME' ],
    [ 'NSPECKLE',   'NUCLEAR_SPECKLE' ],
    [ 'NUCLEOLUS',  'NUCLEOLUS' ],
    [ 'NENVELOP',   'NUCLEAR_ENVELOPE' ],
    [ 'NMATRIX',    'NUCLEAR_MATRIX' ],
    [ 'NPC',        'NUCLEAR_PORE_COMPLEX' ],
    [ 'NPLASM',     'NUCLEOPLASM' ],
    [ 'PML',        'PML' ],
    [ 'TELOMERE',   'TELOMERE' ],
);

# Labels for score fields 2..12 of a record, in field order.
my @svm_labels = (
    'CENTROMERE', 'CHROMOSOME', 'NUCLEAR_SPECKLE', 'NUCLEOLUS', 'OTHER',
    'NUCLEAR_ENVELOPE', 'NUCLEAR_MATRIX', 'NUCLEAR_PORE_COMPLEX',
    'NUCLEOPLASM', 'PML body', 'TELOMERE',
);

# Nothing is opened or created when final_pred holds no records.
if (@records) {
    # Load the domain table once instead of re-reading the file for every
    # sequence: domain id -> { location key => 1 }.
    my %classes_of_domain;
    open(my $unique_fh, '<', 'nuclear_unique_domain') or die "$!";
    while (my $entry = <$unique_fh>) {
        chomp $entry;
        my ($domain_id, $class) = split(/\#/, $entry);
        $domain_id = '' unless defined $domain_id;
        $class     = '' unless defined $class;
        $classes_of_domain{$domain_id}{$class} = 1;
    }
    close $unique_fh;

    open(my $hybrid_fh, '>>', 'Hybrid_result') or die "$!";
    for my $record (@records) {
        # A blank line carries no protein id; skip it rather than emit a
        # line with an empty id.
        next unless length $record;

        my @dom = split(/\#/, $record);

        # Collect every location key reached through any domain field.
        my %hit;
        for my $x (13 .. $#dom) {
            my $classes = $classes_of_domain{ $dom[$x] } or next;
            $hit{$_} = 1 for keys %$classes;
        }

        my $domain_based = 0;
        for my $pair (@domain_classes) {
            my ($key, $label) = @$pair;
            next unless $hit{$key};
            print {$hybrid_fh} "$dom[0]#$label#Domain_based_prediction\n";
            $domain_based = 1;
        }
        next if $domain_based;

        # No domain evidence: emit id followed by label#score pairs.
        print {$hybrid_fh} join('#', $dom[0],
            map { ($svm_labels[$_], $dom[$_ + 2]) } 0 .. $#svm_labels), "\n";
    }
    # Close (and thereby flush) before Hybrid_result is read back.
    close($hybrid_fh) or die "$!";
}
# Filter Hybrid_result by the score threshold (second command-line
# argument) and append the outcome to result_both.
#
#   * Lines ending in "Domain_based_prediction" are copied unchanged.
#   * Any other line is split on '#': field 0 is the id, followed by
#     label/score pairs (label at an odd index, score at the next even one).
#     The output line is "id#" plus "label#score#" for every pair that
#     passes.  The pairs at scores 2, 4, 6 and 8 pass when score >= threshold.
#     The pairs at scores 12..22 additionally require the score in field 10
#     to be >= threshold; the field-10 pair itself is never written.
my $threshold = $ARGV[1];
my @filtered;

open(my $hybrid_in, '<', 'Hybrid_result') or die "$!";
while (my $entry = <$hybrid_in>) {
    chomp $entry;

    if ($entry =~ m/Domain_based_prediction$/) {
        push @filtered, "$entry\n";
        next;
    }

    my @km   = split(/\#/, $entry);
    my $kept = "$km[0]#";

    for my $score_at (2, 4, 6, 8) {
        $kept .= "$km[$score_at - 1]#$km[$score_at]#"
            if $km[$score_at] >= $threshold;
    }

    if ($km[10] >= $threshold) {
        for my $score_at (12, 14, 16, 18, 20, 22) {
            $kept .= "$km[$score_at - 1]#$km[$score_at]#"
                if $km[$score_at] >= $threshold;
        }
    }

    push @filtered, "$kept\n";
}
close $hybrid_in;

# result_both is only touched when there is something to append.
if (@filtered) {
    open(my $both_out, '>>', 'result_both') or die "$!";
    print {$both_out} @filtered;
    close $both_out;
}

# Turn result_both into the tab-separated report "Prediction".
#
# The header line is appended first (the file is opened in append mode, so
# an existing Prediction file is extended, not replaced).  Then, for each
# '#'-separated line of result_both:
#   * a line ending in "Domain_based_prediction" yields
#         id <TAB> location <TAB> Domain_based_prediction
#   * a line holding only an id yields  id <TAB> No_Prediction
#   * a line holding k label/score pairs (k = 1..10) yields the id with the
#     first pair, followed by one row per further pair with an empty id
#     column.
# Lines with any other field count produce no output.
open(my $header_out, '>>', 'Prediction') or die "$!";
print {$header_out} "Protein_id\tSubnuclear Location\tSVM_score\n";
close $header_out;

open(my $both_in, '<', 'result_both') or die "$!";
open(my $report, '>>', 'Prediction') or die "$!";
while (my $row = <$both_in>) {
    chomp $row;
    my @pred = split(/\#/, $row);

    # Number of fields after the id (0 when the line is empty).
    my $extra = $#pred > 0 ? $#pred : 0;

    if ($row =~ m/Domain_based_prediction$/) {
        print {$report} "$pred[0]\t$pred[1]\t$pred[2]\n";
        next;
    }

    # Only complete label/score pairs, at most ten of them, are reported.
    next if $extra % 2 or $extra > 20;

    if ($extra == 0) {
        print {$report} "$pred[0]\tNo_Prediction\n";
        next;
    }

    my $text = "$pred[0]\t$pred[1]\t$pred[2]\n";
    for (my $at = 3; $at < $extra; $at += 2) {
        $text .= "\t$pred[$at]\t$pred[$at + 1]\n";
    }
    print {$report} $text;
}
close $report;
close $both_in;
# Delete the intermediate files through the shell (the '*' patterns are
# expanded by it).  Prediction is not in the list, so the report is kept.
my @temp_patterns = qw(
    Hybrid_result aacomp domain_id final* hmm_out* input.fasta
    input_seq.fasta protein_id result_both svm_score_* total_seq
);
system("rm " . join(' ', @temp_patterns));
