# Normalize a DESeq2 table by per-gene transcript length taken from a GFF3 file.
#
# Usage:
#   perl <this_script> <annotation.gff3> <DESeq2_table> <normalized_output>
#
# Arguments:
#   1. GFF3 annotation. Lines look like:
#        chr_001 Manual gene            256 1182 . + . ID=g18410;Name=TTHERM_00161861;UserNamed=Unnamed;TGDAnno=Unknow;info=hypothetical protein;
#        chr_001 Manual mRNA            256 1182 . + . ID=g18410.t1;Parent=g18410;
#        chr_001 Manual five_prime_UTR  256  333 . + . ID=g18410.t1.utr;Parent=g18410.t1;
#        chr_001 Manual CDS             334  342 . + . ID=g18410.t1.cds;Parent=g18410.t1;
#        chr_001 Manual CDS             394  453 . + . ID=g18410.t1.cds;Parent=g18410.t1;
#        chr_001 Manual CDS             505  855 . + . ID=g18410.t1.cds;Parent=g18410.t1;
#        chr_001 Manual three_prime_UTR 856 1182 . + . ID=g18410.t1.utr;Parent=g18410.t1;
#   2. DESeq2 table: a TTHERM_<digits> gene name followed by nine
#      whitespace-separated values, taken in the order
#      IBD1-BRD x3, IBD1-KO x3, SB210 x3.
#   3. Output path for the normalized, tab-separated table.
#
# For every gene the lengths of all five_prime_UTR, CDS and three_prime_UTR
# features are summed (in kb); each of the nine values is divided by that
# length and the per-condition mean of the three replicates is prepended.
# Features are attributed to a gene through the leading "g<digits>" of their
# ID, so features of every transcript of a gene are summed together.
#
# Genes in the DESeq2 table that have no usable length in the GFF3 are skipped
# with a warning on STDERR instead of aborting with a division by zero.

use strict;
use warnings;

@ARGV == 3
    or die "Usage: $0 <annotation.gff3> <DESeq2 table> <normalized output>\n";
my ($gff_path, $deseq_path, $out_path) = @ARGV;

open(my $gff_fh, '<', $gff_path)
    or die "Cannot open GFF3 file '$gff_path': $!\n";
open(my $deseq_fh, '<', $deseq_path)
    or die "Cannot open DESeq2 file '$deseq_path': $!\n";
open(my $out_fh, '>', $out_path)
    or die "Cannot open output file '$out_path' for writing: $!\n";

my %name_of;      # gene ID (e.g. g18410) -> gene name (e.g. TTHERM_00161861)
my %length_kb;    # gene name -> summed UTR + CDS length in kilobases

# Pass 1: collect gene names and accumulate transcript lengths from the GFF3.
while (my $line = <$gff_fh>) {
    if ($line =~ /\s+gene\s+\d+\s+\d+\s+.+?ID=(g\d+);Name=(TTHERM_\d+);/) {
        $name_of{$1} = $2;
    }
    elsif ($line =~ /\s+(?:five_prime_UTR|CDS|three_prime_UTR)\s+(\d+)\s+(\d+)\s+.+?ID=(g\d+)/) {
        my ($start, $end, $gene_id) = ($1, $2, $3);

        # A feature whose gene line was not seen (or did not carry a
        # TTHERM name) cannot be attributed to any output row.
        my $gene_name = $name_of{$gene_id};
        next unless defined $gene_name;

        # Coordinates are inclusive, hence the +1. The division is done per
        # feature to keep the floating-point results of the original script.
        $length_kb{$gene_name} += ($end - $start + 1) / 1000;
    }
}
close $gff_fh;

print {$out_fh} "GeneID\tIBD1-BRD-ave\tIBD1-KO-ave\tSB210-ave\tIBD1-BRD-1\tIBD1-BRD-2\tIBD1-BRD-3\tIBD1-KO-1\tIBD1-KO-2\tIBD1-KO-3\tSB210-1\tSB210-2\tSB210-3\n";

# Pass 2: length-normalize every row of the DESeq2 table.
while (my $line = <$deseq_fh>) {

    # Lines without a TTHERM name and nine following fields (e.g. a header)
    # do not match and are ignored.
    my ($gene_name, @values) = $line
        =~ /(TTHERM_\d+)\s+(.+?)\s+(.+?)\s+(.+?)\s+(.+?)\s+(.+?)\s+(.+?)\s+(.+?)\s+(.+?)\s+(.+)/
        or next;

    my $gene_length = $length_kb{$gene_name};
    if (!$gene_length) {
        warn "Skipping $gene_name: no UTR/CDS length found in '$gff_path'\n";
        next;
    }

    # @normalized order: BRD-1..3, KO-1..3, SB210-1..3.
    my @normalized = map { $_ / $gene_length } @values;

    my $brd_mean   = ($normalized[0] + $normalized[1] + $normalized[2]) / 3;
    my $ko_mean    = ($normalized[3] + $normalized[4] + $normalized[5]) / 3;
    my $sb210_mean = ($normalized[6] + $normalized[7] + $normalized[8]) / 3;

    print {$out_fh}
        join("\t", $gene_name, $brd_mean, $ko_mean, $sb210_mean, @normalized),
        "\n";
}
close $deseq_fh;

# Buffered write errors (e.g. a full disk) only surface when the handle closes.
close $out_fh
    or die "Error while writing '$out_path': $!\n";