File Coverage

lib/Sanger/CGP/Vagrent/TranscriptSource/FileBasedTranscriptSource.pm
Criterion Covered Total %
branch 14 38 36.8
subroutine 16 21 76.1
pod 2 2 100.0
total 32 61 52.4


line bran sub pod code
1       package Sanger::CGP::Vagrent::TranscriptSource::FileBasedTranscriptSource;
2        
3       ##########LICENCE##########
4       # Copyright (c) 2014 Genome Research Ltd.
5       #
6       # Author: Cancer Genome Project cgpit@sanger.ac.uk
7       #
8       # This file is part of VAGrENT.
9       #
10       # VAGrENT is free software: you can redistribute it and/or modify it under
11       # the terms of the GNU Affero General Public License as published by the Free
12       # Software Foundation; either version 3 of the License, or (at your option) any
13       # later version.
14       #
15       # This program is distributed in the hope that it will be useful, but WITHOUT
16       # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
17       # FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
18       # details.
19       #
20       # You should have received a copy of the GNU Affero General Public License
21       # along with this program. If not, see <http://www.gnu.org/licenses/>.
22       ##########LICENCE##########
23        
24        
25   4   use strict;
26        
27   4   use Carp;
28   4   use Log::Log4perl;
29   4   use Data::Dumper;
30   4   use Const::Fast qw(const);
31        
32   4   use Bio::DB::HTS;
33   4   use Bio::DB::HTS::Tabix;
34        
35   4   use Sanger::CGP::Vagrent::Data::Transcript;
36   4   use Sanger::CGP::Vagrent::Data::Exon;
37   4   use Sanger::CGP::Vagrent qw($VERSION);
38   4   use base qw(Sanger::CGP::Vagrent::TranscriptSource::AbstractTranscriptSource);
39        
40       my $log = Log::Log4perl->get_logger(__PACKAGE__);
41        
42       const my $SEARCH_BUFFER => 10000;
43        
44       const my $GENE_GRAB_TEMPLATE => 'tabix %s %s | cut -s -f5 | uniq |';
45       const my $TRANSCRIPT_GRAB_TEMPLATE => q{tabix %s %s | awk '$5 == "%s" { print $0;}' | cut -s -f7 |};
46        
47       1;
48        
49       sub getTranscripts {
50   1724 1 my ($self,$gp) = @_;
51 50     unless(defined($gp) && $gp->isa('Sanger::CGP::Vagrent::Data::AbstractGenomicPosition')){
52       $log->error("Did not recieve a Sanger::CGP::Vagrent::Data::AbstractGenomicPosition object");
53       return undef;
54       }
55       my $trans = $self->_getTranscriptsFromCache($gp);
56 100     return @$trans if defined $trans;
57       return;
58       }
59        
60       sub getTranscriptsForNextGeneInDumpRegion {
61   0 1 my ($self) = @_;
62       my $gr = $self->getDumpRegion;
63 0     $log->logcroak('must define a dump region before trying to loop over the genes') unless defined $gr;
64 0     if($gr->getMinPos == $gr->getMaxPos && $gr->getMaxPos == 0){
65       # full sequence region scan
66       $self->{_dumpInfo}->{_fullSeq} = 1;
67       } else {
68       $self->{_dumpInfo}->{_fullSeq} = 0;
69       }
70 0     $self->_populateGeneList($gr) unless defined $self->{_dumpInfo}->{_geneList};
71 0     $self->_getDumpTranscriptStore($gr) unless defined $self->{_dumpInfo}->{_transcriptStore};
72        
73       my $geneListSize = scalar @{$self->{_dumpInfo}->{_geneList}};
74       for(my $i = $self->{_dumpInfo}->{_counter} ; $i <= $geneListSize ; $i++){
75       $self->{_dumpInfo}->{_counter}++;
76 0     next unless(defined $self->{_dumpInfo}->{_geneList}->[$i]);
77       my $transList = $self->_getTranascriptsForGeneName($gr,$self->{_dumpInfo}->{_geneList}->[$i]);
78 0     next unless(defined $transList && scalar(@$transList) > 0);
79       return @$transList;
80       }
81       return undef;
82       }
83        
84       sub _getTranascriptsForGeneName {
85   0   my ($self,$gr,$genename) = @_;
86       my $out;
87       my $cmd = sprintf $TRANSCRIPT_GRAB_TEMPLATE, $self->{_cache}, $self->_generateLocationString($gr), $genename;
88 0     open my $fh, $cmd or $log->logcroak("unable to run transcript lookup for gene name $genename: $cmd");
89       while(<$fh>){
90       my $VAR1;
91       eval $_;
92       push(@$out,$VAR1);
93       }
94       close $fh;
95       return $out;
96       }
97        
98       sub _getDumpTranscriptStore {
99   0   my ($self,$gr) = @_;
100        
101        
102       }
103        
104        
105        
106       sub _populateGeneList {
107   0   my ($self,$gr) = @_;
108       my $cmd = sprintf $GENE_GRAB_TEMPLATE, $self->{_cache}, $self->_generateLocationString($gr);
109 0     open my $fh, $cmd or $log->logcroak('unable to run gene name lookup for region dump');
110       @{$self->{_dumpInfo}->{_geneList}} = <$fh>;
111       close $fh;
112       chomp @{$self->{_dumpInfo}->{_geneList}};
113       $self->{_dumpInfo}->{_counter} = 0;
114       }
115        
116       sub _generateLocationString {
117   0   my ($self,$gr) = @_;
118       return $gr->getChr.':'.$gr->getMinPos.'-'.$gr->getMaxPos;
119       }
120        
121       sub _getTranscriptsFromCache {
122   1724   my ($self,$gp) = @_;
123 100     $self->{_cache_tbx} = Bio::DB::HTS::Tabix->new(filename => $self->{_cache}) unless defined $self->{_cache_tbx};
124       my $min;
125       my $max = $gp->getMaxPos + $SEARCH_BUFFER;
126 50     if($gp->getMinPos() < $SEARCH_BUFFER){
127       $min = 0;
128       } else {
129       $min = ($gp->getMinPos - $SEARCH_BUFFER);
130       }
131       my $iter = $self->{_cache_tbx}->query(sprintf '%s:%d-%d', $gp->getChr(),$min,$max);
132 50     return undef unless defined $iter;
133       my $out = undef;
134       while(my $ret = $iter->next){
135       my $raw = (split("\t",$ret))[6];
136       my $VAR1;
137       eval $raw;
138       $VAR1->{_cdnaseq} = $self->_getTranscriptSeq($VAR1);
139       push @$out, $VAR1;
140       }
141       return $out;
142       }
143        
144       sub _init {
145   862   my $self = shift;
146       my %vars = @_;
147       foreach my $k(keys(%vars)){
148 50     if($k eq 'cache'){
149       $self->_setCacheFile($vars{$k});
150       }
151       }
152       }
153        
154       sub _setCacheFile {
155   862   my ($self,$cache) = @_;
156        
157 50     unless(-e $cache && -f $cache && -r $cache){
158       $log->logcroak("Specified cache file is unreadable: $cache");
159       }
160       my $cache_index = $cache .".tbi";
161 50     unless(-e $cache_index && -f $cache_index && -r $cache_index){
162       $log->logcroak("cache index file is unreadable: $cache_index");
163       }
164       my $fa = $cache;
165       $fa =~ s/\.cache.+$/.fa/;
166 50     unless(-e $fa && -f $fa && -r $fa){
167       $log->logcroak("cache fasta file is unreadable: $fa");
168       }
169       my $fai = $fa . ".fai";
170 50     unless(-e $fai && -f $fai && -r $fai){
171       $log->logcroak("cache fasta index file is unreadable: $fai");
172       }
173       $self->{_cache} = $cache;
174       $self->{_cache_fa} = $fa;
175       }
176        
177       sub _getTranscriptSeq {
178   1704   my ($self,$trans) = @_;
179 100     unless(defined $self->{_fai_obj}){
180       $self->{_fai_obj} = Bio::DB::HTS::Fai->load($self->{_cache_fa});
181       }
182       return $self->{_fai_obj}->fetch($trans->getAccession);
183       }