File Coverage

lib/Sanger/CGP/Vagrent/Annotators/InsertionAnnotator.pm
Criterion Covered Total %
branch 69 78 88.4
subroutine 18 19 94.7
pod n/a
total 87 97 89.6


line bran sub pod code
1       package Sanger::CGP::Vagrent::Annotators::InsertionAnnotator;
2        
3       ##########LICENCE##########
4       # Copyright (c) 2014-2017 Genome Research Ltd.
5       #
6       # Author: Cancer Genome Project cgpit@sanger.ac.uk
7       #
8       # This file is part of VAGrENT.
9       #
10       # VAGrENT is free software: you can redistribute it and/or modify it under
11       # the terms of the GNU Affero General Public License as published by the Free
12       # Software Foundation; either version 3 of the License, or (at your option) any
13       # later version.
14       #
15       # This program is distributed in the hope that it will be useful, but WITHOUT
16       # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
17       # FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
18       # details.
19       #
20       # You should have received a copy of the GNU Affero General Public License
21       # along with this program. If not, see <http://www.gnu.org/licenses/>.
22       ##########LICENCE##########
23        
24        
25   1   use strict;
26        
27   1   use Bio::Seq;
28        
29   1   use Carp qw(cluck);
30   1   use Log::Log4perl;
31   1   use POSIX qw(ceil);
32   1   use Data::Dumper;
33        
34   1   use Sanger::CGP::Vagrent qw($VERSION);
35   1   use base qw(Sanger::CGP::Vagrent::Annotators::AbstractAnnotator);
36        
37       my $log = Log::Log4perl->get_logger(__PACKAGE__);
38        
39       1;
40        
41       sub _getAnnotation {
42   194   my ($self,$var) = @_;
43       $self->_clearMessages();
44 50     unless(defined($var) && $var->isa('Sanger::CGP::Vagrent::Data::Insertion')){
45       my $msg = 'require a Sanger::CGP::Vagrent::Data::Insertion object not a '.ref($var);
46       $self->addMessage($msg);
47       $log->info($msg);
48       return undef;
49       }
50 50     unless($var->isValid){
51       my $msg = 'insertion not valid';
52       $self->addMessage($msg);
53       $log->error($msg);
54       return undef;
55       }
56       my @trans = $self->_getTranscriptSource->getTranscripts($var);
57 100     unless(defined($trans[0])){
58       my $msg = 'No transcripts returned from transcript source';
59       $self->addMessage($msg);
60       $log->info($msg);
61       return undef;
62       }
63       my @groups;
64       foreach my $t(@trans){
65       my $g = $self->_generateAnnotatonGroup($var,$t);
66 100     if(defined($g)){
67       push(@groups,$g);
68       }
69       }
70 100     unless(scalar(@groups) > 0 && defined($groups[0])){
71       my $msg = 'No annotation groups generated';
72       $self->addMessage($msg);
73       $log->info($msg);
74       return undef;
75       }
76       return @groups;
77       }
78        
79       sub _generateAnnotatonGroup {
80   193   my ($self,$var,$tran) = @_;
81       my ($rAnnot,@groupClasses) = $self->_buildRNAAnnotation($var,$tran);
82       @groupClasses = sort @groupClasses;
83 100     unless(defined($rAnnot)){
84       my $msg = 'No mRNA annotation created';
85       $self->addMessage($msg);
86       $log->info($msg);
87       return undef;
88       }
89       my $group = Sanger::CGP::Vagrent::Data::AnnotationGroup->new( accession => $tran->getAccession,
90       label => $tran->getGeneName,
91       ccds => $tran->getCCDS,
92       type => $tran->getGeneType,);
93        
94 100     if($tran->isProteinCoding){
95 100     if($rAnnot->hasClassification($self->getIntronVariantClass) ||
96       $rAnnot->hasClassification($self->get5KBUpStreamVariantClass) ||
97       $rAnnot->hasClassification($self->get2KBUpStreamVariantClass) ||
98       $rAnnot->hasClassification($self->get5KBDownStreamVariantClass) ||
99       $rAnnot->hasClassification($self->get500BPDownStreamVariantClass)){
100       # Inronic or up/down stream mutations don't need to get any further annotation
101       $group->addAnnotation($rAnnot);
102       } else {
103       my $cAnnot = $self->_buildCDSAnnotation($var,$tran,$rAnnot);
104 50     unless(defined($cAnnot)){
105       my $msg = 'No CDS annotation created';
106       $self->addMessage($msg);
107       $log->info($msg);
108       return undef;
109       }
110       my $pAnnot = $self->_buildProteinAnnotation($var,$tran,$cAnnot,$rAnnot);
111 50     unless(defined($pAnnot)){
112       my $msg = 'No Protein annotation created';
113       $self->addMessage($msg);
114       $log->info($msg);
115       return undef;
116       }
117       $group->addAnnotation($rAnnot);
118       $group->addAnnotation($cAnnot);
119       $group->addAnnotation($pAnnot);
120       }
121       } else {
122       $group->addAnnotation($rAnnot);
123       }
124       $group->addClassification(@groupClasses);
125       return $group;
126       }
127        
128       sub _getMutatedCdsSequence {
129   21   my ($self,$wtDna,$min,$max,$mt) = @_;
130       my $mtDna = substr($wtDna,0,$min) . $mt . substr($wtDna,$min);
131       return $mtDna
132       }
133        
134       sub _getCdsMinPosForProteinCalculation {
135   21   my ($self,$cAnnot) = @_;
136       my $min = undef;
137 100     if($cAnnot->getMinOffset == 0){
  50      
138       $min = $cAnnot->getMinPos;
139       } elsif($cAnnot->getMinOffset == -1){
140       $min = $cAnnot->getMinPos - 1
141       } else {
142       my $msg = 'Unexpected cds min offset, expected 0 or -1 recieved: '.$cAnnot->getMinOffset;
143       $self->addMessage($msg);
144       $log->info($msg);
145       }
146       return $min;
147       }
148        
149       sub _getCdsMaxPosForProteinCalculation {
150   21   my ($self,$cAnnot) = @_;
151       return $cAnnot->getMaxPos();
152       }
153        
154       sub _buildRNAAnnotation {
155   193   my ($self,$var,$tran) = @_;
156       my ($mrnaMin,$mrnaMinOffset,$mrnaMax,$mrnaMaxOffset) = $self->_getmRNAPositions($var,$tran);
157 50     unless(defined($mrnaMin) && defined($mrnaMinOffset) && defined($mrnaMax) && defined($mrnaMaxOffset)){
158       my $msg = 'problem generating mrna coordinates';
159       $self->addMessage($msg);
160       $log->error($msg);
161       return undef;
162       }
163 50     unless(($mrnaMax - $mrnaMin == 1 && $mrnaMinOffset == 0 && $mrnaMaxOffset == 0) || # exon
164       ($mrnaMax == $mrnaMin && $mrnaMaxOffset - $mrnaMinOffset == 1) || # intron/splice/off transcript
165       ($mrnaMin == 0 && $mrnaMax == 1 && $mrnaMinOffset == -1 && $mrnaMaxOffset == 0) || # at start of transcript
166       ($mrnaMin == length($tran->getcDNASeq) && $mrnaMax == 0 && $mrnaMinOffset == 0 && $mrnaMaxOffset == 1)){ # at end of transcript
167       my $msg = "This is an insertion so the coordinates should be adjacent to each other, coordinates returned were MIN: $mrnaMin OFFSET:$mrnaMinOffset and MAX: $mrnaMax OFFSET: $mrnaMaxOffset";
168       $self->addMessage($msg);
169       $log->error($msg);
170       return undef;
171       }
172       my @groupClasses = ($self->classifyTranscript($tran));
173 100     if($mrnaMin == 0 && $mrnaMinOffset < 0){
  100      
174 100     if($self->_isWithin2KBUpstreamOffsetDistance($mrnaMinOffset)){
  100      
175       return ($self->_buildUnknownMRNAAnnotation($var,$tran,$self->getInsertionClass,$self->get2KBUpStreamVariantClass),@groupClasses);
176       } elsif($self->_isWithin5KBOffsetDistance($mrnaMinOffset)){
177       return ($self->_buildUnknownMRNAAnnotation($var,$tran,$self->getInsertionClass,$self->get5KBUpStreamVariantClass),@groupClasses);
178       } else {
179       my $msg = "variant isnt close enough to this transcript, nothing to do";
180       $self->addMessage($msg);
181       $log->error($msg);
182       return undef;
183       }
184       } elsif($mrnaMax == 0 && $mrnaMaxOffset > 0){
185 100     if($self->_isWithin500BPDownstreamOffsetDistance($mrnaMaxOffset)){
  100      
186       return ($self->_buildUnknownMRNAAnnotation($var,$tran,$self->getInsertionClass,$self->get500BPDownStreamVariantClass),@groupClasses);
187       } elsif($self->_isWithin5KBOffsetDistance($mrnaMaxOffset)){
188       return ($self->_buildUnknownMRNAAnnotation($var,$tran,$self->getInsertionClass,$self->get5KBDownStreamVariantClass),@groupClasses);
189       } else {
190       my $msg = "variant isnt close enough to this transcript, nothing to do";
191       $self->addMessage($msg);
192       $log->error($msg);
193       return undef;
194       }
195       }
196       my $wt = '-';
197       my $mt = undef;
198 100     if($tran->getStrand == 1){
199       $mt = lc($var->getInsertedSequence());
200       } else {
201       $mt = $self->_revcompSeq(lc($var->getInsertedSequence()));
202       }
203       $mt =~ s/t/u/ig;
204        
205       my $desc = undef;
206       my $subtype = undef;
207       my @classes = ($self->getInsertionClass);
208        
209 100     if($tran->isProteinCoding){
210 100     if($mrnaMax > $tran->getCdsMinPos && $mrnaMin < $tran->getCdsMaxPos){
  100      
  50      
211       # coding change
212       push(@groupClasses,$self->getCDSClass);
213       } elsif($mrnaMax <= $tran->getCdsMinPos){
214       # 5prime UTR
215       push(@groupClasses,$self->get5PrimeUtrClass);
216       push(@classes,$self->get5PrimeUtrVariantClass);
217       } elsif($mrnaMin >= $tran->getCdsMaxPos){
218       # 3prime UTR
219       push(@groupClasses,$self->get3PrimeUtrClass);
220       push(@classes,$self->get3PrimeUtrVariantClass);
221       } else {
222       my $msg = "strange positional info, can't work out subtype : var $mrnaMin $mrnaMinOffset $mrnaMax $mrnaMaxOffset transcript ".$tran->getCdsMinPos." - ".$tran->getCdsMaxPos;
223       $self->addMessage($msg);
224       $log->error($msg);
225       return undef;
226       }
227       }
228        
229 100     if(($mrnaMinOffset < 0 && $self->_isIntronicOffsetDistance($mrnaMinOffset)) || ($mrnaMaxOffset > 0 && $self->_isIntronicOffsetDistance($mrnaMaxOffset))){
  100      
230       # its intronic
231       push(@groupClasses,$self->getIntronClass);
232       return ($self->_buildUnknownMRNAAnnotation($var,$tran,$self->getInsertionClass,$self->getIntronVariantClass),@groupClasses);
233       } elsif(($mrnaMinOffset == 0 && $mrnaMaxOffset == 0) || ($mrnaMinOffset == -1 && $mrnaMaxOffset == 0) || ($mrnaMinOffset == 0 && $mrnaMaxOffset == 1)){
234       # its in an exon
235       push(@groupClasses,$self->getExonClass);
236 100     if($mrnaMinOffset != $mrnaMaxOffset) {
237       $subtype = Sanger::CGP::Vagrent::Data::Annotation::getPositionOffsetSubtype();
238       } else {
239       $subtype = Sanger::CGP::Vagrent::Data::Annotation::getPositionKnownSubtype();
240       }
241 50     if($tran->isProteinCoding){
242 100     if($self->_arrayHasString($self->getCDSClass,@groupClasses)){
243 100     if(length($mt) % 3 == 0){
244       push(@classes,$self->getInFrameVariantClass);
245       } else {
246       push(@classes,$self->getFrameShiftVariantClass);
247       }
248       }
249       } else {
250       push(@classes,$self->getNonCodingTranscriptVariantClass);
251       }
252       $desc = 'r.';
253 100     if($mrnaMinOffset == 0){
254       $desc .= $mrnaMin.'_';
255       } else {
256       $desc .= $mrnaMin.$mrnaMinOffset.'_';
257       }
258 100     if($mrnaMaxOffset == 0){
259       $desc .= $mrnaMax;
260       } else {
261       $desc .= $mrnaMax.'+'.$mrnaMaxOffset;
262       }
263       $desc .= 'ins'.$mt;
264       } else {
265       # its not in an exon, and its not intronic. Must be related to splice site.
266       $subtype = Sanger::CGP::Vagrent::Data::Annotation::getPositionOffsetSubtype();
267 100     if(($mrnaMinOffset < 0 && $mrnaMinOffset >= $self->_getConsesnsusSpliceBeforeBoundry) || ($mrnaMinOffset > 0 && $mrnaMaxOffset <= $self->_getConsesnsusSpliceAfterBoundry)){
268       # Essential Splice site change
269       push(@classes,$self->getEssentialSpliceSiteVariantClass);
270       push(@groupClasses,$self->getEssentialSpliceSiteClass);
271       } else {
272       # splice region change
273       push(@classes,$self->getSpliceRegionVariantClass);
274       push(@groupClasses,$self->getSpliceRegionClass);
275       }
276 100     if($mrnaMinOffset < 0){
277       $desc = 'r.'.$mrnaMin.$mrnaMinOffset.'_'.$mrnaMax.$mrnaMaxOffset.'ins'.$mt;
278       } else {
279       $desc = 'r.'.$mrnaMin.'+'.$mrnaMinOffset.'_'.$mrnaMax.'+'.$mrnaMaxOffset.'ins'.$mt;
280       }
281       }
282        
283       my $anno = Sanger::CGP::Vagrent::Data::Annotation->new(wt => uc($wt),
284       mt => uc($mt),
285       minpos => $mrnaMin,
286       minOffset => $mrnaMinOffset,
287       maxpos => $mrnaMax,
288       maxOffset => $mrnaMaxOffset,
289       acc => $tran->getAccession,
290       accversion => $tran->getAccessionVersion,
291       db => $tran->getDatabase,
292       dbversion => $tran->getDatabaseVersion,
293       seqlength => length($tran->getcDNASeq),
294       description => $desc,
295       context => Sanger::CGP::Vagrent::Data::Annotation::getmRNAAnnotationContext(),
296       type => Sanger::CGP::Vagrent::Data::Annotation::getInsertionAnnotationType(),
297       subtype => $subtype);
298        
299       $anno->addClassification(@classes);
300        
301       return ($anno,@groupClasses);
302       }
303        
304       sub _getWildTypeStringForCDSAnno {
305   68   my ($self,$var,$tran,$rAnnot) = @_;
306       my $wt = $rAnnot->getWt();
307       $wt =~ s/u/t/ig;
308       return $wt;
309       }
310        
311       sub _getMutantStringForCDSAnno {
312   68   my ($self,$var,$tran,$rAnnot) = @_;
313       my $mt = $rAnnot->getMt();
314       $mt =~ s/u/t/ig;
315       return $mt;
316       }
317        
318       sub _getCDSDescriptionString {
319   68   my ($self,$tran,$mutStart,$mutEnd,$mutStartOffset,$mutEndOffset,$wt,$mt) = @_;
320       my $desc = 'c.';
321 100     if($mutStartOffset == 0){
  100      
322       $desc .= $mutStart.'_';
323       } elsif ($mutStartOffset > 0){
324       $desc .= $mutStart.'+'.$mutStartOffset.'_';
325       } else {
326       $desc .= $mutStart.$mutStartOffset.'_';
327       }
328 100     if($mutEndOffset == 0){
  100      
329       $desc .= $mutEnd;
330       } elsif($mutEndOffset > 0) {
331       $desc .= $mutEnd.'+'.$mutEndOffset;
332       } else {
333       $desc .= $mutEnd.$mutEndOffset;
334       }
335       $desc .= 'ins'.uc($mt);
336       return $desc;
337       }
338        
339       sub _getDefaultCDSAnnotationType {
340   68   return Sanger::CGP::Vagrent::Data::Annotation::getInsertionAnnotationType();
341       }
342        
343       sub _getDefaultProteinAnnotationType {
344   0   return Sanger::CGP::Vagrent::Data::Annotation::getInsertionAnnotationType();
345       }
346        
347       =head1 NAME
348        
349       Sanger::CGP::Vagrent::Annotators::InsertionAnnotator - Annotator for insertion variants
350        
351       =head1 DESCRIPTION
352        
353       This annotates insertion variants, it provides L<AnnotatonGroups|Sanger::CGP::Vagrent::Data::AnnotationGroup> for each transcript returned from the L<TranscriptSource|Sanger::CGP::Vagrent::TranscriptSource::AbstractTranscriptSource>
354        
355       It will only process L<Sanger::CGP::Vagrent::Data::Insertion|Sanger::CGP::Vagrent::Data::Insertion> objects, if any other L<Variation|Sanger::CGP::Vagrent::Data::AbstractVariation> objects are sent in it will return an empty answer.
356        
357       It inherits from L<Sanger::CGP::Vagrent::Annotators::AbstractAnnotator|Sanger::CGP::Vagrent::Annotators::AbstractAnnotator>.
358        
359       =head1 METHODS
360        
361       See L<Sanger::CGP::Vagrent::Annotators::AbstractAnnotator|Sanger::CGP::Vagrent::Annotators::AbstractAnnotator>