File Coverage

lib/Sanger/CGP/Vagrent/Annotators/ComplexIndelAnnotator.pm
Criterion Covered Total %
branch 88 102 86.2
subroutine 16 17 94.1
pod n/a
total 104 119 87.3


line bran sub pod code
1       package Sanger::CGP::Vagrent::Annotators::ComplexIndelAnnotator;
2        
3       ##########LICENCE##########
4       # Copyright (c) 2014-2017 Genome Research Ltd.
5       #
6       # Author: Cancer Genome Project cgpit@sanger.ac.uk
7       #
8       # This file is part of VAGrENT.
9       #
10       # VAGrENT is free software: you can redistribute it and/or modify it under
11       # the terms of the GNU Affero General Public License as published by the Free
12       # Software Foundation; either version 3 of the License, or (at your option) any
13       # later version.
14       #
15       # This program is distributed in the hope that it will be useful, but WITHOUT
16       # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
17       # FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
18       # details.
19       #
20       # You should have received a copy of the GNU Affero General Public License
21       # along with this program. If not, see <http://www.gnu.org/licenses/>.
22       ##########LICENCE##########
23        
24        
25   1   use strict;
26        
27   1   use Bio::Seq;
28        
29   1   use Carp qw(cluck);
30   1   use Log::Log4perl;
31   1   use POSIX qw(ceil);
32   1   use Data::Dumper;
33   1   use Sanger::CGP::Vagrent qw($VERSION);
34        
35   1   use base qw(Sanger::CGP::Vagrent::Annotators::DeletionAnnotator);
36        
37       my $log = Log::Log4perl->get_logger(__PACKAGE__);
38        
39       1;
40        
41       sub _getAnnotation {
42   230   my ($self,$var) = @_;
43       $self->_clearMessages();
44 50     unless(defined($var) && $var->isa('Sanger::CGP::Vagrent::Data::ComplexIndel')){
45       my $msg = 'require a Sanger::CGP::Vagrent::Data::ComplexIndel object not a '.ref($var);
46       $self->addMessage($msg);
47       $log->info($msg);
48       return undef;
49       }
50 50     unless($var->isValid){
51       my $msg = 'complex indel not valid';
52       $self->addMessage($msg);
53       $log->error($msg);
54       return undef;
55       }
56       my @trans = $self->_getTranscriptSource->getTranscripts($var);
57 100     unless(defined($trans[0])){
58       my $msg = 'No transcripts returned from transcript source';
59       $self->addMessage($msg);
60       $log->info($msg);
61       return undef;
62       }
63       my @groups;
64       foreach my $t(@trans){
65       my $g = $self->_generateAnnotatonGroup($var,$t);
66 100     if(defined($g)){
67       push(@groups,$g);
68       }
69       }
70 100     unless(scalar(@groups) > 0 && defined($groups[0])){
71       my $msg = 'No annotation groups generated';
72       $self->addMessage($msg);
73       $log->info($msg);
74       return undef;
75       }
76       return @groups;
77       }
78        
79       sub _generateAnnotatonGroup {
80   227   my ($self,$var,$tran) = @_;
81       my ($rAnnot,@groupClasses) = $self->_buildRNAAnnotation($var,$tran);
82       @groupClasses = sort @groupClasses;
83 100     unless(defined($rAnnot)){
84       my $msg = 'No mRNA annotation created';
85       $self->addMessage($msg);
86       $log->info($msg);
87       return undef;
88       }
89       my $group = Sanger::CGP::Vagrent::Data::AnnotationGroup->new( accession => $tran->getAccession,
90       label => $tran->getGeneName,
91       ccds => $tran->getCCDS,
92       type => $tran->getGeneType,);
93        
94 100     if($tran->isProteinCoding){
95 100     if( ( $rAnnot->hasClassification($self->getIntronVariantClass) ||
96       $rAnnot->hasClassification($self->get5KBUpStreamVariantClass) ||
97       $rAnnot->hasClassification($self->get2KBUpStreamVariantClass) ||
98       $rAnnot->hasClassification($self->get5KBDownStreamVariantClass) ||
99       $rAnnot->hasClassification($self->get500BPDownStreamVariantClass)) &&
100       !$rAnnot->hasClassification($self->getEssentialSpliceSiteVariantClass) &&
101       !$rAnnot->hasClassification($self->getSpliceRegionVariantClass) &&
102       !$rAnnot->hasClassification($self->getFrameShiftVariantClass) &&
103       !$rAnnot->hasClassification($self->getInFrameVariantClass) &&
104       !$rAnnot->hasClassification($self->get5PrimeUtrVariantClass) &&
105       !$rAnnot->hasClassification($self->get3PrimeUtrVariantClass) &&
106       !$rAnnot->hasClassification($self->getComplexChangeVariantClass)){
107       # Inronic or up/down stream mutations don't need to get any further annotation
108       $group->addAnnotation($rAnnot);
109       } else {
110       my $cAnnot = $self->_buildCDSAnnotation($var,$tran,$rAnnot);
111 50     unless(defined($cAnnot)){
112       my $msg = 'No CDS annotation created';
113       $self->addMessage($msg);
114       $log->info($msg);
115       return undef;
116       }
117       my $pAnnot = $self->_buildProteinAnnotation($var,$tran,$cAnnot,$rAnnot);
118 50     unless(defined($pAnnot)){
119       my $msg = 'No Protein annotation created';
120       $self->addMessage($msg);
121       $log->info($msg);
122       return undef;
123       }
124       $group->addAnnotation($rAnnot);
125       $group->addAnnotation($cAnnot);
126       $group->addAnnotation($pAnnot);
127       }
128       } else {
129       $group->addAnnotation($rAnnot);
130       }
131       $group->addClassification(@groupClasses);
132       return $group;
133       }
134        
135       sub _buildRNAAnnotation {
136   227   my ($self,$var,$tran) = @_;
137       my ($mrnaMin,$mrnaMinOffset,$mrnaMax,$mrnaMaxOffset) = $self->_getmRNAPositions($var,$tran);
138 50     unless(defined($mrnaMin) && defined($mrnaMinOffset) && defined($mrnaMax) && defined($mrnaMaxOffset)){
139       my $msg = 'problem generating mrna coordinates';
140       $self->addMessage($msg);
141       $log->error($msg);
142       return undef;
143       }
144 50     unless($self->_safetyCheck($var,$tran,$mrnaMin,$mrnaMinOffset,$mrnaMax,$mrnaMaxOffset)){
145       # something has gone wrong,
146       my $msg = 'The affected transcript WT sequence in the deleted sequence doesnt match the affected transcript WT sequence from the transcript';
147       $self->addMessage($msg);
148       $log->error($msg);
149       return undef;
150       }
151        
152       my @classes = ($self->getComplexIndelClass);
153       my @groupClasses = ($self->classifyTranscript($tran));
154        
155       my $upstream = $self->_upstreamVariantCheck($mrnaMin,$mrnaMinOffset,$mrnaMax,$mrnaMaxOffset,\@classes);
156 100     if(defined $upstream){
157 100     if($upstream){
158       return ($self->_buildUnknownMRNAAnnotation($var,$tran,@classes),@groupClasses);
159       }
160       } else {
161       return undef;
162       }
163        
164       my $downstream = $self->_downstreamVariantCheck($mrnaMin,$mrnaMinOffset,$mrnaMax,$mrnaMaxOffset,\@classes);
165 100     if(defined $downstream){
166 100     if($downstream){
167       return ($self->_buildUnknownMRNAAnnotation($var,$tran,@classes),@groupClasses);
168       }
169       } else {
170       return undef;
171       }
172        
173       # delt with up/down stream issues, can reset coordinates to the extremes of the transcript;
174 100     if($mrnaMin == 0 && $mrnaMinOffset <= 0){
175       $mrnaMin = 1;
176       $mrnaMinOffset = 0;
177       }
178 100     if($mrnaMax == 0 && $mrnaMaxOffset >= 0){
179       $mrnaMax = length($tran->getcDNASeq);
180       $mrnaMaxOffset = 0;
181       }
182        
183 100     if($tran->isProteinCoding){
184 100     if(($mrnaMax > $tran->getCdsMinPos || ($mrnaMax == $tran->getCdsMinPos && $mrnaMaxOffset >= 0)) &&
185       ($mrnaMin < $tran->getCdsMaxPos || ($mrnaMin == $tran->getCdsMaxPos && $mrnaMinOffset <= 0))){
186       # if($mrnaMax >= $tran->getCdsMinPos && $mrnaMin <= $tran->getCdsMaxPos){
187       # coding change
188       push(@groupClasses,$self->getCDSClass);
189       }
190 100     if($mrnaMin < $tran->getCdsMinPos || ($mrnaMin == $tran->getCdsMinPos && $mrnaMinOffset < 0)){
191       #if($mrnaMin < $tran->getCdsMinPos){
192       # 5prime UTR
193       push(@groupClasses,$self->get5PrimeUtrClass);
194 100     push(@classes,$self->get5PrimeUtrVariantClass) unless($self->_arrayHasString($self->getCDSClass,@groupClasses));
195       }
196 100     if($mrnaMax > $tran->getCdsMaxPos || ($mrnaMax == $tran->getCdsMaxPos && $mrnaMaxOffset > 0)){
197       #if($mrnaMax > $tran->getCdsMaxPos){
198       # 3prime UTR
199       push(@groupClasses,$self->get3PrimeUtrClass);
200 100     push(@classes,$self->get3PrimeUtrVariantClass) unless($self->_arrayHasString($self->getCDSClass,@groupClasses));
201       }
202       }
203        
204       my $tmpGroupClassesHash = $self->_classifyDeletion($tran,$mrnaMin,$mrnaMinOffset,$mrnaMax,$mrnaMaxOffset);
205       push(@groupClasses, keys %$tmpGroupClassesHash);
206 100     if(scalar(keys %$tmpGroupClassesHash) == 1 && defined($tmpGroupClassesHash->{$self->getIntronClass})){
207       # its intron only
208       return ($self->_buildUnknownMRNAAnnotation($var,$tran,$self->getComplexIndelClass,$self->getIntronVariantClass),@groupClasses);
209       }
210        
211       my $wt = $self->_getWildTypeStringForRNAAnno($var,$tran);
212       $wt =~ tr/Tt/Uu/;
213       my $mt = undef;
214 100     if($tran->getStrand == 1){
215       $mt = lc($var->getInsertedSequence());
216       } else {
217       $mt = $self->_revcompSeq(lc($var->getInsertedSequence()));
218       }
219       $mt =~ tr/Tt/Uu/;
220        
221       my $desc = undef;
222        
223 50     if($mrnaMin == 1 && $mrnaMax == length($tran->getcDNASeq)){
224       $desc = 'r.0';
225       } else {
226       $desc = $self->_generateDNAindelDescriptionString('r.',$mrnaMin,$mrnaMax,$mrnaMinOffset,$mrnaMaxOffset,$wt,$mt);
227       }
228        
229       my $subtype = undef;
230        
231 50     if($mrnaMin == 0 && $mrnaMax == 0){
  100      
232       $subtype = Sanger::CGP::Vagrent::Data::Annotation::getPositionOffSequenceSubtype();
233       } elsif($mrnaMinOffset == 0 && $mrnaMaxOffset == 0){
234       $subtype = Sanger::CGP::Vagrent::Data::Annotation::getPositionKnownSubtype();
235       } else {
236       $subtype = Sanger::CGP::Vagrent::Data::Annotation::getPositionOffsetSubtype();
237       }
238        
239       my $anno = Sanger::CGP::Vagrent::Data::Annotation->new( wt => uc($wt),
240       mt => uc($mt),
241       minpos => $mrnaMin,
242       minOffset => $mrnaMinOffset,
243       maxpos => $mrnaMax,
244       maxOffset => $mrnaMaxOffset,
245       acc => $tran->getAccession,
246       accversion => $tran->getAccessionVersion,
247       db => $tran->getDatabase,
248       dbversion => $tran->getDatabaseVersion,
249       seqlength => length($tran->getcDNASeq),
250       description => $desc,
251       context => Sanger::CGP::Vagrent::Data::Annotation::getmRNAAnnotationContext(),
252       type => Sanger::CGP::Vagrent::Data::Annotation::getComplexAnnotationType(),
253       subtype => $subtype);
254        
255 100     if(defined($tmpGroupClassesHash->{$self->getExonClass}) && defined($tmpGroupClassesHash->{$self->getEssentialSpliceSiteClass})){
256       # special case, variation spans over the end of the exon. This is a complex change in the transcript
257       push(@classes,$self->getComplexChangeVariantClass);
258 50     unless($tran->isProteinCoding){
259       push(@classes,$self->getNonCodingTranscriptVariantClass);
260       }
261        
262       } else {
263 100     if(defined($tmpGroupClassesHash->{$self->getSpliceRegionClass})){
264       push(@classes,$self->getSpliceRegionVariantClass);
265       }
266 100     if(defined($tmpGroupClassesHash->{$self->getEssentialSpliceSiteClass})){
267       push(@classes,$self->getEssentialSpliceSiteVariantClass);
268       }
269 100     if(defined($tmpGroupClassesHash->{$self->getIntronClass})){
270       push(@classes,$self->getIntronVariantClass);
271       }
272 100     if(defined($tmpGroupClassesHash->{$self->getExonClass})){
273 100     if($tran->isProteinCoding){
274 100     if($self->_arrayHasString($self->getCDSClass,@groupClasses)){
275 50     if($self->_arrayHasString($self->get2KBUpStreamVariantClass,@classes) || $self->_arrayHasString($self->get5PrimeUtrClass,@groupClasses) || $self->_arrayHasString($self->get3PrimeUtrClass,@groupClasses) || $self->_arrayHasString($self->get500BPDownStreamVariantClass,@classes)){
276       # it overhangs the CDS-to-UTR/upstream/downstream boundry
277       push(@classes,$self->getComplexChangeVariantClass);
278       } else {
279 100     if(abs(length($wt) - length($mt)) % 3 == 0){
280       push(@classes,$self->getInFrameVariantClass);
281       } else {
282       push(@classes,$self->getFrameShiftVariantClass);
283       }
284       }
285       }
286       } else {
287       push(@classes,$self->getNonCodingTranscriptVariantClass);
288       }
289       }
290       }
291        
292       $anno->addClassification(@classes);
293        
294       return ($anno,@groupClasses);
295       }
296        
297       sub _getMutantStringForCDSAnno {
298   79   my ($self,$var,$tran,$rAnnot) = @_;
299       my $mt = $rAnnot->getMt;
300       $mt =~ s/u/t/ig;
301       return $mt;
302       }
303        
304       sub _getCDSDescriptionString {
305   79   my ($self,$tran,$mutStart,$mutEnd,$mutStartOffset,$mutEndOffset,$wt,$mt) = @_;
306       my $desc = undef;
307 50     if($mutStart == 1 && $mutEnd == length($tran->getCdsSeq)){
308       $desc = 'c.0';
309       } else {
310       $desc = $self->_generateDNAindelDescriptionString('c.',$mutStart,$mutEnd,$mutStartOffset,$mutEndOffset,uc($wt),uc($mt));
311       }
312       return $desc;
313       }
314        
315       sub _getMutatedCdsSequence {
316   18   my ($self,$wtDna,$min,$max,$mt) = @_;
317       my $codingDelLength = ($max - $min) + 1;
318       my $mtDna = $wtDna;
319       substr($mtDna,($min - 1),$codingDelLength,$mt);
320       return $mtDna
321       }
322        
323       sub _generateDNAindelDescriptionString {
324   215   my ($self,$pre,$mutStart,$mutEnd,$mutStartOffset,$mutEndOffset,$wt,$mt) = @_;
325       my $desc = 'WIBBLE';
326 100     if($mutStart == $mutEnd && $mutStartOffset == $mutEndOffset){
327       # single base change
328 100     if($mutStartOffset > 0){
  100      
  50      
329       $desc = $pre.$mutStart.'+'.$mutStartOffset.'del'.$wt;
330       } elsif($mutStartOffset < 0){
331       $desc = $pre.$mutStart.$mutStartOffset.'del'.$wt;
332       } elsif ($mutStartOffset == 0) {
333       $desc = $pre.$mutStart.'del'.$wt;
334       }
335       $desc .= 'ins'.$mt;
336        
337       } else {
338       # multi base change
339 100     if($mutStartOffset > 0){
  100      
  50      
340       $desc = $pre.$mutStart.'+'.$mutStartOffset.'_';
341       } elsif($mutStartOffset < 0){
342       $desc = $pre.$mutStart.$mutStartOffset.'_';
343       } elsif($mutStartOffset == 0) {
344       $desc = $pre.$mutStart.'_';
345       }
346        
347 100     if($mutEndOffset > 0){
  100      
  50      
348       $desc .= $mutEnd.'+'.$mutEndOffset.'del';
349       } elsif($mutEndOffset < 0){
350       $desc .= $mutEnd.$mutEndOffset.'del';
351       } elsif($mutEndOffset == 0){
352       $desc .= $mutEnd.'del';
353       }
354        
355 100     if(length($wt) > 15){
356       $desc .= length($wt);
357       } else {
358       $desc .= $wt;
359       }
360        
361       $desc .= 'ins'.$mt;
362       }
363       return $desc;
364       }
365        
366       sub _getDefaultCDSAnnotationType {
367   79   return Sanger::CGP::Vagrent::Data::Annotation::getComplexAnnotationType();
368       }
369        
370       sub _getDefaultProteinAnnotationType {
371   0   return Sanger::CGP::Vagrent::Data::Annotation::getComplexAnnotationType();
372       }
373        
374        
375       __END__
376        
377       =head1 NAME
378        
379       Sanger::CGP::Vagrent::Annotators::ComplexIndelAnnotator - Annotator for complex indel variants
380        
381       =head1 DESCRIPTION
382        
383       This annotates complex insertion/deletion variants, it provides L<AnnotatonGroups|Sanger::CGP::Vagrent::Data::AnnotationGroup> for each transcript returned from the L<TranscriptSource|Sanger::CGP::Vagrent::TranscriptSource::AbstractTranscriptSource>
384        
385       It will only process L<Sanger::CGP::Vagrent::Data::ComplexIndel|Sanger::CGP::Vagrent::Data::ComplexIndel> objects, if any other L<Variation|Sanger::CGP::Vagrent::Data::AbstractVariation> objects are sent in it will return an empty answer.
386        
387       It inherits from L<Sanger::CGP::Vagrent::Annotators::DeletionAnnotator|Sanger::CGP::Vagrent::Annotators::DeletionAnnotator>.
388        
389       =head1 METHODS
390        
391       See L<Sanger::CGP::Vagrent::Annotators::AbstractAnnotator|Sanger::CGP::Vagrent::Annotators::AbstractAnnotator>