File Coverage

lib/Sanger/CGP/Vagrent/Ontology/SequenceOntologyClassifier.pm
Criterion Covered Total %
branch 4 12 33.3
subroutine 50 54 92.5
pod 0 40 0.0
total 54 106 50.9


line bran sub pod code
1       =head1 NAME
2        
3       Sanger::CGP::Vagrent::Ontology::SequenceOntologyClassifier - Contains a set of rules to classify
4       Annotations using Sequence Ontology terms
5        
6       =head1 DESCRIPTION
7        
8       This class is designed to be extended, it contains a collection of test functions that classify
9       Sanger::CGP::Vagrent::Data::Annotation objects and assign the appropriate Sequence Ontology terms.
10        
11       =cut
12        
13       package Sanger::CGP::Vagrent::Ontology::SequenceOntologyClassifier;
14        
15       ##########LICENCE##########
16       # Copyright (c) 2014 Genome Research Ltd.
17       #
18       # Author: Cancer Genome Project cgpit@sanger.ac.uk
19       #
20       # This file is part of VAGrENT.
21       #
22       # VAGrENT is free software: you can redistribute it and/or modify it under
23       # the terms of the GNU Affero General Public License as published by the Free
24       # Software Foundation; either version 3 of the License, or (at your option) any
25       # later version.
26       #
27       # This program is distributed in the hope that it will be useful, but WITHOUT
28       # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
29       # FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
30       # details.
31       #
32       # You should have received a copy of the GNU Affero General Public License
33       # along with this program. If not, see <http://www.gnu.org/licenses/>.
34       ##########LICENCE##########
35        
36        
37   4   use strict;
38        
39   4   use Const::Fast qw(const);
40   4   use Cwd qw(abs_path);
41   4   use File::Basename;
42   4   use File::Path;
43   4   use List::Util qw(first);
44        
45   4   use Log::Log4perl qw(:easy);
46        
47   4   use File::ShareDir qw(module_dir);
48   4   use Config::IniFiles;
49        
50       my $log = Log::Log4perl->get_logger(__PACKAGE__);
51   4   use Sanger::CGP::Vagrent qw($VERSION);
52   4   use base qw(Sanger::CGP::Vagrent);
53        
54       # constant values holding SO class identifiers
55       const my $SO_SUBSTITUTION_CLASS => 'SO:1000002:substitution';
56       const my $SO_INSERTION_CLASS => 'SO:0000667:insertion';
57       const my $SO_DELETION_CLASS => 'SO:0000159:deletion';
58       const my $SO_COMPLEXINDEL_CLASS => 'SO:1000032:indel';
59        
60       # sequence variant classes
61       const my $SO_UNKNOWN_VARIANT_CLASS => 'SO:0001576:transcript_variant';
62       const my $SO_SYN_VARIANT_CLASS => 'SO:0001588:synonymous_codon';
63       const my $SO_NON_SYN_VARIANT_CLASS => 'SO:0001583:non_synonymous_codon';
64       const my $SO_STOP_GAINED_VARIANT_CLASS => 'SO:0001587:stop_gained';
65       const my $SO_STOP_LOST_VARIANT_CLASS => 'SO:0001578:stop_lost';
66       const my $SO_STOP_RETAINED_VARIANT_CLASS => 'SO:0001567:stop_retained_variant';
67       const my $SO_START_LOST_VARIANT_CLASS => 'SO:0001582:initiator_codon_change';
68       const my $SO_PREMATURE_START_GAINED_VARIANT_CLASS => 'SO:0001988:5_prime_UTR_premature_start_codon_gain_variant';
69       const my $SO_INTRON_VARIANT_CLASS => 'SO:0001627:intron_variant';
70       const my $SO_CODON_VARIANT_CLASS => 'SO:0001581:codon_variant';
71       const my $SO_5PRIME_UTR_VARIANT_CLASS => 'SO:0001623:5_prime_UTR_variant';
72       const my $SO_3PRIME_UTR_VARIANT_CLASS => 'SO:0001624:3_prime_UTR_variant';
73       const my $SO_NC_TRANS_VARIANT_CLASS => 'SO:0001619:nc_transcript_variant';
74       const my $SO_ESS_SPLICE_VARIANT_CLASS => 'SO:0001629:splice_site_variant';
75       const my $SO_SPLICE_REGION_VARIANT_CLASS => 'SO:0001995:extended_intronic_splice_region_variant';
76       const my $SO_FRAMESHIFT_VARIANT_CLASS => 'SO:0001589:frameshift_variant';
77       const my $SO_INFRAME_VARIANT_CLASS => 'SO:0001650:inframe_variant';
78       const my $SO_INFRAME_CODON_GAIN_VARIANT_CLASS => 'SO:0001651:inframe_codon_gain';
79       const my $SO_INFRAME_CODON_LOSS_VARIANT_CLASS => 'SO:0001652:inframe_codon_loss';
80       const my $SO_COMPLEX_CHANGE_VARIANT_CLASS => 'SO:0001577:complex_change_in_transcript';
81        
82       const my $SO_5KB_UPSTREAM_VARIANT_CLASS => 'SO:0001635:5KB_upstream_variant';
83       const my $SO_2KB_UPSTREAM_VARIANT_CLASS => 'SO:0001636:2KB_upstream_variant';
84       const my $SO_5KB_DOWNSTREAM_VARIANT_CLASS => 'SO:0001633:5KB_downstream_variant';
85       const my $SO_500BP_DOWNSTREAM_VARIANT_CLASS => 'SO:0001634:500B_downstream_variant';
86        
87       const my $SO_EXON_CLASS => 'SO:0000147:exon';
88       const my $SO_INTRON_CLASS => 'SO:0000188:intron';
89       const my $SO_SPLICE_REGION_CLASS => 'SO:0001996:extended_intronic_splice_region';
90       const my $SO_ESS_SPLICE_SITE_CLASS => 'SO:0001993:extended_cis_splice_site';
91        
92       # mature transcript mRNA region classes
93       const my $SO_CDS_CLASS => 'SO:0000316:CDS';
94       const my $SO_5PRIME_UTR_CLASS => 'SO:0000204:five_prime_UTR';
95       const my $SO_3PRIME_UTR_CLASS => 'SO:0000205:three_prime_UTR';
96        
97       # gene attribute classes
98       const my $SO_PROTEIN_CODING_CLASS => 'SO:0000010:protein_coding';
99       const my $SO_NON_PROTEIN_CODING_CLASS => 'SO:0000011:non_protein_coding';
100        
101       const my $TERM_SUMMARY_INI => 'SequenceOntologySummary.ini';
102        
103       # sub DESTROY {
104       # ##### Handy DESTROY function that will print ontology combinations that don't exist in the summary lookup at program termination.
105       # my $self = shift;
106       # if(defined $self->{'_SOsum'}){
107       # foreach my $k( sort {$self->{'_notSummary'}->{$b} <=> $self->{'_notSummary'}->{$a}} keys %{$self->{'_notSummary'}}){
108       # print $self->{'_notSummary'}->{$k},' - ',$k,"\n" unless $self->{'_notSummary'}->{$k} == 0;
109       # }
110       # }
111       # }
112        
113       sub _ontologyInit {
114   862   my $self = shift;
115       my %vars = @_;
116 50     if(exists $vars{'ontologySymmary'} && defined $vars{'ontologySymmary'}){
117       $self->{'_SOsum'} = $vars{'ontologySymmary'};
118       } else {
119       $self->_loadOntologySummaryIni();
120       }
121       }
122        
123       sub _loadOntologySummaryIni {
124   862   my $self = shift;
125       my $share_path = dirname(abs_path($0)).'/../share';
126 50     $share_path = module_dir('Sanger::CGP::Vagrent') unless(-e File::Spec->catfile($share_path,$TERM_SUMMARY_INI));
127       $self->{'_SOsum'} = new Config::IniFiles( -file => File::Spec->catfile($share_path,$TERM_SUMMARY_INI));
128       }
129        
130       sub getOntologySummary {
131   0 0 my ($self,$anno) = @_;
132       my $mrna = $anno->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getmRNAAnnotationContext());
133       my $cds = $anno->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getCDSAnnotationContext());
134       my $prot = $anno->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getProteinAnnotationContext());
135       my @class = $anno->getClassifications;
136       my @terms;
137       foreach my $a($anno,$mrna,$cds,$prot){
138 0     next unless defined $a;
139       foreach my $c($a->getClassifications){
140 0 0   next if first {$c eq $_} @terms;
141       push @terms, $c;
142       }
143       }
144       my $thing = join ',', @terms;
145        
146       my $sum = $self->{'_SOsum'}->val('SUMMARY',$thing,$thing);
147 0     if($sum eq $thing){
148       $self->{'_notSummary'}->{$thing}++;
149       }
150        
151       return $sum;
152       }
153        
154       sub getSummaryCache {
155   0 0 return shift->{'_SOsum'};
156       }
157        
158       sub classifyTranscript {
159   852 0 my ($self,$tran) = @_;
160 100     if($tran->isProteinCoding){
161       return $self->getProteinCodingClass;
162       } else {
163       return $self->getNonProteinCodingClass;
164       }
165       }
166        
167       # variant type classes
168        
169       sub getSubstitutionClass {
170   446 0 my $self = shift;
171       return $SO_SUBSTITUTION_CLASS;
172       }
173       sub getInsertionClass {
174   1663 0 my $self = shift;
175       return $SO_INSERTION_CLASS;
176       }
177       sub getDeletionClass {
178   692 0 my $self = shift;
179       return $SO_DELETION_CLASS;
180       }
181       sub getComplexIndelClass {
182   598 0 my $self = shift;
183       return $SO_COMPLEXINDEL_CLASS;
184       }
185       # sequence variant classes
186        
187       sub getUnknownVariantClass {
188   1503 0 my $self = shift;
189       return $SO_UNKNOWN_VARIANT_CLASS;
190       }
191       sub getIntronVariantClass {
192   1103 0 my $self = shift;
193       return $SO_INTRON_VARIANT_CLASS;
194       }
195       sub getSynonymousVariantClass {
196   4 0 my $self = shift;
197       return $SO_SYN_VARIANT_CLASS;
198       }
199       sub getNonSynonymousVariantClass {
200   12 0 my $self = shift;
201       return $SO_NON_SYN_VARIANT_CLASS;
202       }
203       sub getStopGainedVariantClass {
204   14 0 my $self = shift;
205       return $SO_STOP_GAINED_VARIANT_CLASS;
206       }
207       sub getStopLostVariantClass {
208   12 0 my $self = shift;
209       return $SO_STOP_LOST_VARIANT_CLASS;
210       }
211       sub getStopRetainedVariantClass {
212   0 0 my $self = shift;
213       return $SO_STOP_RETAINED_VARIANT_CLASS;
214       }
215       sub getStartLostVariantClass {
216   30 0 my $self = shift;
217       return $SO_START_LOST_VARIANT_CLASS;
218       }
219       sub getPrematureStartGainedVariantClass{
220   4 0 my $self = shift;
221       return $SO_PREMATURE_START_GAINED_VARIANT_CLASS;
222       }
223       sub getCodonVariantClass{
224   55 0 my $self = shift;
225       return $SO_CODON_VARIANT_CLASS;
226       }
227       sub get5PrimeUtrVariantClass{
228   421 0 my $self = shift;
229       return $SO_5PRIME_UTR_VARIANT_CLASS;
230       }
231       sub get3PrimeUtrVariantClass{
232   292 0 my $self = shift;
233       return $SO_3PRIME_UTR_VARIANT_CLASS;
234       }
235       sub getNonCodingTranscriptVariantClass{
236   28 0 my $self = shift;
237       return $SO_NC_TRANS_VARIANT_CLASS;
238       }
239       sub getEssentialSpliceSiteVariantClass{
240   944 0 my $self = shift;
241       return $SO_ESS_SPLICE_VARIANT_CLASS;
242       }
243       sub getSpliceRegionVariantClass{
244   1232 0 my $self = shift;
245       return $SO_SPLICE_REGION_VARIANT_CLASS;
246       }
247       sub getFrameShiftVariantClass{
248   569 0 my $self = shift;
249       return $SO_FRAMESHIFT_VARIANT_CLASS;
250       }
251       sub getInFrameVariantClass{
252   562 0 my $self = shift;
253       return $SO_INFRAME_VARIANT_CLASS;
254       }
255       sub getInFrameCodonGainVariantClass {
256   16 0 my $self = shift;
257       return $SO_INFRAME_CODON_GAIN_VARIANT_CLASS;
258       }
259       sub getInFrameCodonLossVariantClass {
260   10 0 my $self = shift;
261       return $SO_INFRAME_CODON_LOSS_VARIANT_CLASS;
262       }
263       sub getComplexChangeVariantClass {
264   1024 0 my $self = shift;
265       return $SO_COMPLEX_CHANGE_VARIANT_CLASS;
266       }
267       sub get5KBUpStreamVariantClass {
268   1233 0 my $self = shift;
269       return $SO_5KB_UPSTREAM_VARIANT_CLASS;
270       }
271       sub get2KBUpStreamVariantClass {
272   1276 0 my $self = shift;
273       return $SO_2KB_UPSTREAM_VARIANT_CLASS;
274       }
275       sub get5KBDownStreamVariantClass {
276   1166 0 my $self = shift;
277       return $SO_5KB_DOWNSTREAM_VARIANT_CLASS;
278       }
279       sub get500BPDownStreamVariantClass {
280   1178 0 my $self = shift;
281       return $SO_500BP_DOWNSTREAM_VARIANT_CLASS;
282       }
283       # variant transcript region classes
284        
285       sub getExonClass {
286   1078 0 my $self = shift;
287       return $SO_EXON_CLASS;
288       }
289        
290       sub getIntronClass {
291   1285 0 my $self = shift;
292       return $SO_INTRON_CLASS;
293       }
294        
295       sub getSpliceRegionClass {
296   982 0 my $self = shift;
297       return $SO_SPLICE_REGION_CLASS;
298       }
299        
300       sub getEssentialSpliceSiteClass {
301   966 0 my $self = shift;
302       return $SO_ESS_SPLICE_SITE_CLASS;
303       }
304        
305       # mature transcript mRNA region classes
306        
307       sub getCDSClass {
308   976 0 my $self = shift;
309       return $SO_CDS_CLASS;
310       }
311        
312       sub get5PrimeUtrClass {
313   453 0 my $self = shift;
314       return $SO_5PRIME_UTR_CLASS;
315       }
316        
317       sub get3PrimeUtrClass {
318   293 0 my $self = shift;
319       return $SO_3PRIME_UTR_CLASS;
320       }
321        
322       # gene/transcript type classes
323        
324       sub getProteinCodingClass {
325   1448 0 my $self = shift;
326       return $SO_PROTEIN_CODING_CLASS;
327       }
328        
329       sub getNonProteinCodingClass {
330   222 0 my $self = shift;
331       return $SO_NON_PROTEIN_CODING_CLASS;
332       }
333        
334       1;