File Coverage

lib/Sanger/CGP/Vcf/VcfUtil.pm
Criterion Covered Total %
branch 20 24 83.3
subroutine 11 11 100.0
pod 0 6 0.0
total 31 41 75.6


line bran sub pod code
1       package Sanger::CGP::Vcf::VcfUtil;
2        
3       ##########LICENCE##########
4       # Copyright (c) 2014,2015 Genome Research Ltd.
5       #
6       # Author: Jon Hinton <cgpit@sanger.ac.uk>
7       #
8       # This file is part of cgpVcf.
9       #
10       # cgpVcf is free software: you can redistribute it and/or modify it under
11       # the terms of the GNU Affero General Public License as published by the Free
12       # Software Foundation; either version 3 of the License, or (at your option) any
13       # later version.
14       #
15       # This program is distributed in the hope that it will be useful, but WITHOUT
16       # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
17       # FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
18       # details.
19       #
20       # You should have received a copy of the GNU Affero General Public License
21       # along with this program. If not, see <http://www.gnu.org/licenses/>.
22       #
23       # 1. The usage of a range of years within a copyright statement contained within
24       # this distribution should be interpreted as being equivalent to a list of years
25       # including the first and last year specified and all consecutive years between
26       # them. For example, a copyright statement that reads ‘Copyright (c) 2005, 2007-
27       # 2009, 2011-2012’ should be interpreted as being identical to a statement that
28       # reads ‘Copyright (c) 2005, 2007, 2008, 2009, 2011, 2012’ and a copyright
29       # statement that reads ‘Copyright (c) 2005-2012’ should be interpreted as being
30       # identical to a statement that reads ‘Copyright (c) 2005, 2006, 2007, 2008,
31       # 2009, 2010, 2011, 2012’."
32       ########## LICENCE ##########
33        
34        
35   3   use Sanger::CGP::Vcf;
36       our $VERSION = Sanger::CGP::Vcf->VERSION;
37        
38   3   use strict;
39   3   use warnings FATAL => 'all';
40   3   use Carp;
41   3   use Vcf;
42        
43       1;
44        
45       =head gen_tn_vcf_header
46        
47       A string generator for generating a uniform header section for NORMAL/TUMOUR comparisons. Useful if you do not want to include the VcfTools lib.
48        
49       @param1 wt_sample - a Sanger::CGP::Vcf::Sample object representing the wild type sample.
50        
51       @param2 mt_sample - a Sanger::CGP::Vcf::Sample object representing the mutant type sample.
52        
53       @param3 contigs - an array-ref of Sanger::CGP::Vcf::Contig object.
54        
55       @param4 process_logs - an array-ref of Sanger::CGP::Vcf::VcfProcessLog objects.
56        
57       @param5 reference_name - a String containing the name of the reference used in the VCF.
58        
59       @param6 input_source - a String containing the name and version of the application or source of the VCF data.
60        
61       @param7 info - an array-ref of hash-refs containing VCF formatted INFO data.
62        
63       @param8 format - an array-ref of hash-refs containing VCF formatted FORMAT data.
64        
65       @param9 other - an array-ref of hash-refs containing VCF formatted header data.
66        
67       @returns - String containing a fully formatted VCF header.
68        
69       =cut
70       sub gen_tn_vcf_header{
71   2 0 my($wt_sample, $mt_sample, $contigs, $process_logs, $reference_name, $input_source, $info, $format, $other) = @_;
72       my $vcf = Vcf->new(version=>'4.1');
73       init_tn_vcf_header($vcf, $wt_sample, $mt_sample, $contigs, $process_logs, $reference_name, $input_source, $info, $format, $other);
74       return $vcf->format_header();
75       }
76        
77       =head init_tn_vcf_header
78        
79       Initialises the header of a Vcf object for NORMAL/TUMOUR comparisons. Useful if you do not want to include the VcfTools lib.
80        
81       @param1 vcf - a Vcf object.
82        
83       @param2 wt_sample - a Sanger::CGP::Vcf::Sample object representing the wild type sample.
84        
85       @param3 mt_sample - a Sanger::CGP::Vcf::Sample object representing the mutant type sample.
86        
87       @param4 contigs - an array-ref of Sanger::CGP::Vcf::Contig objects.
88        
89       @param5 process_logs - an array-ref of Sanger::CGP::Vcf::VcfProcessLog objects.
90        
91       @param6 reference_name - a String containing the name of the reference used in the VCF.
92        
93       @param7 input_source - a String containing the name and version of the application or source of the VCF data.
94        
95       @param8 info - an array-ref of hash-refs containing VCF formatted INFO data.
96        
97       @param9 format - an array-ref of hash-refs containing VCF formatted FORMAT data.
98        
99       @param10 other - an array-ref of hash-refs containing VCF formatted header data.
100        
101       =cut
102       sub init_tn_vcf_header{
103   2 0 my($vcf, $wt_sample, $mt_sample, $contigs, $process_logs, $reference_name, $input_source, $info, $format, $other) = @_;
104        
105       $vcf->add_header_line( { key => 'fileDate', value => get_date() } );
106       $vcf->add_header_line( { key => 'source', value => $input_source }, 'append' => 1 );
107       $vcf->add_header_line( { key => 'reference', value => $reference_name } );
108        
109       for my $contig (@{$contigs}){
110       add_vcf_contig($vcf,$contig)
111       }
112        
113       for my $inf (@{$info}){
114       $vcf->add_header_line($inf);
115       }
116        
117       for my $for (@{$format}){
118       $vcf->add_header_line($for);
119       }
120        
121       for my $oth (@{$other}){
122       $vcf->add_header_line($oth);
123       }
124        
125       for my $process_log (@{$process_logs}){
126       add_vcf_process_log($vcf,$process_log)
127       }
128        
129       add_vcf_sample($vcf, $wt_sample, 'NORMAL');
130       add_vcf_sample($vcf, $mt_sample, 'TUMOUR');
131       }
132        
133       =head add_vcf_sample
134        
135       Adds a Sanger::CGP::Pindel::OutputGen::Sample object to a Vcf header object. The order of entry is important as it determines the order of the data in the resulting .vcf file.
136        
137       @param1 vcf - a Vcf object.
138        
139       @param2 sample - a Sanger::CGP::Vcf::Sample object.
140        
141       @param3 id - String, the id of the sample to be displayed in the VCF file.
142        
143       =cut
144       sub add_vcf_sample{
145   11 0 my($vcf, $sample, $id) = @_;
146        
147 100     $id = $sample->name unless defined $id and $id ne q{};
148        
149       my %input_hash = (
150       key => 'SAMPLE',
151       ID => $id,
152       SampleName => $sample->name
153       ); ## will use the natural order of the hash I think...
154        
155        
156       #push %input_hash , 'Description', $sample->description if $sample->description;
157        
158 100     $input_hash{Description} = $sample->description if $sample->description;
159 100     $input_hash{Study} = $sample->study if $sample->study;
160 100     $input_hash{Source} = $sample->accession_source if $sample->accession_source;
161 100     $input_hash{Accession} = $sample->accession if $sample->accession;
162 100     $input_hash{Platform} = $sample->platform if $sample->platform;
163 100     $input_hash{Protocol} = $sample->seq_protocol if $sample->seq_protocol;
164        
165       $vcf->add_header_line(\%input_hash);
166       $vcf->add_columns( $id );
167       }
168        
169       =head add_vcf_contig
170        
171       Adds a Sanger::CGP::Pindel::OutputGen::Contig object to a Vcf header object.
172        
173       @param1 vcf - a Vcf object.
174        
175       @param2 contig - a Sanger::CGP::Vcf::Contig object.
176        
177       =cut
178       sub add_vcf_contig{
179   3 0 my($vcf, $contig) = @_;
180        
181       my %input_hash = (
182       key => 'contig',
183       ID => $contig->name,
184       assembly => $contig->assembly,
185       length => $contig->length,
186       species => $contig->species,
187       ); ## will use the natural order of the hash I think...
188        
189 50     $input_hash{md5} = $contig->checksum if $contig->checksum;
190       $vcf->add_header_line(\%input_hash);
191       }
192        
193       =head add_vcf_process_log
194        
195       Adds a Sanger::CGP::Vcf::VcfProcessLog object to a Vcf header object.
196        
197       @param1 vcf - a Vcf object.
198        
199       @param2 process_log - a Sanger::CGP::Pindel::OutputGen::VcfProcessLog object.
200        
201       =cut
202       sub add_vcf_process_log{
203   3 0 my($vcf, $process_log) = @_;
204        
205       my %input_hash = (key => 'vcfProcessLog');
206 50     $input_hash{InputVCF} = $process_log->input_vcf if $process_log->input_vcf;
207 50     $input_hash{InputVCFSource} = $process_log->input_vcf_source if $process_log->input_vcf_source;
208 50     $input_hash{InputVCFVer} = $process_log->input_vcf_ver if $process_log->input_vcf_ver;
209 100     $input_hash{InputVCFParam} = $process_log->input_vcf_params if $process_log->input_vcf_params;
210       $vcf->add_header_line(\%input_hash, append => 1);
211       }
212        
213       sub get_date {
214   5 0 my @timeData = localtime(time);
215       my $year = 1900 + $timeData[5];
216       return
217       $year
218       . sprintf( "%02d", $timeData[4]+1 )
219       . sprintf( "%02d", $timeData[3] );
220       }