line |
bran |
sub |
pod |
code |
1
|
|
|
|
package Sanger::CGP::Vcf::VCFCNConverter; |
2
|
|
|
|
|
3
|
|
|
|
##########LICENCE########## |
4
|
|
|
|
# Copyright (c) 2014,2015 Genome Research Ltd. |
5
|
|
|
|
# |
6
|
|
|
|
# Author: David Jones <cgpit@sanger.ac.uk> |
7
|
|
|
|
# |
8
|
|
|
|
# This file is part of cgpVcf. |
9
|
|
|
|
# |
10
|
|
|
|
# cgpVcf is free software: you can redistribute it and/or modify it under |
11
|
|
|
|
# the terms of the GNU Affero General Public License as published by the Free |
12
|
|
|
|
# Software Foundation; either version 3 of the License, or (at your option) any |
13
|
|
|
|
# later version. |
14
|
|
|
|
# |
15
|
|
|
|
# This program is distributed in the hope that it will be useful, but WITHOUT |
16
|
|
|
|
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
17
|
|
|
|
# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more |
18
|
|
|
|
# details. |
19
|
|
|
|
# |
20
|
|
|
|
# You should have received a copy of the GNU Affero General Public License |
21
|
|
|
|
# along with this program. If not, see <http://www.gnu.org/licenses/>. |
22
|
|
|
|
# |
23
|
|
|
|
# 1. The usage of a range of years within a copyright statement contained within |
24
|
|
|
|
# this distribution should be interpreted as being equivalent to a list of years |
25
|
|
|
|
# including the first and last year specified and all consecutive years between |
26
|
|
|
|
# them. For example, a copyright statement that reads "Copyright (c) 2005, 2007- |
27
|
|
|
|
# 2009, 2011-2012" should be interpreted as being identical to a statement that |
28
|
|
|
|
# reads "Copyright (c) 2005, 2007, 2008, 2009, 2011, 2012" and a copyright |
29
|
|
|
|
# statement that reads "Copyright (c) 2005-2012" should be interpreted as being |
30
|
|
|
|
# identical to a statement that reads "Copyright (c) 2005, 2006, 2007, 2008, |
31
|
|
|
|
# 2009, 2010, 2011, 2012"." |
32
|
|
|
|
########## LICENCE ########## |
33
|
|
|
|
|
34
|
|
2
|
|
use strict; |
35
|
|
2
|
|
use warnings FATAL => 'all'; |
36
|
|
2
|
|
use Sanger::CGP::Vcf::VcfUtil; |
37
|
|
2
|
|
use Sanger::CGP::Vcf; |
38
|
|
2
|
|
use Const::Fast qw(const); |
39
|
|
|
|
|
40
|
|
|
|
# Column separator and line terminator used when emitting VCF records.
const my $SEP => "\t";
const my $NL => "\n";

# Placeholder written to the ID, QUAL and FILTER columns, which carry no value.
const my $BLANK => ".";

# Structural variant type; used for both the SVTYPE INFO value and the
# symbolic ALT allele.
const my $CNV => 'CNV';

# Per-sample FORMAT keys: genotype, total copy number, minor allele copy number.
const my $FORMAT => 'GT:TCN:MCN';

# This module reports the same version as Sanger::CGP::Vcf.
our $VERSION = Sanger::CGP::Vcf->VERSION;

1;
50
|
|
|
|
|
51
|
|
|
|
# Constructor.
#
# Takes a list of key/value options, which are forwarded unchanged to init(),
# and returns a new hash-based object. Can be invoked on a class name or on an
# existing instance; in the latter case the instance's class is reused.
sub new {
  my ($proto, %args) = @_;

  # Support both Class->new(...) and $object->new(...).
  my $self = bless {}, (ref($proto) || $proto);

  $self->init(%args);

  return $self;
}
63
|
|
|
|
|
64
|
|
|
|
# Populates an object from the constructor options.
#
# Only the '-contigs' option is read; its value is stored as-is under the
# '_contigs' key (undef when the option was not supplied).
sub init {
  my $self = shift;
  my %options = @_;

  $self->{_contigs} = $options{-contigs};
}
68
|
|
|
|
|
69
|
|
|
|
=head2 generate_header

Generates a Vcf header String for NORMAL/TUMOUR comparisons.

The contig list is taken from the object (the C<-contigs> constructor option);
the INFO, ALT and FORMAT header definitions for copy number records are built
here and everything is handed to
C<Sanger::CGP::Vcf::VcfUtil::gen_tn_vcf_header>, whose result is returned.

@param1 wt_sample - a Sanger::CGP::Vcf::Sample object representing the normal sample.

@param2 mt_sample - a Sanger::CGP::Vcf::Sample object representing the mutant sample.

@param3 process_logs - an array-ref of Sanger::CGP::Vcf::VcfProcessLog objects.

@param4 reference_name - a String containing the name of the reference used in the VCF.

@param5 input_source - a String containing the name and version of the application or source of the VCF data.

=cut

sub generate_header {
  my ($self, $wt_sample, $mt_sample, $process_logs, $reference_name, $input_source) = @_;

  # INFO fields plus the symbolic ALT allele used by every CNV record.
  my @info_lines = (
    {key => 'INFO', ID => 'END', Number => 1, Type => 'Integer', Description => 'End position of this structural variant'},
    {key => 'INFO', ID => 'SVTYPE', Number => 1, Type => 'String', Description => 'Type of structural variant'},
    {key => 'ALT', ID => 'CNV', Description => 'Copy number variable region'},
  );

  # Per-sample fields, matching the FORMAT column written for each record.
  my @format_lines = (
    {key => 'FORMAT', ID => 'GT', Number => 1, Type => 'String', Description => 'Genotype'},
    {key => 'FORMAT', ID => 'TCN', Number => 1, Type => 'Integer', Description => 'Total copy number'},
    {key => 'FORMAT', ID => 'MCN', Number => 1, Type => 'Integer', Description => 'Minor allele copy number'},
  );

  # The final argument is an empty list: no further header lines are supplied.
  return Sanger::CGP::Vcf::VcfUtil::gen_tn_vcf_header(
    $wt_sample,
    $mt_sample,
    $self->{_contigs},
    $process_logs,
    $reference_name,
    $input_source,
    \@info_lines,
    \@format_lines,
    [],
  );
}
103
|
|
|
|
|
104
|
|
|
|
|
105
|
|
|
|
# Builds a single VCF data line describing one copy number segment.
#
# Arguments (after $self): chromosome, start position, end position, the
# allele at the start position, then total and minor copy number for the
# normal sample followed by total and minor copy number for the tumour sample.
#
# Returns the record as a string: columns joined with $SEP and terminated
# with $NL. The genotype of both samples is always written as './.'.
sub generate_record {
  my ($self, $chr, $start, $end, $start_allele, $wt_cn_tot, $wt_cn_min, $mt_cn_tot, $mt_cn_min) = @_;

  # CHR POS ID REF ALT QUAL FILTER INFO FORMAT GENOSTUFF GENOSTUFF
  my @columns = (
    $chr,                                       # chromosome
    $start,                                     # position (start for CNV)
    $BLANK,                                     # ID
    $start_allele,                              # allele at start position
    '<'.$CNV.'>',                               # CNV marker for alt allele
    $BLANK,                                     # quality - unset for this
    $BLANK,                                     # filter
    'SVTYPE='.$CNV.';'.'END='.$end,             # info section
    $FORMAT,                                    # format string
    join(':', './.', $wt_cn_tot, $wt_cn_min),   # normal sample section
    join(':', './.', $mt_cn_tot, $mt_cn_min),   # tumour sample section
  );

  return join($SEP, @columns).$NL;
}
129
|
|
|
|
# A module must end by evaluating to a true value so that require/use succeed.
# A plain expression is used rather than 'return', which is only legal here
# when the file is loaded via require/do and fails if the file is run directly.
1;