Page tree
Skip to end of metadata
Go to start of metadata

You are viewing an old version of this page. View the current version.

Compare with Current View Page History

Version 1 Next »

Consider this multi-sample VCF input record at chromosome 1, position 100. It lists four samples with the following genotypes: homozygous reference [AA/AA], heterozygous SNP [AA/AT], heterozygous insertion [AT/AAC] and heterozygous deletion [AA/A]:

#CHROM POS    REF    ALT       FORMAT  SAMPLE1       SAMPLE2        SAMPLE3        SAMPLE4
1 100 AA AT,AAC,A GT:AD 0/0:40,1,0,0 0/1:19,20,1,0 2/1:0,20,22,0 0/3:19,0,0,20

The OpenCB Variant Normalization process normalises this record into three individual variants, one per alternate allele. The JSON representation of the Variant objects is as follows:

{
"id" : "1:100:A:-",
"chromosome" : "1",
"start" : 100,
"end" : 100,
"reference" : "A",
"alternate" : "",
"type" : "INDEL",
"studies" : [ {
"files" : [ {
"call" : {
"variantId" : "1:100:AA:AT,AAC,A",
"alleleIndex" : 2
}
} ],
"secondaryAlternates" : [ {
"chromosome" : "1",
"start" : 101,
"end" : 101,
"reference" : "A",
"alternate" : "T",
"type" : "SNV"
}, {
"chromosome" : "1",
"start" : 102,
"end" : 101,
"reference" : "",
"alternate" : "C",
"type" : "INDEL"
} ],
"sampleDataKeys" : [ "GT", "AD" ],
"samples" : [ {
"sampleId" : "SAMPLE1",
"data" : [ "0/0", "40,0,1,0" ]
}, {
"sampleId" : "SAMPLE2",
"data" : [ "0/2", "19,0,20,1" ]
}, {
"sampleId" : "SAMPLE3",
"data" : [ "3/2", "0,0,20,22" ]
}, {
"sampleId" : "SAMPLE4",
"data" : [ "0/1", "19,20,0,0" ]
} ]
} ]
}
{
"id" : "1:101:A:T",
"chromosome" : "1",
"start" : 101,
"end" : 101,
"reference" : "A",
"alternate" : "T",
"type" : "SNV",
"studies" : [ {
"files" : [ {
"call" : {
"variantId" : "1:100:AA:AT,AAC,A",
"alleleIndex" : 0
}
} ],
"secondaryAlternates" : [ {
"chromosome" : "1",
"start" : 102,
"end" : 101,
"reference" : "",
"alternate" : "C",
"type" : "INDEL"
}, {
"chromosome" : "1",
"start" : 100,
"end" : 100,
"reference" : "A",
"alternate" : "",
"type" : "INDEL"
} ],
"sampleDataKeys" : [ "GT", "AD" ],
"samples" : [ {
"sampleId" : "SAMPLE1",
"data" : [ "0/0", "40,1,0,0" ]
}, {
"sampleId" : "SAMPLE2",
"data" : [ "0/1", "19,20,1,0" ]
}, {
"sampleId" : "SAMPLE3",
"data" : [ "2/1", "0,20,22,0" ]
}, {
"sampleId" : "SAMPLE4",
"data" : [ "0/3", "19,0,0,20" ]
} ]
} ]
}
{
"id" : "1:102:-:C",
"chromosome" : "1",
"start" : 102,
"end" : 101,
"reference" : "",
"alternate" : "C",
"type" : "INDEL",
"studies" : [ {
"files" : [ {
"call" : {
"variantId" : "1:100:AA:AT,AAC,A",
"alleleIndex" : 1
}
} ],
"secondaryAlternates" : [ {
"chromosome" : "1",
"start" : 101,
"end" : 101,
"reference" : "A",
"alternate" : "T",
"type" : "SNV"
}, {
"chromosome" : "1",
"start" : 100,
"end" : 100,
"reference" : "A",
"alternate" : "",
"type" : "INDEL"
} ],
"sampleDataKeys" : [ "GT", "AD" ],
"samples" : [ {
"sampleId" : "SAMPLE1",
"data" : [ "0/0", "40,0,1,0" ]
}, {
"sampleId" : "SAMPLE2",
"data" : [ "0/2", "19,1,20,0" ]
}, {
"sampleId" : "SAMPLE3",
"data" : [ "1/2", "0,22,20,0" ]
}, {
"sampleId" : "SAMPLE4",
"data" : [ "0/3", "19,0,0,20" ]
} ]
} ]
}
  • No labels