Skip to content

Commit e4b8ecf

Browse files
committed
Fix per-sample statistics
Some of the per-sample stats were assigned to different samples when `bcftools stats -s/-S`, depending on the order and the sample count. Resolves #2469, pull request #2534 with a test added
1 parent 48d3e75 commit e4b8ecf

4 files changed

Lines changed: 26 additions & 8 deletions

File tree

NEWS

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,10 @@ Changes affecting specific commands:
3333
unwanted pair of records, it also led to unexpected behavior when the first
3434
out-of-window record would change by filtering (#2475)
3535

36+
* bcftools stats
37+
38+
- Fix per-sample stats when samples provided in different order via -s/-S (#2534)
39+
3640
* bcftools +trio-dnm3
3741

3842
- Add a new TrioDNM model and make it the new default. To prevent confusion, the

test/stats-smpl-order.vcf

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
##fileformat=VCFv4.2
2+
##contig=<ID=1,length=1000000>
3+
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
4+
##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read depth">
5+
##FORMAT=<ID=AD,Number=R,Type=Integer,Description="Allelic depths">
6+
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT A B
7+
1 100 . A G 60 PASS . GT:DP:AD 0/1:10:6,4 0/1:100:1,99
8+
1 200 . AT A 60 PASS . GT:DP:AD 0/1:20:10,10 0/1:200:190,10

test/test.pl

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
#!/usr/bin/env perl
22
#
3-
# Copyright (C) 2012-2025 Genome Research Ltd.
3+
# Copyright (C) 2012-2026 Genome Research Ltd.
44
#
55
# Author: Petr Danecek <pd3@sanger.ac.uk>
66
#
@@ -44,6 +44,8 @@
4444
run_test(\&test_vcf_idxstats,$opts,in=>'empty',args=>'-n',out=>'empty.idx_count.out');
4545
run_test(\&test_vcf_check,$opts,in=>'check',out=>'check.chk');
4646
run_test(\&test_vcf_check_merge,$opts,in=>'check',out=>'check_merge.chk');
47+
run_test(\&test_vcf_stats,$opts,in=>['stats-smpl-order'],out=>'stats-smpl-order.chk',args=>'-s A,B',pipe=>'sort');
48+
run_test(\&test_vcf_stats,$opts,in=>['stats-smpl-order'],out=>'stats-smpl-order.chk',args=>'-s B,A',pipe=>'sort');
4749
run_test(\&test_vcf_stats,$opts,in=>['stats.a','stats.b'],out=>'stats.chk',args=>'-s -');
4850
run_test(\&test_vcf_stats,$opts,in=>['stats.a','stats.b'],out=>'stats.B.chk',args=>'-s B');
4951
run_test(\&test_vcf_stats,$opts,in=>['stats.counts'],out=>'stats.counts.chk',args=>'-s -');
@@ -1511,7 +1513,9 @@ sub test_vcf_stats
15111513
bgzip_tabix_vcf($opts,$file);
15121514
$files .= " $$opts{tmp}/$file.vcf.gz";
15131515
}
1514-
test_cmd($opts,%args,cmd=>"$$opts{bin}/bcftools stats $args{args} $files | grep -v '^#' | grep -v '^ID\t'");
1516+
my $pipe = qq[grep -v '^#' | grep -v '^ID\t' ];
1517+
if ( exists($args{pipe}) ) { $pipe .= ' | '. $args{pipe}; }
1518+
test_cmd($opts,%args,cmd=>"$$opts{bin}/bcftools stats $args{args} $files | $pipe");
15151519
}
15161520
sub test_vcf_merge
15171521
{

vcfstats.c

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/* vcfstats.c -- Produces stats which can be plotted using plot-vcfstats.
22
3-
Copyright (C) 2012-2025 Genome Research Ltd.
3+
Copyright (C) 2012-2026 Genome Research Ltd.
44
55
Author: Petr Danecek <pd3@sanger.ac.uk>
66
@@ -1103,8 +1103,10 @@ static void do_sample_stats(args_t *args, stats_t *stats, bcf_sr_t *reader, int
11031103
int is;
11041104
for (is=0; is<args->files->n_smpl; is++)
11051105
{
1106+
int ismpl = reader->samples[is]; // VCF column index for this sample after possible reordering
1107+
11061108
// Determine depth
1107-
int dp = calc_sample_depth(args,is,ad_fmt_ptr,dp_fmt_ptr);
1109+
int dp = calc_sample_depth(args,ismpl,ad_fmt_ptr,dp_fmt_ptr);
11081110
if ( dp>0 )
11091111
{
11101112
(*idist(&stats->dp, dp))++;
@@ -1116,7 +1118,7 @@ static void do_sample_stats(args_t *args, stats_t *stats, bcf_sr_t *reader, int
11161118
int ial, jal, gt=GT_UNKN;
11171119
if ( gt_fmt_ptr )
11181120
{
1119-
gt = bcf_gt_type(gt_fmt_ptr, reader->samples[is], &ial, &jal);
1121+
gt = bcf_gt_type(gt_fmt_ptr, ismpl, &ial, &jal);
11201122
sample_gt_stats(args,stats,line,is,gt,ial,jal);
11211123
}
11221124

@@ -1126,12 +1128,12 @@ static void do_sample_stats(args_t *args, stats_t *stats, bcf_sr_t *reader, int
11261128
float iad = 0, jad = 0;
11271129
if ( gt==GT_UNKN ) // GT not available
11281130
{
1129-
iad = get_ad(line,ad_fmt_ptr,is,&ial);
1131+
iad = get_ad(line,ad_fmt_ptr,ismpl,&ial);
11301132
}
11311133
else if ( gt!=GT_UNKN )
11321134
{
1133-
iad = ial==0 ? 0 : get_iad(line,ad_fmt_ptr,is,ial);
1134-
jad = jal==0 ? 0 : get_iad(line,ad_fmt_ptr,is,jal);
1135+
iad = ial==0 ? 0 : get_iad(line,ad_fmt_ptr,ismpl,ial);
1136+
jad = jal==0 ? 0 : get_iad(line,ad_fmt_ptr,ismpl,jal);
11351137
}
11361138
if ( iad )
11371139
{

0 commit comments

Comments
 (0)