public class GenotypeConcordance extends CommandLineProgram
Modifier and Type | Field and Description |
---|---|
java.lang.String |
CALL_SAMPLE |
java.io.File |
CALL_VCF |
static java.lang.String |
CONTINGENCY_METRICS_FILE_EXTENSION |
static java.lang.String |
DETAILED_METRICS_FILE_EXTENSION |
protected GenotypeConcordanceCounts |
indelCounter |
boolean |
INTERSECT_INTERVALS |
java.util.List<java.io.File> |
INTERVALS |
int |
MIN_DP |
int |
MIN_GQ |
boolean |
MISSING_SITES_HOM_REF |
java.io.File |
OUTPUT |
boolean |
OUTPUT_ALL_ROWS |
protected GenotypeConcordanceCounts |
snpCounter |
static java.lang.String |
SUMMARY_METRICS_FILE_EXTENSION |
java.lang.String |
TRUTH_SAMPLE |
java.io.File |
TRUTH_VCF |
boolean |
USE_VCF_INDEX |
COMPRESSION_LEVEL, CREATE_INDEX, CREATE_MD5_FILE, GA4GH_CLIENT_SECRETS, MAX_RECORDS_IN_RAM, QUIET, REFERENCE_SEQUENCE, TMP_DIR, VALIDATION_STRINGENCY, VERBOSITY
Constructor and Description |
---|
GenotypeConcordance() |
Modifier and Type | Method and Description |
---|---|
static void |
addMissingTruthAndMissingCallStates(double numVariants,
long intervalBaseCount,
GenotypeConcordanceCounts counter)
Method to add missing sites that are KNOWN to be HOM_REF in the case of the NIST truth data set.
|
static boolean |
classifyVariants(java.util.Optional<htsjdk.variant.variantcontext.VariantContext> truthContext,
java.lang.String truthSample,
java.util.Optional<htsjdk.variant.variantcontext.VariantContext> callContext,
java.lang.String callSample,
int minGq,
int minDp) |
static boolean |
classifyVariants(java.util.Optional<htsjdk.variant.variantcontext.VariantContext> truthContext,
java.lang.String truthSample,
java.util.Optional<htsjdk.variant.variantcontext.VariantContext> callContext,
java.lang.String callSample,
java.util.Optional<GenotypeConcordanceCounts> snpCounter,
java.util.Optional<GenotypeConcordanceCounts> indelCounter,
int minGq,
int minDp)
Attempts to determine the concordance state given the truth and call variant contexts and optionally increments the genotype concordance
count for the given variant type (SNP or INDEL).
|
protected java.lang.String[] |
customCommandLineValidation()
Put any custom command-line validation in an override of this method.
|
static GenotypeConcordanceStates.TruthAndCallStates |
determineState(htsjdk.variant.variantcontext.VariantContext truthContext,
java.lang.String truthSample,
htsjdk.variant.variantcontext.VariantContext callContext,
java.lang.String callSample,
int minGq,
int minDp)
A method to determine the truth and call states for a pair of variant contexts representing truth and call.
|
protected int |
doWork()
Do the work after command line has been parsed.
|
GenotypeConcordanceCounts |
getIndelCounter() |
GenotypeConcordanceCounts |
getSnpCounter() |
static void |
main(java.lang.String[] args) |
static void |
outputDetailMetricsFile(htsjdk.variant.variantcontext.VariantContext.Type variantType,
htsjdk.samtools.metrics.MetricsFile<GenotypeConcordanceDetailMetrics,?> genotypeConcordanceDetailMetricsFile,
GenotypeConcordanceCounts counter,
java.lang.String truthSampleName,
java.lang.String callSampleName,
boolean missingSitesHomRef,
boolean outputAllRows)
Outputs the detailed statistics tables for SNP and Indel match categories.
|
getCommandLine, getCommandLineParser, getDefaultHeaders, getMetricsFile, getNestedOptions, getNestedOptionsForHelp, getStandardUsagePreamble, getVersion, instanceMain, instanceMainWithExit, parseArgs, setDefaultHeaders
@Option(shortName="TV", doc="The VCF containing the truth sample") public java.io.File TRUTH_VCF
@Option(shortName="CV", doc="The VCF containing the call sample") public java.io.File CALL_VCF
@Option(shortName="O", doc="Basename for the two metrics files that are to be written. Resulting files will be <OUTPUT>.genotype_concordance_summary_metrics and <OUTPUT>.genotype_concordance_detail_metrics.") public java.io.File OUTPUT
@Option(shortName="TS", doc="The name of the truth sample within the truth VCF") public java.lang.String TRUTH_SAMPLE
@Option(shortName="CS", doc="The name of the call sample within the call VCF") public java.lang.String CALL_SAMPLE
@Option(doc="One or more interval list files that will be used to limit the genotype concordance. Note - if intervals are specified, the VCF files must be indexed.") public java.util.List<java.io.File> INTERVALS
@Option(doc="If true, multiple interval lists will be intersected. If false multiple lists will be unioned.") public boolean INTERSECT_INTERVALS
@Option(doc="Genotypes below this genotype quality will have genotypes classified as LowGq.") public int MIN_GQ
@Option(doc="Genotypes below this depth will have genotypes classified as LowDp.") public int MIN_DP
@Option(doc="If true, output all rows in detailed statistics even when count == 0. When false only output rows with non-zero counts.") public boolean OUTPUT_ALL_ROWS
@Option(doc="If true, use the VCF index, else iterate over the entire VCF.", optional=true) public boolean USE_VCF_INDEX
@Option(shortName="MISSING_HOM", doc="Default is false, which follows the GA4GH Scheme. If true, missing sites in the truth set will be treated as HOM_REF sites and sites missing in both the truth and call sets will be true negatives. Useful when hom ref sites are left out of the truth set. This flag can only be used with a high confidence interval list.") public boolean MISSING_SITES_HOM_REF
public static final java.lang.String SUMMARY_METRICS_FILE_EXTENSION
public static final java.lang.String DETAILED_METRICS_FILE_EXTENSION
public static final java.lang.String CONTINGENCY_METRICS_FILE_EXTENSION
protected GenotypeConcordanceCounts snpCounter
protected GenotypeConcordanceCounts indelCounter
public GenotypeConcordanceCounts getSnpCounter()
public GenotypeConcordanceCounts getIndelCounter()
public static void main(java.lang.String[] args)
protected java.lang.String[] customCommandLineValidation()
CommandLineProgram
customCommandLineValidation
in class CommandLineProgram
protected int doWork()
CommandLineProgram
doWork
in class CommandLineProgram
public static boolean classifyVariants(java.util.Optional<htsjdk.variant.variantcontext.VariantContext> truthContext, java.lang.String truthSample, java.util.Optional<htsjdk.variant.variantcontext.VariantContext> callContext, java.lang.String callSample, int minGq, int minDp)
public static boolean classifyVariants(java.util.Optional<htsjdk.variant.variantcontext.VariantContext> truthContext, java.lang.String truthSample, java.util.Optional<htsjdk.variant.variantcontext.VariantContext> callContext, java.lang.String callSample, java.util.Optional<GenotypeConcordanceCounts> snpCounter, java.util.Optional<GenotypeConcordanceCounts> indelCounter, int minGq, int minDp)
truthContext
- A variant context representing truth
truthSample
- The name of the truth sample
callContext
- A variant context representing the call
callSample
- The name of the call sample
snpCounter
- optionally a place to increment the counts for SNP truth/call states
indelCounter
- optionally a place to increment the counts for INDEL truth/call states
minGq
- Threshold for filtering by genotype attribute GQ
minDp
- Threshold for filtering by genotype attribute DP
public static void addMissingTruthAndMissingCallStates(double numVariants, long intervalBaseCount, GenotypeConcordanceCounts counter)
public static void outputDetailMetricsFile(htsjdk.variant.variantcontext.VariantContext.Type variantType, htsjdk.samtools.metrics.MetricsFile<GenotypeConcordanceDetailMetrics,?> genotypeConcordanceDetailMetricsFile, GenotypeConcordanceCounts counter, java.lang.String truthSampleName, java.lang.String callSampleName, boolean missingSitesHomRef, boolean outputAllRows)
public static final GenotypeConcordanceStates.TruthAndCallStates determineState(htsjdk.variant.variantcontext.VariantContext truthContext, java.lang.String truthSample, htsjdk.variant.variantcontext.VariantContext callContext, java.lang.String callSample, int minGq, int minDp)
truthContext
- A variant context representing truth
truthSample
- The name of the truth sample
callContext
- A variant context representing the call
callSample
- The name of the call sample
minGq
- Threshold for filtering by genotype attribute GQ
minDp
- Threshold for filtering by genotype attribute DP