"""
This module provides functions for calculating diagnostic test
performance measures: sensitivity, specificity, predictive values,
likelihood ratios, and ROC curve analysis.
"""
import numpy as np
from typing import Tuple, Optional, Dict, List
from dataclasses import dataclass
from enum import Enum
from scipy import stats
[docs]
class DiagnosticMeasure(Enum):
"""Diagnostic performance measures."""
SENSITIVITY = "sensitivity"
SPECIFICITY = "specificity"
PPV = "ppv"
NPV = "npv"
LR_POSITIVE = "lr_positive"
LR_NEGATIVE = "lr_negative"
ACCURACY = "accuracy"
YOUDEN = "youden"
[docs]
@dataclass
class DiagnosticResult:
"""Rich result object for diagnostic test calculations."""
tp: int
fp: int
fn: int
tn: int
sensitivity: float
specificity: float
ppv: float
npv: float
lr_positive: float
lr_negative: float
accuracy: float
youden: float
prevalence: Optional[float] = None
[docs]
def __repr__(self) -> str:
return f"Sens: {self.sensitivity:.3f}, Spec: {self.specificity:.3f}, Acc: {self.accuracy:.3f}"
[docs]
def to_dict(self) -> Dict:
"""Convert result to dictionary."""
return {
"tp": self.tp,
"fp": self.fp,
"fn": self.fn,
"tn": self.tn,
"sensitivity": self.sensitivity,
"specificity": self.specificity,
"ppv": self.ppv,
"npv": self.npv,
"lr_positive": self.lr_positive,
"lr_negative": self.lr_negative,
"accuracy": self.accuracy,
"youden": self.youden,
"prevalence": self.prevalence
}
[docs]
def summary(self) -> str:
"""Generate text summary."""
return (f"Diagnostic Test Performance:\n"
f" Sensitivity: {self.sensitivity:.3f} (95% CI: {self._sens_ci()})\n"
f" Specificity: {self.specificity:.3f} (95% CI: {self._spec_ci()})\n"
f" PPV: {self.ppv:.3f}, NPV: {self.npv:.3f}\n"
f" LR+: {self.lr_positive:.3f}, LR-: {self.lr_negative:.3f}\n"
f" Accuracy: {self.accuracy:.3f}, Youden: {self.youden:.3f}")
def _sens_ci(self, confidence: float = 0.95) -> str:
"""Calculate sensitivity CI."""
from .descriptive import proportion_ci
ci = proportion_ci(self.tp, self.tp + self.fn, confidence=confidence)
return f"{ci.ci_lower:.3f}-{ci.ci_upper:.3f}"
def _spec_ci(self, confidence: float = 0.95) -> str:
"""Calculate specificity CI."""
from .descriptive import proportion_ci
ci = proportion_ci(self.tn, self.tn + self.fp, confidence=confidence)
return f"{ci.ci_lower:.3f}-{ci.ci_upper:.3f}"
[docs]
@dataclass
class ROCResult:
"""Result object for ROC curve analysis."""
fpr: np.ndarray
tpr: np.ndarray
thresholds: np.ndarray
auc: float
optimal_threshold: float
optimal_point: Dict[str, float]
method: str = "youden"
[docs]
def __repr__(self) -> str:
sens = self.optimal_point.get("sensitivity", float("nan"))
spec = self.optimal_point.get("specificity", float("nan"))
return (
f"ROC Curve AUC: {self.auc:.3f}\n"
f"Optimal threshold: {self.optimal_threshold:.3f} "
f"(Sens={sens:.3f}, Spec={spec:.3f})"
)
[docs]
def to_dict(self) -> Dict:
"""Return a JSON-serializable dictionary."""
return {
"auc": self.auc,
"optimal_threshold": self.optimal_threshold,
"optimal_sensitivity": self.optimal_point.get("sensitivity"),
"optimal_specificity": self.optimal_point.get("specificity"),
"optimal_youden": self.optimal_point.get("youden"),
"method": self.method,
"n_thresholds": len(self.thresholds),
}
[docs]
def plot(self, backend: str = "plotly", **kwargs):
"""Plot the ROC curve — opens in browser (Plotly) or returns Figure (Matplotlib)."""
try:
from episia.viz.roc import plot_roc
except ImportError:
from viz.roc import plot_roc
return plot_roc(self, backend=backend, **kwargs)
[docs]
def show(self):
"""Convenience: plot() then show() — equivalent to roc.plot().show()."""
fig = self.plot()
if hasattr(fig, "show"):
fig.show()
return fig
[docs]
def diagnostic_test_2x2(
tp: int,
fp: int,
fn: int,
tn: int,
prevalence: Optional[float] = None
) -> DiagnosticResult:
"""
Calculate diagnostic test performance from 2x2 table.
Args:
tp: True positives
fp: False positives
fn: False negatives
tn: True negatives
prevalence: Disease prevalence (for PPV/NPV if different from sample)
Returns:
DiagnosticResult object
Example:
>>> result = diagnostic_test_2x2(80, 20, 10, 90)
>>> print(result.sensitivity)
0.8889
"""
# Input validation
for value, name in [(tp, "tp"), (fp, "fp"), (fn, "fn"), (tn, "tn")]:
if value < 0:
raise ValueError(f"{name} must be non-negative")
# Total calculations
total = tp + fp + fn + tn
if total == 0:
raise ValueError("All values cannot be zero")
# Sample prevalence
sample_prevalence = (tp + fn) / total if total > 0 else 0
# Use provided prevalence or sample prevalence
prev = prevalence if prevalence is not None else sample_prevalence
# Core diagnostic measures
sensitivity = tp / (tp + fn) if (tp + fn) > 0 else 0.0
specificity = tn / (tn + fp) if (tn + fp) > 0 else 0.0
# Predictive values
if prevalence is not None:
# Use Bayes' theorem with provided prevalence
ppv = (sensitivity * prev) / (sensitivity * prev + (1 - specificity) * (1 - prev))
npv = (specificity * (1 - prev)) / (specificity * (1 - prev) + (1 - sensitivity) * prev)
else:
# Use sample values
ppv = tp / (tp + fp) if (tp + fp) > 0 else 0.0
npv = tn / (tn + fn) if (tn + fn) > 0 else 0.0
# Likelihood ratios
lr_positive = sensitivity / (1 - specificity) if specificity < 1 else float('inf')
lr_negative = (1 - sensitivity) / specificity if specificity > 0 else float('inf')
# Overall accuracy
accuracy = (tp + tn) / total
# Youden's J statistic
youden = sensitivity + specificity - 1
return DiagnosticResult(
tp=tp, fp=fp, fn=fn, tn=tn,
sensitivity=sensitivity,
specificity=specificity,
ppv=ppv,
npv=npv,
lr_positive=lr_positive,
lr_negative=lr_negative,
accuracy=accuracy,
youden=youden,
prevalence=prev
)
[docs]
def diagnostic_from_data(
y_true: np.ndarray,
y_pred: np.ndarray,
threshold: float = 0.5
) -> DiagnosticResult:
"""
Calculate diagnostic measures from true labels and predicted scores.
Args:
y_true: True binary labels (0 or 1)
y_pred: Predicted scores or probabilities
threshold: Classification threshold
Returns:
DiagnosticResult object
"""
# Convert to binary predictions
y_pred_binary = (y_pred >= threshold).astype(int)
# Calculate confusion matrix
tp = np.sum((y_true == 1) & (y_pred_binary == 1))
fp = np.sum((y_true == 0) & (y_pred_binary == 1))
fn = np.sum((y_true == 1) & (y_pred_binary == 0))
tn = np.sum((y_true == 0) & (y_pred_binary == 0))
return diagnostic_test_2x2(tp, fp, fn, tn)
[docs]
def roc_analysis(
y_true: np.ndarray,
y_score: np.ndarray,
method: str = "youden",
**kwargs
) -> ROCResult:
"""
Perform ROC curve analysis.
Args:
y_true: True binary labels
y_score: Predicted scores or probabilities
method: Method for optimal threshold selection:
'youden' (default), 'closest_topleft', 'max_accuracy'
Returns:
ROCResult object
"""
# Calculate ROC curve
from sklearn.metrics import roc_curve, auc # lazy — avoids 1.5s startup cost
fpr, tpr, thresholds = roc_curve(y_true, y_score, **kwargs)
roc_auc = auc(fpr, tpr)
# Calculate optimal threshold based on method
if method == "youden":
# Youden's J statistic: max(sens + spec - 1)
youden_index = tpr + (1 - fpr) - 1
optimal_idx = np.argmax(youden_index)
optimal_threshold = thresholds[optimal_idx]
optimal_point = {
"sensitivity": tpr[optimal_idx],
"specificity": 1 - fpr[optimal_idx],
"youden": youden_index[optimal_idx]
}
elif method == "closest_topleft":
# Point closest to top-left corner (0,1)
distances = (fpr)**2 + (1 - tpr)**2
optimal_idx = np.argmin(distances)
optimal_threshold = thresholds[optimal_idx]
optimal_point = {
"sensitivity": tpr[optimal_idx],
"specificity": 1 - fpr[optimal_idx],
"distance": np.sqrt(distances[optimal_idx])
}
elif method == "max_accuracy":
# Maximum accuracy
accuracy = (tpr * np.sum(y_true) + (1 - fpr) * np.sum(1 - y_true)) / len(y_true)
optimal_idx = np.argmax(accuracy)
optimal_threshold = thresholds[optimal_idx]
optimal_point = {
"sensitivity": tpr[optimal_idx],
"specificity": 1 - fpr[optimal_idx],
"accuracy": accuracy[optimal_idx]
}
else:
raise ValueError(f"Unknown method: {method}")
return ROCResult(
fpr=fpr,
tpr=tpr,
thresholds=thresholds,
auc=roc_auc,
optimal_threshold=optimal_threshold,
optimal_point=optimal_point
)
[docs]
def likelihood_ratio_ci(
lr: float,
tp: int,
fp: int,
fn: int,
tn: int,
confidence: float = 0.95
) -> Tuple[float, float]:
"""
Calculate confidence interval for likelihood ratio.
Args:
lr: Likelihood ratio (positive or negative)
tp, fp, fn, tn: 2x2 table counts
confidence: Confidence level
Returns:
Tuple of (lower, upper) CI bounds
"""
if lr <= 0:
return 0.0, 0.0
# Log transformation for CI
log_lr = np.log(lr)
if lr == float('inf'):
# Handle infinite LR
return float('inf'), float('inf')
# Standard error on log scale
if lr > 1: # Positive LR
var_log_lr = (1/tp - 1/(tp + fn) + 1/fp - 1/(fp + tn))
else: # Negative LR
var_log_lr = (1/fn - 1/(tp + fn) + 1/tn - 1/(fp + tn))
se_log_lr = np.sqrt(max(var_log_lr, 0))
z = stats.norm.ppf(1 - (1 - confidence) / 2)
log_ci_lower = log_lr - z * se_log_lr
log_ci_upper = log_lr + z * se_log_lr
return np.exp(log_ci_lower), np.exp(log_ci_upper)
[docs]
def predictive_values_from_sens_spec(
sensitivity: float,
specificity: float,
prevalence: float
) -> Tuple[float, float]:
"""
Calculate PPV and NPV from sensitivity, specificity, and prevalence.
Args:
sensitivity: Test sensitivity
specificity: Test specificity
prevalence: Disease prevalence
Returns:
Tuple of (PPV, NPV)
"""
# Input validation
for value, name in [(sensitivity, "sensitivity"),
(specificity, "specificity"),
(prevalence, "prevalence")]:
if not 0 <= value <= 1:
raise ValueError(f"{name} must be between 0 and 1")
# Bayes' theorem
ppv = (sensitivity * prevalence) / (sensitivity * prevalence + (1 - specificity) * (1 - prevalence))
npv = (specificity * (1 - prevalence)) / (specificity * (1 - prevalence) + (1 - sensitivity) * prevalence)
return ppv, npv
[docs]
def fagan_nomogram(
pre_test_prob: float,
lr: float
) -> float:
"""
Calculate post-test probability using Fagan's nomogram method.
Args:
pre_test_prob: Pre-test probability (0-1)
lr: Likelihood ratio (positive or negative)
Returns:
Post-test probability
"""
if not 0 <= pre_test_prob <= 1:
raise ValueError("pre_test_prob must be between 0 and 1")
if lr <= 0:
raise ValueError("lr must be positive")
if lr == float('inf'):
return 1.0
elif lr == 0:
return 0.0
# Convert probability to odds
pre_test_odds = pre_test_prob / (1 - pre_test_prob)
# Apply likelihood ratio
post_test_odds = pre_test_odds * lr
# Convert back to probability
post_test_prob = post_test_odds / (1 + post_test_odds)
return post_test_prob
[docs]
def diagnostic_accuracy_ci(
accuracy: float,
n: int,
confidence: float = 0.95
) -> Tuple[float, float]:
"""
Calculate confidence interval for diagnostic accuracy.
Args:
accuracy: Observed accuracy
n: Total sample size
confidence: Confidence level
Returns:
Tuple of (lower, upper) CI bounds
"""
from .descriptive import proportion_ci
ci = proportion_ci(int(accuracy * n), n, confidence=confidence)
return ci.ci_lower, ci.ci_upper
[docs]
def compare_diagnostic_tests(
test1: DiagnosticResult,
test2: DiagnosticResult,
paired: bool = False,
n: Optional[int] = None
) -> Dict[str, float]:
"""
Compare two diagnostic tests.
Args:
test1: First test results
test2: Second test results
paired: Whether tests were applied to same subjects
n: Number of subjects (required if paired=True)
Returns:
Dictionary with comparison statistics
"""
# Compare sensitivities
sens_diff = test1.sensitivity - test2.sensitivity
# Compare specificities
spec_diff = test1.specificity - test2.specificity
# Compare AUCs if available
auc_diff = None
if hasattr(test1, 'auc') and hasattr(test2, 'auc'):
auc_diff = test1.auc - test2.auc
result = {
"sensitivity_difference": sens_diff,
"specificity_difference": spec_diff,
"ppv_difference": test1.ppv - test2.ppv,
"npv_difference": test1.npv - test2.npv,
"accuracy_difference": test1.accuracy - test2.accuracy,
"youden_difference": test1.youden - test2.youden
}
if auc_diff is not None:
result["auc_difference"] = auc_diff
if paired and n is not None:
# Calculate McNemar's test for paired proportions
from .contingency import Table2x2
# This would require the full 2x2 comparison table
# Simplified version for now
pass
return result
[docs]
def optimal_threshold_grid_search(
y_true: np.ndarray,
y_score: np.ndarray,
criteria: List[str] = ["youden", "accuracy", "f1"],
thresholds: Optional[np.ndarray] = None
) -> Dict[str, Dict]:
"""
Find optimal threshold using multiple criteria.
Args:
y_true: True labels
y_score: Predicted scores
criteria: List of criteria to optimize
thresholds: Specific thresholds to test (optional)
Returns:
Dictionary with optimal thresholds for each criterion
"""
if thresholds is None:
thresholds = np.unique(y_score)
thresholds = np.sort(thresholds)
results = {}
for criterion in criteria:
best_value = -np.inf
best_threshold = 0.5
for threshold in thresholds:
pred = (y_score >= threshold).astype(int)
if criterion == "youden":
tn = np.sum((y_true == 0) & (pred == 0))
fp = np.sum((y_true == 0) & (pred == 1))
tp = np.sum((y_true == 1) & (pred == 1))
fn = np.sum((y_true == 1) & (pred == 0))
sens = tp / (tp + fn) if (tp + fn) > 0 else 0
spec = tn / (tn + fp) if (tn + fp) > 0 else 0
value = sens + spec - 1
elif criterion == "accuracy":
accuracy = np.mean(pred == y_true)
value = accuracy
elif criterion == "f1":
tp = np.sum((y_true == 1) & (pred == 1))
fp = np.sum((y_true == 0) & (pred == 1))
fn = np.sum((y_true == 1) & (pred == 0))
precision = tp / (tp + fp) if (tp + fp) > 0 else 0
recall = tp / (tp + fn) if (tp + fn) > 0 else 0
if precision + recall > 0:
value = 2 * precision * recall / (precision + recall)
else:
value = 0
else:
continue
if value > best_value:
best_value = value
best_threshold = threshold
results[criterion] = {
"threshold": best_threshold,
"value": best_value
}
return results
# MODULE EXPORTS
__all__ = [
'DiagnosticMeasure',
'DiagnosticResult',
'ROCResult',
'diagnostic_test_2x2',
'diagnostic_from_data',
'roc_analysis',
'likelihood_ratio_ci',
'predictive_values_from_sens_spec',
'fagan_nomogram',
'diagnostic_accuracy_ci',
'compare_diagnostic_tests',
'optimal_threshold_grid_search'
]