stratified Module#

Stratified analysis for confounding and effect modification.

This module provides functions for stratified analysis: Mantel-Haenszel methods for adjusting for confounding variables, tests for effect modification across strata, and direct and indirect standardization of rates.

Classes#

class episia.stats.stratified.StratifiedMethod(value)[source]#

Bases: Enum

Methods for stratified analysis.

DIRECT_STANDARDIZATION = 'direct'#
INDIRECT_STANDARDIZATION = 'indirect'#
MANTEL_HAENSZEL = 'mantel_haenszel'#
class episia.stats.stratified.StratifiedTable(tables, strata_names=None)[source]#

Bases: object

Container for stratified 2x2 tables.

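Parameters:
  • tables (List[Table2x2]) – 2x2 tables, one per stratum

  • strata_names (List[str] | None) – Optional names for the strata
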
__getitem__(idx)[source]#
__len__()[source]#
Return type:

int

__post_init__()[source]#

Validate that all tables have the same structure.

strata_names: List[str] | None = None#
tables: List[Table2x2]#
to_dict()[source]#

Convert to dictionary representation.

Return type:

Dict

class episia.stats.stratified.MantelHaenszelResult(common_or, common_rr, common_rd, or_ci, rr_ci, chi2_mh, p_value, cochran_q, q_p_value, i_squared, tau_squared)[source]#

Bases: object

Result object for Mantel-Haenszel analysis.

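Parameters:
  • common_or (float) – Common (pooled) odds ratio

  • common_rr (float) – Common RR estimate

  • common_rd (float) – Common RD estimate

  • or_ci (Tuple[float, float]) – Confidence interval for common_or

  • rr_ci (Tuple[float, float]) – Confidence interval for common_rr

  • chi2_mh (float) – Mantel-Haenszel chi-square statistic

  • p_value (float) – p-value

  • cochran_q (float) – Cochran's Q statistic

  • q_p_value (float) – p-value of the test for heterogeneity

  • i_squared (float) – I² statistic

  • tau_squared (float) – τ² statistic
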
__repr__()[source]#

Return repr(self).

Return type:

str

chi2_mh: float#
cochran_q: float#
common_or: float#
common_rd: float#
common_rr: float#
i_squared: float#
or_ci: Tuple[float, float]#
p_value: float#
q_p_value: float#
rr_ci: Tuple[float, float]#
summary()[source]#

Generate text summary.

Return type:

str

tau_squared: float#
class episia.stats.stratified.DirectStandardizationResult(crude_rate, adjusted_rate, standard_population, stratum_specific_rates, variance, ci)[source]#

Bases: object

Result object for direct standardization.

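Parameters:
  • crude_rate (float) – Crude rate

  • adjusted_rate (float) – Adjusted rate

  • standard_population (ndarray) – Standard population distribution

  • stratum_specific_rates (ndarray) – Rates in each stratum

  • variance (float) – Variance

  • ci (Tuple[float, float]) – Confidence interval
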
__repr__()[source]#

Return repr(self).

Return type:

str

adjusted_rate: float#
ci: Tuple[float, float]#
crude_rate: float#
standard_population: ndarray#
stratum_specific_rates: ndarray#
variance: float#

Functions#

episia.stats.stratified.mantel_haenszel_or(stratified_tables, confidence=0.95)[source]#

Calculate Mantel-Haenszel pooled odds ratio.

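Parameters:
  • stratified_tables – Stratified 2x2 tables (the examples on this page pass both a StratifiedTable and a plain list of Table2x2 objects)

  • confidence (float) – Confidence level
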
Returns:

MantelHaenszelResult object

Return type:

MantelHaenszelResult

Example

>>> table1 = Table2x2(10, 20, 30, 40)
>>> table2 = Table2x2(15, 25, 35, 45)
>>> result = mantel_haenszel_or([table1, table2])
episia.stats.stratified.test_effect_modification(stratified_tables, method='breslow_day')[source]#

Test for effect modification (interaction) across strata.

Parameters:
  • stratified_tables (StratifiedTable) – StratifiedTable object

  • method (str) – 'breslow_day' (default) or 'woolf'

Returns:

Dictionary with test statistics (the usage example reads the 'statistic' and 'p_value' keys)

Return type:

Dict[str, float]

episia.stats.stratified.direct_standardization(stratum_rates, stratum_populations, standard_population, confidence=0.95)[source]#

Perform direct standardization of rates.

Parameters:
  • stratum_rates (ndarray) – Rates in each stratum

  • stratum_populations (ndarray) – Population in each stratum

  • standard_population (ndarray) – Standard population distribution

  • confidence (float) – Confidence level

Returns:

DirectStandardizationResult object

Return type:

DirectStandardizationResult

episia.stats.stratified.indirect_standardization(observed_cases, stratum_populations, reference_rates, confidence=0.95)[source]#

Perform indirect standardization (SMR calculation).

Parameters:
  • observed_cases (ndarray) – Observed cases in each stratum

  • stratum_populations (ndarray) – Population in each stratum

  • reference_rates (ndarray) – Reference rates in each stratum

  • confidence (float) – Confidence level

Returns:

Dictionary with SMR and other statistics (the usage example reads the 'smr', 'ci_lower' and 'ci_upper' keys)

Return type:

Dict[str, float]

episia.stats.stratified.stratified_by_variable(data, exposure_var, outcome_var, stratify_var)[source]#

Create stratified tables from DataFrame.

Parameters:
  • data – pandas DataFrame

  • exposure_var (str) – Exposure variable name

  • outcome_var (str) – Outcome variable name

  • stratify_var (str) – Variable to stratify by

Returns:

StratifiedTable object

Return type:

StratifiedTable

Examples#

Creating stratified tables:

from episia.stats.contingency import Table2x2
from episia.stats.stratified import StratifiedTable

# Two strata (e.g., males and females)
table1 = Table2x2(a=10, b=20, c=30, d=40)
table2 = Table2x2(a=15, b=25, c=35, d=45)

stratified = StratifiedTable(
    tables=[table1, table2],
    strata_names=['Males', 'Females']
)

Mantel-Haenszel analysis:

from episia.stats.stratified import mantel_haenszel_or

mh_result = mantel_haenszel_or(stratified, confidence=0.95)
print(mh_result)
print(f"Common OR: {mh_result.common_or:.3f}")
print(f"95% CI: {mh_result.or_ci[0]:.3f}-{mh_result.or_ci[1]:.3f}")
print(f"Test for heterogeneity: p={mh_result.q_p_value:.3f}")
print(f"I² = {mh_result.i_squared:.1f}%")

Testing effect modification:

from episia.stats.stratified import test_effect_modification

em_test = test_effect_modification(stratified, method='woolf')
print(f"Homogeneity test: χ²={em_test['statistic']:.3f}, p={em_test['p_value']:.3f}")

Direct standardization:

import numpy as np
from episia.stats.stratified import direct_standardization

# Age-specific rates, population, standard population
rates = np.array([0.01, 0.05, 0.10])
pop = np.array([1000, 800, 500])
standard = np.array([1000, 1000, 1000])

result = direct_standardization(rates, pop, standard)
print(f"Crude rate: {result.crude_rate:.4f}")
print(f"Adjusted rate: {result.adjusted_rate:.4f}")
print(f"95% CI: {result.ci[0]:.4f}-{result.ci[1]:.4f}")

Indirect standardization (SMR):

import numpy as np
from episia.stats.stratified import indirect_standardization

smr_result = indirect_standardization(
    observed_cases=np.array([10, 15, 5]),
    stratum_populations=np.array([1000, 800, 500]),
    reference_rates=np.array([0.02, 0.03, 0.01])
)
print(f"SMR: {smr_result['smr']:.3f}")
print(f"95% CI: {smr_result['ci_lower']:.3f}-{smr_result['ci_upper']:.3f}")

From DataFrame:

import pandas as pd
from episia.stats.stratified import stratified_by_variable

df = pd.DataFrame({
    'exposed': [1, 1, 0, 0, 1, 0],
    'case': [1, 0, 1, 0, 1, 0],
    'age_group': ['young', 'old', 'young', 'old', 'young', 'old']
})

stratified = stratified_by_variable(
    data=df,
    exposure_var='exposed',
    outcome_var='case',
    stratify_var='age_group'
)