Source code for episia.dhis2.adapter

"""
episia.dhis2.adapter - Convert DHIS2 API responses to SurveillanceDataset.
"""
from __future__ import annotations

from typing import Any, Dict, List, Optional

from ..data.surveillance import SurveillanceDataset


[docs] class DHIS2Adapter: """ Converts raw DHIS2 analytics API responses to SurveillanceDataset. This class is used internally by DHIS2Client but can also be used standalone to convert data you have already fetched. Example:: adapter = DHIS2Adapter() ds = adapter.from_analytics_response(raw_json) """
[docs] def from_analytics_response( self, response: Dict[str, Any], cases_element: Optional[str] = None, deaths_element: Optional[str] = None, ) -> SurveillanceDataset: """ Convert a DHIS2 /api/analytics JSON response to SurveillanceDataset. The analytics API returns rows like: [dx, pe, ou, value] where dx=data element UID, pe=period, ou=org unit UID. Args: response: Raw JSON dict from DHIS2 analytics endpoint. cases_element: UID of the cases data element (filters rows). deaths_element: UID of the deaths data element (optional). Returns: SurveillanceDataset with date_col='period', cases_col='cases'. """ try: import pandas as pd except ImportError: raise ImportError("pandas required: pip install pandas") headers = [h["name"] for h in response.get("headers", [])] rows = response.get("rows", []) if not rows: import pandas as pd empty = pd.DataFrame(columns=["period", "org_unit", "data_element", "cases"]) return SurveillanceDataset(empty, date_col="period", cases_col="cases") df = pd.DataFrame(rows, columns=headers) # Rename standard DHIS2 columns rename = {} if "pe" in df.columns: rename["pe"] = "period" if "ou" in df.columns: rename["ou"] = "org_unit" if "dx" in df.columns: rename["dx"] = "data_element" df = df.rename(columns=rename) df["value"] = pd.to_numeric(df["value"], errors="coerce").fillna(0) # Filter and pivot cases vs deaths if cases_element and "data_element" in df.columns: cases_df = df[df["data_element"] == cases_element].copy() cases_df = cases_df.rename(columns={"value": "cases"}) # If filter returned nothing, fall back to all rows if len(cases_df) == 0: cases_df = df.copy() cases_df = cases_df.rename(columns={"value": "cases"}) else: cases_df = df.copy() cases_df = cases_df.rename(columns={"value": "cases"}) # Ensure cases column always exists if "cases" not in cases_df.columns: cases_df["cases"] = 0 if deaths_element and "data_element" in df.columns: deaths_df = df[df["data_element"] == deaths_element][["period","org_unit","value"]].copy() deaths_df = deaths_df.rename(columns={"value": "deaths"}) cases_df = cases_df.merge(deaths_df, on=["period","org_unit"], how="left") deaths_col = "deaths" else: deaths_col = None # Parse period → datetime cases_df["period"] = self._parse_dhis2_period(cases_df["period"]) return SurveillanceDataset( cases_df, date_col = "period", cases_col = "cases", deaths_col = deaths_col, district_col = "org_unit" if "org_unit" in cases_df.columns else None, )
[docs] def from_data_value_sets( self, response: Dict[str, Any], ) -> "pd.DataFrame": """ Convert a DHIS2 /api/dataValueSets response to a flat DataFrame. Args: response: Raw JSON from dataValueSets endpoint. Returns: pandas DataFrame with columns: period, org_unit, data_element, value. """ try: import pandas as pd except ImportError: raise ImportError("pandas required: pip install pandas") values = response.get("dataValues", []) if not values: return pd.DataFrame(columns=["period","org_unit","data_element","value"]) rows = [] for v in values: rows.append({ "period": v.get("period",""), "org_unit": v.get("orgUnit",""), "data_element": v.get("dataElement",""), "value": float(v.get("value", 0) or 0), }) df = pd.DataFrame(rows) df["period"] = self._parse_dhis2_period(df["period"]) return df
def _parse_dhis2_period(self, series) -> "pd.Series": """ Parse DHIS2 period strings to pandas Timestamps. Handles: 2024W01 ISO week 202401 month YYYYMM 2024Q1 quarter 2024 year """ import pandas as pd import re def _parse_one(p: str): p = str(p).strip() # ISO week: 2024W01 m = re.match(r'^(\d{4})W(\d{1,2})$', p) if m: year, week = int(m.group(1)), int(m.group(2)) return pd.Timestamp.fromisocalendar(year, week, 1) # Month: 202401 m = re.match(r'^(\d{4})(\d{2})$', p) if m: return pd.Timestamp(f"{m.group(1)}-{m.group(2)}-01") # Quarter: 2024Q1 m = re.match(r'^(\d{4})Q(\d)$', p) if m: month = (int(m.group(2)) - 1) * 3 + 1 return pd.Timestamp(f"{m.group(1)}-{month:02d}-01") # Year: 2024 m = re.match(r'^(\d{4})$', p) if m: return pd.Timestamp(f"{m.group(1)}-01-01") # Fallback try: return pd.Timestamp(p) except Exception: return pd.NaT return series.apply(_parse_one)