Skip to content

ERCOT To Meters Data Model

ERCOTToMDMTransformer

Bases: BaseRawToMDMTransformer

Converts raw ERCOT data into the Meters Data Model.

Please check the BaseRawToMDMTransformer for the required arguments and methods.

Example

from rtdip_sdk.pipelines.transformers import ERCOTToMDMTransformer
from rtdip_sdk.pipelines.utilities import SparkSessionUtility

# Not required if using Databricks
spark = SparkSessionUtility(config={}).execute()

ercot_to_mdm_transformer = ERCOTToMDMTransformer(
    spark=spark,
    data=df,
    output_type="usage",
    name=None,
    description=None,
    value_type=None,
    version=None,
    series_id=None,
    series_parent_id=None
)

result = ercot_to_mdm_transformer.transform()
Source code in src/sdk/python/rtdip_sdk/pipelines/transformers/spark/iso/ercot_to_mdm.py
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
class ERCOTToMDMTransformer(BaseRawToMDMTransformer):
    """
    Transformer that maps raw ERCOT data onto the Meters Data Model (MDM).

    The constructor arguments and the public methods are inherited from
    BaseRawToMDMTransformer; refer to that class for their details. This
    subclass only supplies the ERCOT-specific column expressions and a
    pre-processing step that reshapes the input before the mapping is applied.

    Example
    --------
    ```python
    from rtdip_sdk.pipelines.transformers import ERCOTToMDMTransformer
    from rtdip_sdk.pipelines.utilities import SparkSessionUtility

    # Not required if using Databricks
    spark = SparkSessionUtility(config={}).execute()

    ercot_to_mdm_transformer = ERCOTToMDMTransformer(
        spark=spark,
        data=df,
        output_type="usage",
        name=None,
        description=None,
        value_type=None,
        version=None,
        series_id=None,
        series_parent_id=None
    )

    result = ercot_to_mdm_transformer.transform()
    ```
    """

    spark: SparkSession
    data: DataFrame

    # Schema the incoming DataFrame is expected to follow.
    input_schema = ERCOT_SCHEMA

    # The values below are expression strings; those wrapped in inner single
    # quotes are literal constants, the others refer to columns of the
    # pre-processed DataFrame.
    # presumably evaluated as Spark SQL by the base class -- verify there.
    uid_col = "variable"
    series_id_col = "'series_std_001'"
    timestamp_col = "to_utc_timestamp(StartTime, 'America/Chicago')"
    interval_timestamp_col = "Timestamp + INTERVAL 1 HOURS"
    value_col = "value"
    series_parent_id_col = "'series_parent_std_001'"
    name_col = "'ERCOT API'"
    uom_col = "'mwh'"
    description_col = "'ERCOT data pulled from ERCOT ISO API'"
    timestamp_start_col = "StartTime"
    timestamp_end_col = "StartTime + INTERVAL 1 HOURS"
    time_zone_col = "'America/Chicago'"
    version_col = "'1'"

    # Fixed classification of every series emitted by this transformer.
    series_type = SeriesType.Hour
    model_type = ModelType.Default
    value_type = ValueType.Usage
    properties_col = "null"

    def _pre_process(self) -> DataFrame:
        """
        Reshape the parent's pre-processed data and derive `StartTime`.

        The per-region columns are melted while `Date`, `HourEnding` and
        `DstFlag` are kept as identifier columns, then a `StartTime` column is
        added as `Date` plus the hour component of `HourEnding`.

        Returns:
            DataFrame: The melted DataFrame with the extra `StartTime` column.
        """
        identifier_columns = ["Date", "HourEnding", "DstFlag"]
        region_columns = [
            "Coast",
            "East",
            "FarWest",
            "North",
            "NorthCentral",
            "SouthCentral",
            "Southern",
            "West",
            "SystemTotal",
        ]
        # Takes the text before the first ':' in HourEnding, casts it to an
        # integer and adds that many hours to Date.
        # NOTE(review): the column name suggests HourEnding marks the END of
        # the hour, in which case this value is the interval end rather than
        # its start -- confirm against the ERCOT feed. DstFlag is carried
        # through but not used in this computation.
        start_time_sql = "Date + MAKE_INTERVAL(0,0,0,0,cast(split(HourEnding,':')[0] as integer),0,0)"

        base_df: DataFrame = super()._pre_process()

        # presumably melt() emits `variable`/`value` columns, which is what
        # uid_col and value_col refer to -- verify against the helper.
        melted_df = melt(
            base_df,
            id_vars=identifier_columns,
            value_vars=region_columns,
        )
        return melted_df.withColumn("StartTime", F.expr(start_time_sql))