Bases: BaseRawToMDMTransformer
Converts raw ERCOT data into the Meters Data Model (MDM).
Please see the BaseRawToMDMTransformer base class for the required arguments and methods.
from rtdip_sdk.pipelines.transformers import ERCOTToMDMTransformer
from rtdip_sdk.pipelines.utilities import SparkSessionUtility

# Not required if using Databricks
spark = SparkSessionUtility(config={}).execute()

# NOTE: `df` is not created in this snippet; it is assumed to be an existing
# Spark DataFrame holding the raw ERCOT data to convert.
ercot_to_mdm_transformer = ERCOTToMDMTransformer(
    spark=spark,
    data=df,
    output_type="usage",
    name=None,
    description=None,
    value_type=None,
    version=None,
    series_id=None,
    series_parent_id=None
)

# Run the conversion and keep the transformed output.
result = ercot_to_mdm_transformer.transform()
Source code in src/sdk/python/rtdip_sdk/pipelines/transformers/spark/iso/ercot_to_mdm.py
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
class ERCOTToMDMTransformer(BaseRawToMDMTransformer):
    """
    Converts raw ERCOT data into the Meters Data Model (MDM).

    Please check the BaseRawToMDMTransformer for the required arguments and methods.

    Example
    --------
    ```python
    from rtdip_sdk.pipelines.transformers import ERCOTToMDMTransformer
    from rtdip_sdk.pipelines.utilities import SparkSessionUtility

    # Not required if using Databricks
    spark = SparkSessionUtility(config={}).execute()

    ercot_to_mdm_transformer = ERCOTToMDMTransformer(
        spark=spark,
        data=df,
        output_type="usage",
        name=None,
        description=None,
        value_type=None,
        version=None,
        series_id=None,
        series_parent_id=None
    )

    result = ercot_to_mdm_transformer.transform()
    ```
    """

    spark: SparkSession
    data: DataFrame

    # Schema the incoming raw ERCOT frame is expected to follow.
    input_schema = ERCOT_SCHEMA

    # Column mappings for the target model. The values read like Spark SQL
    # expressions (single-quoted values being string literals); presumably
    # the base class evaluates them -- confirm in BaseRawToMDMTransformer.
    uid_col = "variable"
    series_id_col = "'series_std_001'"
    timestamp_col = "to_utc_timestamp(StartTime, 'America/Chicago')"
    interval_timestamp_col = "Timestamp + INTERVAL 1 HOURS"
    value_col = "value"
    series_parent_id_col = "'series_parent_std_001'"
    name_col = "'ERCOT API'"
    uom_col = "'mwh'"
    description_col = "'ERCOT data pulled from ERCOT ISO API'"
    timestamp_start_col = "StartTime"
    timestamp_end_col = "StartTime + INTERVAL 1 HOURS"
    time_zone_col = "'America/Chicago'"
    version_col = "'1'"
    series_type = SeriesType.Hour
    model_type = ModelType.Default
    value_type = ValueType.Usage
    properties_col = "null"

    def _pre_process(self) -> DataFrame:
        """
        Reshape the pre-processed ERCOT frame and derive its ``StartTime``.

        The frame returned by the parent ``_pre_process`` is passed through
        ``melt`` with ``Date``, ``HourEnding`` and ``DstFlag`` as identifier
        columns and the ERCOT region columns as value columns. A ``StartTime``
        column is then added as ``Date`` plus the hour number taken from the
        text before the first ``:`` in ``HourEnding``.

        Returns:
            DataFrame: The melted frame with the extra ``StartTime`` column.
        """
        identifier_columns = ["Date", "HourEnding", "DstFlag"]
        region_columns = [
            "Coast",
            "East",
            "FarWest",
            "North",
            "NorthCentral",
            "SouthCentral",
            "Southern",
            "West",
            "SystemTotal",
        ]

        parent_df: DataFrame = super()._pre_process()
        melted_df = melt(
            parent_df,
            id_vars=identifier_columns,
            value_vars=region_columns,
        )

        # MAKE_INTERVAL takes (years, months, weeks, days, hours, mins, secs),
        # so only the hours slot is populated here.
        # NOTE(review): the HourEnding hour is applied without any shift;
        # confirm this is the intended start of the interval.
        start_time = F.expr(
            "Date + MAKE_INTERVAL(0,0,0,0,cast(split(HourEnding,':')[0] as integer),0,0)"
        )
        return melted_df.withColumn("StartTime", start_time)
|