Skip to content

PySpark to Pandas DataFrame Conversion

PySparkToPandasTransformer

Bases: TransformerInterface

Converts a PySpark DataFrame to a Pandas DataFrame.

Example

from rtdip_sdk.pipelines.transformers import PySparkToPandasTransformer

pyspark_to_pandas = PySparkToPandasTransformer(
    df=df
)

result = pyspark_to_pandas.transform()

Parameters:

| Name | Type | Description | Default |
|------|------|-------------|---------|
| df | DataFrame | PySpark DataFrame to be converted | required |

Source code in src/sdk/python/rtdip_sdk/pipelines/transformers/spark/pyspark_to_pandas.py
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
class PySparkToPandasTransformer(TransformerInterface):
    """
    Transformer that turns a PySpark DataFrame into a Pandas DataFrame.

    Example
    --------
    ```python
    from rtdip_sdk.pipelines.transformers import PySparkToPandasTransformer

    pyspark_to_pandas = PySparkToPandasTransformer(
        df=df
    )

    result = pyspark_to_pandas.transform()
    ```

    Parameters:
        df (DataFrame): PySpark DataFrame to be converted
    """

    # The PySpark DataFrame handed to the constructor; read by `transform()`.
    df: PySparkDataFrame

    def __init__(self, df: PySparkDataFrame) -> None:
        # Only stores the reference; the conversion itself happens in
        # `transform()`.
        self.df = df

    @staticmethod
    def system_type():
        """
        Attributes:
            SystemType (Environment): Requires PYSPARK
        """
        return SystemType.PYSPARK

    @staticmethod
    def libraries():
        """
        Returns:
            Libraries: A newly constructed `Libraries` object with nothing added to it.
        """
        return Libraries()

    @staticmethod
    def settings() -> dict:
        """
        Returns:
            dict: An empty dictionary.
        """
        return {}

    def pre_transform_validation(self):
        """
        Returns:
            bool: Always `True`; no check is performed before the transform.
        """
        return True

    def post_transform_validation(self):
        """
        Returns:
            bool: Always `True`; no check is performed after the transform.
        """
        return True

    def transform(self) -> PandasDataFrame:
        """
        Calls `toPandas()` on the stored PySpark DataFrame.

        Returns:
            DataFrame: A Pandas dataframe converted from a PySpark DataFrame.
        """
        return self.df.toPandas()

system_type() staticmethod

Attributes:

| Name | Type | Description |
|------|------|-------------|
| SystemType | Environment | Requires PYSPARK |


Source code in src/sdk/python/rtdip_sdk/pipelines/transformers/spark/pyspark_to_pandas.py
47
48
49
50
51
52
53
@staticmethod
def system_type():
    """
    Reports the system type associated with this transformer.

    Attributes:
        SystemType (Environment): Requires PYSPARK
    """
    required_environment = SystemType.PYSPARK
    return required_environment

transform()

Returns:

| Name | Type | Description |
|------|------|-------------|
| DataFrame | DataFrame | A Pandas dataframe converted from a PySpark DataFrame. |


Source code in src/sdk/python/rtdip_sdk/pipelines/transformers/spark/pyspark_to_pandas.py
70
71
72
73
74
75
76
def transform(self) -> PandasDataFrame:
    """
    Calls `toPandas()` on the stored PySpark DataFrame and hands back the result.

    Returns:
        DataFrame: A Pandas dataframe converted from a PySpark DataFrame.
    """
    return self.df.toPandas()