Skip to content

Dataframe

Helpers for dealing with pandas.DataFrames

assert_frame_equal(left, right, **kwargs)

Asserts that two dataframes are equal regardless of the order of their rows and columns

Parameters:

Name Type Description Default
left DataFrame

A dataframe, usually the result of a function under test

required
right DataFrame

Another dataframe, usually what we expect in a test

required
**kwargs Any

Keyword arguments forwarded to pandas.testing.assert_frame_equal

{}
Source code in bquest/dataframe.py
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
def assert_frame_equal(left: pandas.DataFrame, right: pandas.DataFrame, **kwargs: Any) -> None:
    """Compare two dataframes while ignoring the order of their rows and columns.

    Both frames are first handed to ``_fix_integer_dtypes``. Each one is then
    brought into a canonical layout (columns in sorted order, rows sorted by
    all of those columns, index reset) before the actual comparison is
    delegated to ``pd_test.assert_frame_equal``.

    Args:
        left: A dataframe, usually the result of a function under test
        right: Another dataframe, usually what we expect in a test
        **kwargs: Keyword arguments of pandas.testing.assert_frame_equal
            <https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.testing.assert_frame_equal.html>,
            forwarded unchanged
    """

    def _canonical(frame: pandas.DataFrame) -> pandas.DataFrame:
        # The sorted column labels serve both as the column layout and as the row sort keys.
        column_order = sorted(frame.columns)
        reordered = frame[column_order]
        return reordered.sort_values(column_order).reset_index(drop=True)

    for frame in (left, right):
        _fix_integer_dtypes(frame)

    left_canonical = _canonical(left)
    right_canonical = _canonical(right)

    pd_test.assert_frame_equal(left_canonical, right_canonical, **kwargs)

standardize_frame_numerics(df, float_precision=2)

Standardizes numerics inside a dataframe to facilitate comparison between dataframes with respect to meaningful differences.

Parameters:

Name Type Description Default
df DataFrame

Pandas dataframe to be standardized

required
float_precision int

level of precision for rounding floats

2

Returns:

Type Description
DataFrame

Standardized dataframe

Source code in bquest/dataframe.py
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
def standardize_frame_numerics(df: pandas.DataFrame, float_precision: int = 2) -> pandas.DataFrame:
    """Bring the numeric content of a dataframe into a uniform, comparable form.

    The frame is rounded to ``float_precision`` decimals, every column whose
    dtype is selected by ``POSSIBLE_INTEGER_DTYPES`` is cast to float, missing
    values are filled with ``np.nan`` and the index is reset.

    Args:
        df: Pandas dataframe to be standardized
        float_precision: level of precision for rounding floats

    Returns:
        Standardized dataframe
    """
    # round() hands back a new frame, so the column casts below are applied to that result.
    rounded = df.round(float_precision)

    int_like_columns = rounded.select_dtypes(POSSIBLE_INTEGER_DTYPES).columns
    for column_name in int_like_columns:
        as_float = rounded[column_name].astype(float)
        rounded[column_name] = as_float

    filled = rounded.fillna(value=np.nan)
    return filled.reset_index(drop=True)