Skip to content

BetweenStats

The BetweenStats() class is meant for comparing numerical values across multiple groups. Use it in cases such as:

  • Blood glucose levels before and after treatment
  • Customer satisfaction ratings between two product versions
  • Weight changes across three fitness plans
  • Air quality index in urban vs. rural areas

It supports tests for two or more groups, paired or independent samples, equal or unequal variances, and parametric or non-parametric approaches. It also offers a large set of styling options.

Plot style

  • Default
# mkdocs: render
from fleur import BetweenStats
from fleur import data

df = data.load_iris()

BetweenStats(df["sepal_length"], df["species"]).plot()
  • Change colors
# mkdocs: render
from fleur import BetweenStats
from fleur import data

df = data.load_iris()

BetweenStats(df["sepal_length"], df["species"]).plot(
    colors=["#005f73", "#ee9b00", "#9b2226"]
)
  • Change orientation
# mkdocs: render
from fleur import BetweenStats
from fleur import data

df = data.load_iris()

BetweenStats(df["sepal_length"], df["species"]).plot(
    orientation="horizontal"
)
  • Remove stats from plot
# mkdocs: render
from fleur import BetweenStats
from fleur import data

df = data.load_iris()

BetweenStats(df["sepal_length"], df["species"]).plot(
    show_stats=False
)
  • Remove means from plot
# mkdocs: render
from fleur import BetweenStats
from fleur import data

df = data.load_iris()

BetweenStats(df["sepal_length"], df["species"]).plot(
    show_means=False
)
  • Hide specific elements
# mkdocs: render
from fleur import BetweenStats
from fleur import data

df = data.load_iris()

BetweenStats(df["sepal_length"], df["species"]).plot(
    box=False,
    scatter=False,
    violin=True, # default
)
  • Advanced example
# mkdocs: render
from fleur import BetweenStats, data
import polars as pl
import matplotlib.pyplot as plt

df = data.load_titanic("polars")
df = (
    df.select(pl.col("Age"), pl.col("Survived"))
    .drop_nulls()
    .with_columns(pl.col("Survived").cast(pl.String).cast(pl.Categorical))
    .with_columns(
        Survived=pl.when(pl.col("Survived") == "1")
        .then(pl.lit("Survived"))
        .otherwise(pl.lit("Died"))
    )
)

fig, ax = plt.subplots()
BetweenStats(df["Survived"], df["Age"], approach="nonparametric").plot(
    ax=ax,
    orientation="horizontal",
    scatter_kws={"alpha": 0.3, "s": 20},
    jitter_amount=0.3,
    colors=["#005f73", "#ee9b00"],
)

Statistics

  • Dependent (paired) samples
# mkdocs: render
from fleur import BetweenStats
from fleur import data

df = data.load_iris()
df = df[df["species"] != "setosa"] # keep only 2 groups

BetweenStats(df["sepal_length"], df["species"], paired=True).plot()
  • Non-parametric test
# mkdocs: render
from fleur import BetweenStats
from fleur import data

df = data.load_iris()
df = df[df["species"] != "setosa"] # keep only 2 groups

BetweenStats(df["sepal_length"], df["species"], approach="nonparametric").plot()
  • Non-parametric test + paired samples
# mkdocs: render
from fleur import BetweenStats
from fleur import data

df = data.load_iris()
df = df[df["species"] != "setosa"] # keep only 2 groups

BetweenStats(
    df["sepal_length"],
    df["species"],
    approach="nonparametric",
    paired=True,
).plot()
  • Robust
# mkdocs: render
from fleur import BetweenStats
from fleur import data

df = data.load_iris()
df = df[df["species"] != "setosa"] # keep only 2 groups

BetweenStats(
    df["sepal_length"],
    df["species"],
    approach="robust",
    trim=0.1,
).plot()



Learn more in the BetweenStats() reference page.