Why is the mean toxicity of racial subgroups lower than the mean toxicity of the entire race category?

Question

I’m working on a dataset that measures toxicity scores for different racial subgroups. I aggregated the data to analyze the mean toxicity for each racial subgroup and compared it to the mean toxicity for the entire “Race” category. The code looks horrible right now and I don’t have the mental effort at the moment to fully explain the project. My main problem is that the mean toxicity for samples of individual subgroups (e.g., “Black”, “White”, “Asian”) is significantly lower than the mean toxicity for the whole population of these races.

For example,

mean for white (total): 0.5366622949663186

mean for white (10% sample): 0.021136063408190225

mean for white (60% sample): 0.020211360634081903

These sample averages are way too low compared to the total average. I just don't know why.

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

categories = {
    "Sexual Orientation": ["lesbian", "gay", "bisexual", "transgender", "trans", "queer", "lgbt", "lgbtq", "homosexual", "straight", "heterosexual"],
    "Gender Identity": ["male", "female", "nonbinary"],
    "Race": ["african", "african american", "black", "white", "european", "hispanic", "latino", "latina", "latinx", "mexican", "asian", "indian", "middle eastern", "chinese", "japanese"],
    "Nationality": ["canadian", "american"],
    "Religion": ["christian", "muslim", "jewish", "buddhist", "catholic", "protestant", "sikh", "taoist"],
    "Age": ["old", "older", "young", "younger", "teenage", "millenial", "middle aged", "elderly"],
    "Disability": ["blind", "deaf", "paralyzed"]
}

# Load dataset correctly
df = pd.read_csv("toxity_per_attribute.csv", dtype=str)  # Read all as string initially

# Convert TOXICITY to float
df["TOXICITY"] = pd.to_numeric(df["TOXICITY"], errors="coerce")

# Remove rows where TOXICITY is greater than 1 (to fix misalignment issues)
df = df[df["TOXICITY"] <= 1]

# Convert subgroup columns from "TRUE"/"FALSE" strings to actual booleans
subgroup_cols = df.columns[2:]  # All columns except Wiki_ID and TOXICITY
df[subgroup_cols] = df[subgroup_cols].replace({"TRUE": True, "FALSE": False})

# Step 3.1: Remove rows where all subgroup values are False
df_filtered = df.copy()
df_filtered["subgroup_sum"] = df_filtered[subgroup_cols].sum(axis=1)  # Ensure numeric sum
df_filtered = df_filtered[df_filtered["subgroup_sum"] > 0]  # Keep rows where at least one subgroup is True
df_filtered.drop(columns=["subgroup_sum"], inplace=True)

# Step 2: Classify Subgroups into Protected Classes
protected_classes = categories

def combine_subgroups(df, class_dict):
    for class_name, subgroups in class_dict.items():
        df[class_name] = df[subgroups].max(axis=1)  # Ensure presence is captured
combine_subgroups(df_filtered, protected_classes)

# Step 3: Ensure subgroup values remain 0 or 1
df_filtered[subgroup_cols] = df_filtered[subgroup_cols].replace({True: 1, False: 0})

# Step 4: Compute Correlation between Protected Classes and Toxicity
correlations = {}
for class_name in protected_classes.keys():
    filtered_df = df_filtered[[class_name, "TOXICITY"]].dropna()
    if filtered_df[class_name].nunique() > 1:
        correlations[class_name] = filtered_df[class_name].corr(filtered_df['TOXICITY'])
    else:
        correlations[class_name] = 0

correlation_table = pd.DataFrame({
    'Protected Class': correlations.keys(),
    'Correlation': correlations.values()
})
print("Updated Correlation Table:")
print(correlation_table)

# Step 5: Statistical Analysis
mean_toxicity = df_filtered['TOXICITY'].mean()
std_toxicity = df_filtered['TOXICITY'].std()
confidence_interval = (
    max(0, mean_toxicity - 1.96 * std_toxicity),
    min(1, mean_toxicity + 1.96 * std_toxicity)
)
print(f"Mean Toxicity: {mean_toxicity}, Std Dev: {std_toxicity}, 95% Range: {confidence_interval}")

# Step 6: Analyze Subgroups for a Chosen Protected Class
chosen_class = "Race"
subgroup_means = {}
for subgroup in categories[chosen_class]:
    if subgroup in df_filtered.columns:
        mean_value = df_filtered[df_filtered[subgroup] > 0]["TOXICITY"].mean()
        print(f"Toxicity mean for {subgroup}: {mean_value}")
        subgroup_means[subgroup] = mean_value
    else:
        print(f"Warning: {subgroup} not found in df_filtered")
        subgroup_means[subgroup] = None
print(f"Toxicity means for {chosen_class} subgroups:", subgroup_means)

# Step 7: Boxplot Visualization for Top 3 Correlations
top_correlations = correlation_table.nlargest(3, 'Correlation')
for class_name in top_correlations["Protected Class"]:
    plt.figure(figsize=(8, 6))
    sns.boxplot(x=df_filtered[class_name], y=df_filtered["TOXICITY"])
    plt.xlabel(class_name)
    plt.ylabel("Toxicity")
    plt.title(f"Toxicity Distribution by {class_name}")
    plt.show()

# Step 8: Random Sampling Analysis for Chosen Protected Class
def random_sample_analysis(df, sample_size, column):
    sample = df.sample(frac=sample_size, random_state=42)
    mean_sample = sample[column].mean()
    std_sample = sample[column].std()
    margin_of_error = 1.96 * (std_sample / np.sqrt(len(sample)))
    return mean_sample, std_sample, margin_of_error

sample_10 = random_sample_analysis(df_filtered, 0.1, "Race")
sample_60 = random_sample_analysis(df_filtered, 0.6, "Race")
print(f"10% Sample (Race): Mean = {sample_10[0]}, Std Dev = {sample_10[1]}, MoE = {sample_10[2]}")
print(f"60% Sample (Race): Mean = {sample_60[0]}, Std Dev = {sample_60[1]}, MoE = {sample_60[2]}")

# Step 8.1: Random Sampling for Subgroups within the Chosen Protected Class
for subgroup in categories[chosen_class]:
    if subgroup in df_filtered.columns:
        sample_10 = random_sample_analysis(df_filtered, 0.1, subgroup)
        sample_60 = random_sample_analysis(df_filtered, 0.6, subgroup)
        print(f"10% Sample ({subgroup}): Mean = {sample_10[0]}, Std Dev = {sample_10[1]}, MoE = {sample_10[2]}")
        print(f"60% Sample ({subgroup}): Mean = {sample_60[0]}, Std Dev = {sample_60[1]}, MoE = {sample_60[2]}")

sns.histplot(df_filtered["TOXICITY"], bins=20, kde=True)
plt.xlabel("Toxicity")
plt.ylabel("Frequency")
plt.title("Toxicity Distribution in Full Dataset")
plt.show()

for subgroup in categories["Race"]:
    if subgroup in df_filtered.columns:
        sample_10 = df_filtered.sample(frac=0.1, random_state=42)
        sample_60 = df_filtered.sample(frac=0.6, random_state=42)

        # Check how many rows actually have the subgroup labeled True
        print(f"{subgroup}: {sample_10[sample_10[subgroup] > 0].shape[0]} / {sample_10.shape[0]} rows in 10% sample")
        print(f"{subgroup}: {sample_60[sample_60[subgroup] > 0].shape[0]} / {sample_60.shape[0]} rows in 60% sample")

I tried this:

for subgroup in categories[chosen_class]:
    if subgroup in df_filtered.columns:
        sample_10 = random_sample_analysis(df_filtered, 0.1, subgroup)
        sample_60 = random_sample_analysis(df_filtered, 0.6, subgroup)
        print(f"10% Sample ({subgroup}): Mean = {sample_10[0]}, Std Dev = {sample_10[1]}, MoE = {sample_10[2]}")
        print(f"60% Sample ({subgroup}): Mean = {sample_60[0]}, Std Dev = {sample_60[1]}, MoE = {sample_60[2]}")

Which I thought would work but it's still giving me sample means that are too low compared to total population means.

Why is the mean toxicity of racial subgroups lower than the mean toxicity of the entire race category?

Answers (0)

Related Questions