Reputation: 2669
I am trying to simulate some ABC experiments to test out a few scenarios, but I am finding that I generate exactly zero false negatives.
I want to test for significance with a t-test down the line, so first I set up the base simulation parameters: a baseline conversion rate of 0.1 and a relative effect size of 5%. I chose 0.1 for my alpha and 0.2 for my beta.
# Simulation parameters
num_repetitions = 1000
true_effect_B_prob = 0.5
true_effect_C_prob = 0.5
alpha = 0.1  # Significance level for t-tests
beta = 0.2  # Type II error rate (power = 1 - beta)
CvR = 0.1  # Baseline conversion rate
sample_variance = CvR * (1 - CvR)  # Variance of a Bernoulli(CvR) outcome
MDE = 0.05  # Minimum detectable effect (relative)
test_CvR = CvR + CvR * MDE
effect_size = test_CvR - CvR  # Absolute lift: 0.005
analysis = TTestIndPower()
n_samples = int(np.ceil(tt_ind_solve_power(effect_size / np.sqrt(sample_variance), power=1.0 - beta, nobs1=None, alpha=alpha, alternative='two-sided')))
print("Calculated sample size:", n_samples)
With these parameters, I can check that I get the required power with the following code:
es = effect_size / np.sqrt(sample_variance)
analysis = smp.TTestIndPower()
analysis.power(effect_size=es, nobs1=n_samples, alpha=0.1)
And this gives me 0.8.
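Equivalently, I can have tt_ind_solve_power solve for the power with the sample size fixed; it should land on the same ~0.8:
# Same check from the other direction: solve for power at the computed n
print(tt_ind_solve_power(effect_size=es, nobs1=n_samples, alpha=alpha, power=None))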
The simulation itself is really simple. First I generate samples from normal distributions with the requisite parameters:
def generate_ABC_samples(CvR, MDE, sample_variance, n_samples, true_effect_B_prob, true_effect_C_prob):
    samples_A = np.random.normal(CvR, sample_variance, n_samples)
    has_true_effect_B = np.random.rand() < true_effect_B_prob
    has_true_effect_C = np.random.rand() < true_effect_C_prob
    mean_B = CvR + CvR * MDE if has_true_effect_B else CvR
    mean_C = CvR + CvR * MDE if has_true_effect_C else CvR
    sample_variance_B = mean_B * (1.0 - mean_B)
    sample_variance_C = mean_C * (1.0 - mean_C)
    samples_B = np.random.normal(mean_B, sample_variance_B, n_samples)
    samples_C = np.random.normal(mean_C, sample_variance_C, n_samples)
    return samples_A, samples_B, samples_C, has_true_effect_B, has_true_effect_C
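To eyeball the generator I run a throwaway one-off draw (the a, b, c names are just scratch variables, not part of the simulation):
# Throwaway smoke test: one draw from the generator
a, b, c, eff_b, eff_c = generate_ABC_samples(
    CvR, MDE, sample_variance, n_samples, true_effect_B_prob, true_effect_C_prob
)
print("sample means:", a.mean(), b.mean(), c.mean())
print("true effects drawn:", eff_b, eff_c)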
Then, for each repetition, I check whether the t-test comes back significant and whether there was a true effect, and classify the result (true/false positives and negatives):
def simulate_ABC_experiment(CvR, MDE, sample_variance, true_effect_B_prob, true_effect_C_prob, alpha, n_samples):
    samples_A, samples_B, samples_C, has_true_effect_B, has_true_effect_C = generate_ABC_samples(
        CvR, MDE, sample_variance, n_samples, true_effect_B_prob, true_effect_C_prob
    )
    _, p_value_B = ttest_ind(samples_A, samples_B)
    _, p_value_C = ttest_ind(samples_A, samples_C)
    significant_B = p_value_B < alpha
    significant_C = p_value_C < alpha
    results = {
        'true_positives_B': has_true_effect_B and significant_B,
        'true_positives_C': has_true_effect_C and significant_C,
        'true_negatives_B': not has_true_effect_B and not significant_B,
        'true_negatives_C': not has_true_effect_C and not significant_C,
        'false_positives_B': not has_true_effect_B and significant_B,
        'false_positives_C': not has_true_effect_C and significant_C,
        'false_negatives_B': has_true_effect_B and not significant_B,
        'false_negatives_C': has_true_effect_C and not significant_C
    }
    return results
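A single call shows what the outcome dict looks like (again just a quick check, not part of the loop):
# Quick look at one experiment's outcome dict
print(simulate_ABC_experiment(CvR, MDE, sample_variance, true_effect_B_prob, true_effect_C_prob, alpha, n_samples))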
Then there is some code to loop over the repetitions:
def simulate_ABC_experiments(CvR, MDE, sample_variance, num_repetitions, true_effect_B_prob, true_effect_C_prob, alpha, n_samples):
    summary = {
        'true_positives_B': 0,
        'true_positives_C': 0,
        'true_negatives_B': 0,
        'true_negatives_C': 0,
        'false_positives_B': 0,
        'false_positives_C': 0,
        'false_negatives_B': 0,
        'false_negatives_C': 0,
        'experiment_false_positives': 0,
        'experiment_false_negatives': 0
    }
    for _ in range(num_repetitions):
        results = simulate_ABC_experiment(CvR, MDE, sample_variance, true_effect_B_prob, true_effect_C_prob, alpha, n_samples)
        for key in summary:
            if key in results:
                summary[key] += results[key]
        # Experiment-level calculations
        if results['false_positives_B'] or results['false_positives_C']:
            summary['experiment_false_positives'] += 1
        if results['false_negatives_B'] or results['false_negatives_C']:
            summary['experiment_false_negatives'] += 1
    return summary
And finally some bookkeeping to turn the counts into rates:
results = simulate_ABC_experiments(CvR, MDE, sample_variance, num_repetitions, true_effect_B_prob, true_effect_C_prob, alpha, n_samples)
# Calculate rates
fpr_B = results['false_positives_B'] / num_repetitions
fnr_B = results['false_negatives_B'] / num_repetitions
fpr_C = results['false_positives_C'] / num_repetitions
fnr_C = results['false_negatives_C'] / num_repetitions
fpr_experiment = results['experiment_false_positives'] / num_repetitions
fnr_experiment = results['experiment_false_negatives'] / num_repetitions
# Create DataFrame for results
results_data = {
    'True Positives': [results['true_positives_B'], results['true_positives_C'], np.nan],
    'True Negatives': [results['true_negatives_B'], results['true_negatives_C'], np.nan],
    'False Positives': [results['false_positives_B'], results['false_positives_C'], np.nan],
    'False Negatives': [results['false_negatives_B'], results['false_negatives_C'], np.nan],
    'False Positive Rate': [fpr_B, fpr_C, fpr_experiment],
    'False Negative Rate': [fnr_B, fnr_C, fnr_experiment]
}
index = ['Variation B', 'Variation C', 'Experiment']
df_results = pd.DataFrame(results_data, index=index)
Yet, when I run the whole simulation, I consistently see zero false negatives. My intuition is that I should miss roughly 100-200 true effects, but this isn't happening.
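For reference, here is the arithmetic behind that expectation (my own back-of-the-envelope reasoning, not library output):
# A variation has a true effect with probability 0.5, and a test powered
# at 0.8 should miss a true effect about beta = 20% of the time, so:
expected_fn_per_variation = num_repetitions * true_effect_B_prob * beta
print(expected_fn_per_variation)  # 1000 * 0.5 * 0.2 = 100, i.e. ~100-200 across B and C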
Why is this?
The whole simulation code is here (for easy copying):
import numpy as np
import pandas as pd
from scipy.stats import ttest_ind, norm
from statsmodels.stats.power import TTestIndPower, tt_ind_solve_power
import statsmodels.stats.power as smp
np.random.seed(42)

# Simulation parameters (repeated here so this block runs standalone)
num_repetitions = 1000
true_effect_B_prob = 0.5
true_effect_C_prob = 0.5
alpha = 0.1  # Significance level for t-tests
beta = 0.2  # Type II error rate (power = 1 - beta)
CvR = 0.1  # Baseline conversion rate
sample_variance = CvR * (1 - CvR)
MDE = 0.05  # Minimum detectable effect (relative)
test_CvR = CvR + CvR * MDE
effect_size = test_CvR - CvR
n_samples = int(np.ceil(tt_ind_solve_power(effect_size / np.sqrt(sample_variance), power=1.0 - beta, nobs1=None, alpha=alpha, alternative='two-sided')))

# Function to generate samples for variations A, B, and C
def generate_ABC_samples(CvR, MDE, sample_variance, n_samples, true_effect_B_prob, true_effect_C_prob):
    samples_A = np.random.normal(CvR, sample_variance, n_samples)
    has_true_effect_B = np.random.rand() < true_effect_B_prob
    has_true_effect_C = np.random.rand() < true_effect_C_prob
    mean_B = CvR + CvR * MDE if has_true_effect_B else CvR
    mean_C = CvR + CvR * MDE if has_true_effect_C else CvR
    sample_variance_B = mean_B * (1.0 - mean_B)
    sample_variance_C = mean_C * (1.0 - mean_C)
    samples_B = np.random.normal(mean_B, sample_variance_B, n_samples)
    samples_C = np.random.normal(mean_C, sample_variance_C, n_samples)
    return samples_A, samples_B, samples_C, has_true_effect_B, has_true_effect_C
# Function to simulate a single ABC experiment
def simulate_ABC_experiment(CvR, MDE, sample_variance, true_effect_B_prob, true_effect_C_prob, alpha, n_samples):
    samples_A, samples_B, samples_C, has_true_effect_B, has_true_effect_C = generate_ABC_samples(
        CvR, MDE, sample_variance, n_samples, true_effect_B_prob, true_effect_C_prob
    )
    _, p_value_B = ttest_ind(samples_A, samples_B)
    _, p_value_C = ttest_ind(samples_A, samples_C)
    significant_B = p_value_B < alpha
    significant_C = p_value_C < alpha
    results = {
        'true_positives_B': has_true_effect_B and significant_B,
        'true_positives_C': has_true_effect_C and significant_C,
        'true_negatives_B': not has_true_effect_B and not significant_B,
        'true_negatives_C': not has_true_effect_C and not significant_C,
        'false_positives_B': not has_true_effect_B and significant_B,
        'false_positives_C': not has_true_effect_C and significant_C,
        'false_negatives_B': has_true_effect_B and not significant_B,
        'false_negatives_C': has_true_effect_C and not significant_C
    }
    return results
# Function to simulate multiple ABC experiments
def simulate_ABC_experiments(CvR, MDE, sample_variance, num_repetitions, true_effect_B_prob, true_effect_C_prob, alpha, n_samples):
    summary = {
        'true_positives_B': 0,
        'true_positives_C': 0,
        'true_negatives_B': 0,
        'true_negatives_C': 0,
        'false_positives_B': 0,
        'false_positives_C': 0,
        'false_negatives_B': 0,
        'false_negatives_C': 0,
        'experiment_false_positives': 0,
        'experiment_false_negatives': 0
    }
    for _ in range(num_repetitions):
        results = simulate_ABC_experiment(CvR, MDE, sample_variance, true_effect_B_prob, true_effect_C_prob, alpha, n_samples)
        for key in summary:
            if key in results:
                summary[key] += results[key]
        # Experiment-level calculations
        if results['false_positives_B'] or results['false_positives_C']:
            summary['experiment_false_positives'] += 1
        if results['false_negatives_B'] or results['false_negatives_C']:
            summary['experiment_false_negatives'] += 1
    return summary
# Example usage of the functions
if __name__ == "__main__":
    # Run simulation
    results = simulate_ABC_experiments(CvR, MDE, sample_variance, num_repetitions, true_effect_B_prob, true_effect_C_prob, alpha, n_samples)

    # Calculate rates
    fpr_B = results['false_positives_B'] / num_repetitions
    fnr_B = results['false_negatives_B'] / num_repetitions
    fpr_C = results['false_positives_C'] / num_repetitions
    fnr_C = results['false_negatives_C'] / num_repetitions
    fpr_experiment = results['experiment_false_positives'] / num_repetitions
    fnr_experiment = results['experiment_false_negatives'] / num_repetitions

    # Create DataFrame for results (count columns get NaN in the
    # experiment-level row so all columns have the same length)
    results_data = {
        'True Positives': [results['true_positives_B'], results['true_positives_C'], np.nan],
        'True Negatives': [results['true_negatives_B'], results['true_negatives_C'], np.nan],
        'False Positives': [results['false_positives_B'], results['false_positives_C'], np.nan],
        'False Negatives': [results['false_negatives_B'], results['false_negatives_C'], np.nan],
        'False Positive Rate': [fpr_B, fpr_C, fpr_experiment],
        'False Negative Rate': [fnr_B, fnr_C, fnr_experiment]
    }
    index = ['Variation B', 'Variation C', 'Experiment']
    df_results = pd.DataFrame(results_data, index=index)
    print(df_results)
I was expecting the false negative rate of my simulations not to be 0.0, but my intuition has evidently been fooled.
Upvotes: 0
Views: 28