Analyze A/B Test Results

Matthew Unrue, Fall 2018

Udacity Data Analyst Nanodegree Project 3

Part I - Probability

In [1]:
import pandas as pd
import numpy as np
import random
import matplotlib.pyplot as plt
from scipy import stats
%matplotlib inline
random.seed(42)
np.random.seed(42)  # pandas .sample() draws from NumPy's RNG, so seed it as well
In [2]:
df = pd.read_csv('ab_data.csv')
df.head()
Out[2]:
user_id timestamp group landing_page converted
0 851104 2017-01-21 22:11:48.556739 control old_page 0
1 804228 2017-01-12 08:01:45.159739 control old_page 0
2 661590 2017-01-11 16:55:06.154213 treatment new_page 0
3 853541 2017-01-08 18:28:03.143765 treatment new_page 0
4 864975 2017-01-21 01:52:26.210827 control old_page 1
In [3]:
def proportions_ztest(count, nobs, value=None, alternative='two-sided',
                      prop_var=False):
    '''Two-sample z-test for proportions, adapted from
    statsmodels.stats.proportion.proportions_ztest with print statements
    added to expose the intermediate values.'''

    count = np.asarray(count)
    nobs = np.asarray(nobs)

    if nobs.size == 1:
        nobs = nobs * np.ones_like(count)
        print('nobs.size == 1')

    prop = count * 1. / nobs
    print('prop: ' + str(prop))
    k_sample = np.size(prop)
    print('k_sample: ' + str(k_sample))
    if value is None:
        if k_sample == 1:
            raise ValueError('value must be provided for a 1-sample test')
        value = 0
    if k_sample == 1:
        diff = prop - value
        print('k_sample == 1 and diff: ' + str(diff))
    elif k_sample == 2:
        diff = prop[0] - prop[1] - value
        print('k_sample == 2 and diff: ' + str(diff))
    else:
        msg = 'more than two samples are not implemented yet'
        raise NotImplementedError(msg)

    p_pooled = np.sum(count) * 1. / np.sum(nobs)
    print('p_pooled: ' + str(p_pooled))

    nobs_fact = np.sum(1. / nobs)
    print('nobs_fact: ' + str(nobs_fact))
    print('prop_var: ' + str(prop_var))
    if prop_var:
        p_pooled = prop_var
    var_ = p_pooled * (1 - p_pooled) * nobs_fact
    print('var_: ' + str(var_))
    std_diff = np.sqrt(var_)
    print('std_diff: ' + str(std_diff))
    
    return _zstat_generic2(diff, std_diff, alternative)
In [4]:
def _zstat_generic2(value, std_diff, alternative):
    '''generic (normal) z-test to save typing

    can be used as ztest based on summary statistics
    '''
    zstat = value / std_diff
    print('zstat: ' + str(zstat))
    if alternative in ['two-sided', '2-sided', '2s']:
        pvalue = stats.norm.sf(np.abs(zstat))*2
        print('alternative is two-sided.')
        print('pvalue: ' + str(pvalue))
        
    elif alternative in ['larger', 'l']:
        pvalue = stats.norm.sf(zstat)
    elif alternative in ['smaller', 's']:
        pvalue = stats.norm.cdf(zstat)
    else:
        raise ValueError('invalid alternative')
    return zstat, pvalue
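These two functions are copied from statsmodels' proportions_ztest and its internal helper, with print statements added so that each intermediate quantity can be inspected. As a sanity check, the library's own version should return the same statistic and p-value; a minimal sketch (the counts and sample sizes below are illustrative placeholders, not project data):

# Sketch: compare against the statsmodels implementation directly.
from statsmodels.stats.proportion import proportions_ztest as sm_ztest

z_check, p_check = sm_ztest(count=[120, 100], nobs=[1000, 1000])
print(z_check, p_check)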
In [5]:
df['group'].unique()
Out[5]:
array(['control', 'treatment'], dtype=object)
In [6]:
df['landing_page'].unique()
Out[6]:
array(['old_page', 'new_page'], dtype=object)
In [7]:
len(df)
Out[7]:
294478
In [8]:
df['user_id'].nunique()
Out[8]:
290584
In [9]:
sum(df['converted'] == 1) / len(df['converted'])
Out[9]:
0.11965919355605512
In [10]:
treatment_wrong = sum((df['group'] == 'treatment') & (df['landing_page'] != 'new_page'))
control_wrong = sum((df['group'] == 'control') & (df['landing_page'] == 'new_page'))
total_wrong = treatment_wrong + control_wrong
total_wrong
Out[10]:
3893
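A cross-tabulation of group against landing_page shows the same mismatches at a glance; a quick sketch:

# The treatment/old_page and control/new_page cells together account for
# the 3,893 mismatched rows counted above.
pd.crosstab(df['group'], df['landing_page'])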
In [11]:
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 294478 entries, 0 to 294477
Data columns (total 5 columns):
user_id         294478 non-null int64
timestamp       294478 non-null object
group           294478 non-null object
landing_page    294478 non-null object
converted       294478 non-null int64
dtypes: int64(2), object(3)
memory usage: 11.2+ MB

No rows appear to be missing any values.

In [12]:
df2 = df.copy()  # work on a copy so the cleaning steps below leave df untouched
df2.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 294478 entries, 0 to 294477
Data columns (total 5 columns):
user_id         294478 non-null int64
timestamp       294478 non-null object
group           294478 non-null object
landing_page    294478 non-null object
converted       294478 non-null int64
dtypes: int64(2), object(3)
memory usage: 11.2+ MB
In [13]:
df2 = df2.loc[~((df2['group'] == 'treatment') & (df2['landing_page'] != 'new_page'))]
df2 = df2.loc[~((df2['group'] == 'control') & (df2['landing_page'] == 'new_page'))]
df2.shape
Out[13]:
(290585, 5)
In [14]:
df2[((df2['group'] == 'treatment') == (df2['landing_page'] == 'new_page')) == False].shape[0]
Out[14]:
0
In [15]:
df2['user_id'].nunique()
Out[15]:
290584
In [16]:
df2.loc[df2['user_id'].duplicated()]
Out[16]:
user_id timestamp group landing_page converted
2893 773192 2017-01-14 02:55:59.590927 treatment new_page 0
In [18]:
df2.drop(2893, inplace=True)  # drop the duplicated row by its index label
In [19]:
df2['user_id'].duplicated().value_counts()
Out[19]:
False    290584
Name: user_id, dtype: int64
In [20]:
conv_prob = sum(df2['converted'] == 1) / len(df2['converted'])
conv_prob
Out[20]:
0.11959708724499628
In [21]:
control_group = df2.loc[df2['group'] == 'control']
control_conv_prob = sum(control_group['converted'] == 1) / len(control_group['converted'])
control_conv_prob
Out[21]:
0.1203863045004612
In [22]:
treatment_group = df2.loc[df2['group'] == 'treatment']
treatment_conv_prob = sum(treatment_group['converted'] == 1) / len(treatment_group['converted'])
treatment_conv_prob
Out[22]:
0.11880806551510564
In [23]:
len(treatment_group) / len(df2)
Out[23]:
0.5000619442226688
In [24]:
actual_prob_diff = control_conv_prob - treatment_conv_prob
actual_prob_diff
Out[24]:
0.0015782389853555567

In this dataset, the control group's conversion rate (0.12039) is about 0.00158 higher than the treatment group's (0.11881).
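The same proportions can also be computed more compactly with mean() and groupby(); a minimal sketch on the cleaned df2:

# Overall conversion rate and conversion rate by group, equivalent to the
# cell-by-cell calculations above.
print(df2['converted'].mean())
print(df2.groupby('group')['converted'].mean())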

Part II - A/B Test

$H_{0}: P_{new} - P_{old} \leq 0$

$H_{1}: P_{new} - P_{old} > 0$

$\alpha: 0.05$

In [25]:
conv_prob
Out[25]:
0.11959708724499628
In [26]:
n_new = len(treatment_group)
n_new
Out[26]:
145310
In [27]:
n_old = len(control_group)
n_old
Out[27]:
145274
In [28]:
new_data_sample = treatment_group.sample(n_new, replace = True)

new_page_converted = new_data_sample['converted'].tolist()

p_new_conv_rate = sum(new_page_converted) / len(new_page_converted)
p_new_conv_rate
Out[28]:
0.11848461909022091
In [29]:
old_data_sample = control_group.sample(n_old)  # note: without replace=True, sampling all n_old rows just returns the full control group reshuffled

old_page_converted = old_data_sample['converted'].tolist()

p_old_conv_rate = sum(old_page_converted) / len(old_page_converted)
p_old_conv_rate
Out[29]:
0.1203863045004612
In [30]:
ef_diff = p_new_conv_rate - p_old_conv_rate
ef_diff
Out[30]:
-0.0019016854102402864
In [31]:
p_diffs = np.empty(0)

for x in range(10000):
    new_data_sample = treatment_group.sample(1000, replace = True)
    new_page_converted = new_data_sample['converted'].tolist()
    new_conv_prob = sum(new_page_converted) / len(new_page_converted)
    
    old_data_sample = control_group.sample(1000, replace = True)
    old_page_converted = old_data_sample['converted'].tolist()
    old_conv_prob = sum(old_page_converted) / len(old_page_converted)
    
    difference = new_conv_prob - old_conv_prob
    
    p_diffs = np.append(p_diffs, difference)
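A vectorized alternative is to simulate directly under the null, where both pages share the pooled conversion rate; a sketch that assumes draws at the full group sizes n_new and n_old rather than samples of 1,000:

# Sketch: draw binomial conversion counts at the pooled rate conv_prob for
# each group size, then take the difference in simulated conversion rates.
new_sim = np.random.binomial(n_new, conv_prob, 10000) / n_new
old_sim = np.random.binomial(n_old, conv_prob, 10000) / n_old
p_diffs_null = new_sim - old_sim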
In [32]:
plt.hist(p_diffs)
Out[32]:
(array([  13.,  163.,  687., 1796., 2562., 2705., 1497.,  483.,   88.,
           6.]),
 array([-0.054 , -0.0432, -0.0324, -0.0216, -0.0108,  0.    ,  0.0108,
         0.0216,  0.0324,  0.0432,  0.054 ]),
 <a list of 10 Patch objects>)
In [33]:
greater = [i for i in p_diffs if i > actual_prob_diff]
        
print('len(greater): ' + str(len(greater)))
print('len(p_diffs): ' + str(len(p_diffs)))
greater_prop = len(greater) / len(p_diffs)
greater_prop
len(greater): 4234
len(p_diffs): 10000
Out[33]:
0.4234

The p-value is the probability of observing the statistic, or one more extreme in favor of the alternative, if the null hypothesis is true. Since this p-value (0.4234) is larger than $\alpha = 0.05$, we fail to reject the null hypothesis; only a p-value below $\alpha$ would justify rejecting the null in favor of the alternative.
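The same proportion can be computed without a list comprehension; a one-line sketch:

# Vectorized equivalent of the comparison above.
(p_diffs > actual_prob_diff).mean()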

In [34]:
import statsmodels.api as sm

convert_old = sum(control_group['converted'] == 1)
convert_new = sum(treatment_group['converted'] == 1)
# n_old and n_new were already computed above

conv_diff = (convert_old / n_old) - (convert_new / n_new)
num_diff = n_new - n_old

conv_diff, num_diff
Out[34]:
(0.0015782389853555567, 36)
In [35]:
z_score, p_value = proportions_ztest([convert_new, convert_old], [n_new, n_old])
z_score, p_value
prop: [0.11880807 0.1203863 ]
k_sample: 2
k_sample == 2 and diff: -0.0015782389853555567
p_pooled: 0.11959708724499628
nobs_fact: 1.3765383026571968e-05
prop_var: False
var_: 1.4494070641686e-06
std_diff: 0.0012039132295014454
zstat: -1.3109241984234394
alternative is two-sided.
pvalue: 0.18988337448195103
Out[35]:
(-1.3109241984234394, 0.18988337448195103)

The p-value is greater than the alpha of 0.05, so we fail to reject the null hypothesis according to this test.

Although this p-value (0.1899) differs from the simulated proportion found earlier (0.4234), in part because this test defaults to a two-sided alternative, both approaches lead to the same decision of failing to reject the null hypothesis.
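Since $H_{1}$ is one-sided ($P_{new} - P_{old} > 0$), a matching one-sided test can be obtained from the same function by passing alternative='larger'; a sketch:

# Sketch: one-sided version of the z-test, matching the direction of H1.
z_larger, p_larger = proportions_ztest([convert_new, convert_old],
                                       [n_new, n_old],
                                       alternative='larger')
z_larger, p_larger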

Part III - A Regression Approach

In [36]:
df2['intercept'] = 1
df2['ab_page'] = 0

df2.loc[df2['group'] == 'treatment', 'ab_page'] = 1
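Equivalently, the dummy column can be created from a boolean comparison in a single line; a sketch:

# One-line equivalent of the two assignments above.
df2['ab_page'] = (df2['group'] == 'treatment').astype(int)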
In [37]:
df2.head(20)
Out[37]:
user_id timestamp group landing_page converted intercept ab_page
0 851104 2017-01-21 22:11:48.556739 control old_page 0 1 0
1 804228 2017-01-12 08:01:45.159739 control old_page 0 1 0
2 661590 2017-01-11 16:55:06.154213 treatment new_page 0 1 1
3 853541 2017-01-08 18:28:03.143765 treatment new_page 0 1 1
4 864975 2017-01-21 01:52:26.210827 control old_page 1 1 0
5 936923 2017-01-10 15:20:49.083499 control old_page 0 1 0
6 679687 2017-01-19 03:26:46.940749 treatment new_page 1 1 1
7 719014 2017-01-17 01:48:29.539573 control old_page 0 1 0
8 817355 2017-01-04 17:58:08.979471 treatment new_page 1 1 1
9 839785 2017-01-15 18:11:06.610965 treatment new_page 1 1 1
10 929503 2017-01-18 05:37:11.527370 treatment new_page 0 1 1
11 834487 2017-01-21 22:37:47.774891 treatment new_page 0 1 1
12 803683 2017-01-09 06:05:16.222706 treatment new_page 0 1 1
13 944475 2017-01-22 01:31:09.573836 treatment new_page 0 1 1
14 718956 2017-01-22 11:45:11.327945 treatment new_page 0 1 1
15 644214 2017-01-22 02:05:21.719434 control old_page 1 1 0
16 847721 2017-01-17 14:01:00.090575 control old_page 0 1 0
17 888545 2017-01-08 06:37:26.332945 treatment new_page 1 1 1
18 650559 2017-01-24 11:55:51.084801 control old_page 0 1 0
19 935734 2017-01-17 20:33:37.428378 control old_page 0 1 0
In [38]:
log_mod = sm.Logit(df2['converted'], df2[['intercept', 'ab_page']])
results = log_mod.fit()
Optimization terminated successfully.
         Current function value: 0.366118
         Iterations 6
In [39]:
results.summary()
Out[39]:
Logit Regression Results
Dep. Variable:    converted          No. Observations:  290584
Model:            Logit              Df Residuals:      290582
Method:           MLE                Df Model:          1
Date:             Thu, 13 Feb 2020   Pseudo R-squ.:     8.077e-06
Time:             00:10:50           Log-Likelihood:    -1.0639e+05
converged:        True               LL-Null:           -1.0639e+05
Covariance Type:  nonrobust          LLR p-value:       0.1899

                coef    std err        z      P>|z|    [0.025    0.975]
intercept    -1.9888      0.008  -246.669     0.000    -2.005    -1.973
ab_page      -0.0150      0.011    -1.311     0.190    -0.037     0.007

The p-value associated with ab_page is 0.190, rounded to the thousandth place. It matches the two-sided z-test p-value from Part II (0.18988), since the regression reports a two-sided test for each coefficient.
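For interpretation, the fitted coefficients can be exponentiated into odds ratios; a quick sketch using the results object above:

# exp(coef) gives the multiplicative effect on the conversion odds; for
# ab_page, exp(-0.0150) is roughly 0.985, i.e. about 1.5% lower odds on the
# new page, which is not statistically significant here.
np.exp(results.params)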

In [40]:
countries_df = pd.read_csv('countries.csv')
countries_df.head()
Out[40]:
user_id country
0 834778 UK
1 928468 US
2 822059 UK
3 711597 UK
4 710616 UK
In [41]:
df3 = pd.DataFrame.join(df2, countries_df['country'])
df3.head()
Out[41]:
user_id timestamp group landing_page converted intercept ab_page country
0 851104 2017-01-21 22:11:48.556739 control old_page 0 1 0 UK
1 804228 2017-01-12 08:01:45.159739 control old_page 0 1 0 US
2 661590 2017-01-11 16:55:06.154213 treatment new_page 0 1 1 UK
3 853541 2017-01-08 18:28:03.143765 treatment new_page 0 1 1 UK
4 864975 2017-01-21 01:52:26.210827 control old_page 1 1 0 UK
In [42]:
df3['country'].value_counts()
Out[42]:
US    200926
UK     71501
CA     14315
Name: country, dtype: int64
In [43]:
df3[['CA','UK', 'US']] = pd.get_dummies(df3['country'])
df3 = df3.drop('US', axis=1)
df3.head()
Out[43]:
user_id timestamp group landing_page converted intercept ab_page country CA UK
0 851104 2017-01-21 22:11:48.556739 control old_page 0 1 0 UK 0 1
1 804228 2017-01-12 08:01:45.159739 control old_page 0 1 0 US 0 0
2 661590 2017-01-11 16:55:06.154213 treatment new_page 0 1 1 UK 0 1
3 853541 2017-01-08 18:28:03.143765 treatment new_page 0 1 1 UK 0 1
4 864975 2017-01-21 01:52:26.210827 control old_page 1 1 0 UK 0 1
In [44]:
log_mod2 = sm.Logit(df3['converted'], df3[['intercept', 'ab_page', 'CA', 'UK']])
results2 = log_mod2.fit()
Optimization terminated successfully.
         Current function value: 0.366116
         Iterations 6
In [45]:
results2.summary()
Out[45]:
Logit Regression Results
Dep. Variable:    converted          No. Observations:  290584
Model:            Logit              Df Residuals:      290580
Method:           MLE                Df Model:          3
Date:             Thu, 13 Feb 2020   Pseudo R-squ.:     1.420e-05
Time:             00:10:52           Log-Likelihood:    -1.0639e+05
converged:        True               LL-Null:           -1.0639e+05
Covariance Type:  nonrobust          LLR p-value:       0.3884

                coef    std err        z      P>|z|    [0.025    0.975]
intercept    -1.9891      0.009  -224.021     0.000    -2.007    -1.972
ab_page      -0.0150      0.011    -1.308     0.191    -0.037     0.007
CA           -0.0258      0.027    -0.959     0.338    -0.078     0.027
UK            0.0065      0.013     0.485     0.628    -0.020     0.033

Given that the p-values for the country terms (CA: 0.338, UK: 0.628) are both greater than 0.05, country does not appear to have a significant impact on conversion.
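As with the first model, the country coefficients can be exponentiated for interpretation; a short sketch:

# Odds ratios relative to the US baseline: exp(-0.0258) is about 0.975 for CA
# and exp(0.0065) is about 1.007 for UK, both negligible differences.
np.exp(results2.params)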