In [1]:
import os
import pandas as pd
In [2]:
for root, dirs, files in os.walk("data/chapter_1"):  
    for filename in files:
        print(filename)
average_quantity.csv
cohort_counts.csv
online.csv
._average_quantity.csv
._cohort_counts.csv
._online.csv
In [8]:
import pandas as pd
cohort_counts = pd.read_csv("data/chapter_1/cohort_counts.csv",index_col='CohortMonth')
In [26]:
cohort_counts.head()
Out[26]:
1 2 3 4 5 6 7 8 9 10 11 12 13
CohortMonth
2010-12-01 716.0 246.0 221.0 251.0 245.0 285.0 249.0 236.0 240.0 265.0 254.0 348.0 172.0
2011-01-01 332.0 69.0 82.0 81.0 110.0 90.0 82.0 86.0 104.0 102.0 124.0 45.0 NaN
2011-02-01 316.0 58.0 57.0 83.0 85.0 74.0 80.0 83.0 86.0 95.0 28.0 NaN NaN
2011-03-01 388.0 63.0 100.0 76.0 83.0 67.0 98.0 85.0 107.0 38.0 NaN NaN NaN
2011-04-01 255.0 49.0 52.0 49.0 47.0 52.0 56.0 59.0 17.0 NaN NaN NaN NaN

Use loc, idea from Stack Overflow

In [29]:
cohort_counts.loc[cohort_counts.index.values == '2011-01-01']
Out[29]:
1 2 3 4 5 6 7 8 9 10 11 12 13
CohortMonth
2011-01-01 332.0 69.0 82.0 81.0 110.0 90.0 82.0 86.0 104.0 102.0 124.0 45.0 NaN

Thus, in Jan, there are 332 transactions.

In [38]:
online = pd.read_csv("data/chapter_1/online.csv",index_col = 0)
In [46]:
online.head()
Out[46]:
InvoiceNo StockCode Description Quantity InvoiceDate UnitPrice CustomerID Country
416792 572558 22745 POPPY'S PLAYHOUSE BEDROOM 6 2011-10-25 08:26:00 2.10 14286 United Kingdom
482904 577485 23196 VINTAGE LEAF MAGNETIC NOTEPAD 1 2011-11-20 11:56:00 1.45 16360 United Kingdom
263743 560034 23299 FOOD COVER WITH BEADS SET 2 6 2011-07-14 13:35:00 3.75 13933 United Kingdom
495549 578307 72349B SET/6 PURPLE BUTTERFLY T-LIGHTS 1 2011-11-23 15:53:00 2.10 17290 United Kingdom
204384 554656 21756 BATH BUILDING BLOCK WORD 3 2011-05-25 13:36:00 5.95 17663 United Kingdom
In [89]:
online.InvoiceDate.describe()
Out[89]:
count                   70864
unique                  13140
top       2011-11-14 15:27:00
freq                      128
Name: InvoiceDate, dtype: object

Use apply function. Because .date() only supports elements instead of series. For example,

In [93]:
import pandas as pd
date=pd.to_datetime('2015-01-08 22:44:09').date()
print(date)
2015-01-08
In [91]:
def get_date(x):
    return x.date()
online['InvoiceDay'] = pd.to_datetime(online['InvoiceDate']).apply(get_date)
In [111]:
online['CohortDay'] = online.groupby('CustomerID')['InvoiceDay'].transform('min')
In [113]:
online.head()
Out[113]:
InvoiceNo StockCode Description Quantity InvoiceDate UnitPrice CustomerID Country InvoiceDay CohortDay
416792 572558 22745 POPPY'S PLAYHOUSE BEDROOM 6 2011-10-25 08:26:00 2.10 14286 United Kingdom 2011-10-25 2011-04-11
482904 577485 23196 VINTAGE LEAF MAGNETIC NOTEPAD 1 2011-11-20 11:56:00 1.45 16360 United Kingdom 2011-11-20 2011-09-12
263743 560034 23299 FOOD COVER WITH BEADS SET 2 6 2011-07-14 13:35:00 3.75 13933 United Kingdom 2011-07-14 2011-07-14
495549 578307 72349B SET/6 PURPLE BUTTERFLY T-LIGHTS 1 2011-11-23 15:53:00 2.10 17290 United Kingdom 2011-11-23 2011-11-23
204384 554656 21756 BATH BUILDING BLOCK WORD 3 2011-05-25 13:36:00 5.95 17663 United Kingdom 2011-05-25 2011-02-25

Get the first transcation date per customer.

In [133]:
def get_date_int(df,col):
    year  = df[col].map(lambda x: x.year)
    month = df[col].map(lambda x: x.month)
    day   = df[col].map(lambda x: x.day)
    return year, month, day
In [134]:
online.head()
Out[134]:
InvoiceNo StockCode Description Quantity InvoiceDate UnitPrice CustomerID Country InvoiceDay CohortDay
416792 572558 22745 POPPY'S PLAYHOUSE BEDROOM 6 2011-10-25 08:26:00 2.10 14286 United Kingdom 2011-10-25 2011-04-11
482904 577485 23196 VINTAGE LEAF MAGNETIC NOTEPAD 1 2011-11-20 11:56:00 1.45 16360 United Kingdom 2011-11-20 2011-09-12
263743 560034 23299 FOOD COVER WITH BEADS SET 2 6 2011-07-14 13:35:00 3.75 13933 United Kingdom 2011-07-14 2011-07-14
495549 578307 72349B SET/6 PURPLE BUTTERFLY T-LIGHTS 1 2011-11-23 15:53:00 2.10 17290 United Kingdom 2011-11-23 2011-11-23
204384 554656 21756 BATH BUILDING BLOCK WORD 3 2011-05-25 13:36:00 5.95 17663 United Kingdom 2011-05-25 2011-02-25
In [136]:
invoice_year, invoice_month, invoice_day = get_date_int(online, 'InvoiceDay')
cohort_year, cohort_month, cohort_day = get_date_int(online, 'CohortDay')

Calculate time offset in days.

In [141]:
online['CohortIndex'] = \
    (invoice_year - cohort_year) * 365 + \
    (invoice_month - cohort_month) * 30 + \
    (invoice_day - cohort_day) * 1 + \
    1

When you get time cohort and cohort index, the last one we do is to get the metrics. Let's think about the customer retention.

In [144]:
grouping = online.groupby(['CohortDay', 'CohortIndex'])
In [147]:
cohort_data = grouping['CustomerID'].apply(pd.Series.nunique).reset_index()
cohort_data.head()
Out[147]:
CohortDay CohortIndex CustomerID
0 2010-12-01 1 69
1 2010-12-01 2 3
2 2010-12-01 3 1
3 2010-12-01 5 3
4 2010-12-01 6 5

.reset_index() helps get CohortDay and CohortIndex as normal columns.

In [149]:
cohort_counts = cohort_data.pivot(index='CohortDay', columns='CohortIndex', values='CustomerID')
cohort_counts.head()
Out[149]:
CohortIndex 1 2 3 4 5 6 7 8 9 10 ... 365 366 367 368 369 370 371 372 373 374
CohortDay
2010-12-01 69.0 3.0 1.0 NaN 3.0 5.0 2.0 4.0 4.0 2.0 ... 4.0 4.0 6.0 NaN NaN 3.0 5.0 3.0 4.0 1.0
2010-12-02 78.0 NaN NaN NaN 1.0 3.0 2.0 4.0 3.0 NaN ... 2.0 2.0 NaN NaN 3.0 1.0 2.0 2.0 1.0 NaN
2010-12-03 30.0 NaN 1.0 1.0 NaN NaN 1.0 NaN NaN NaN ... 2.0 NaN 1.0 2.0 1.0 1.0 1.0 NaN NaN NaN
2010-12-05 59.0 3.0 2.0 1.0 4.0 2.0 NaN 5.0 7.0 3.0 ... 7.0 3.0 2.0 3.0 2.0 1.0 NaN NaN NaN NaN
2010-12-06 57.0 NaN 2.0 1.0 1.0 NaN 1.0 3.0 6.0 2.0 ... 1.0 2.0 4.0 3.0 1.0 NaN NaN NaN NaN NaN

5 rows × 374 columns

In [151]:
cohort_sizes = cohort_counts.iloc[:,0]
retention = cohort_counts.divide(cohort_sizes, axis=0)
In [153]:
retention.head()
Out[153]:
CohortIndex 1 2 3 4 5 6 7 8 9 10 ... 365 366 367 368 369 370 371 372 373 374
CohortDay
2010-12-01 1.0 0.043478 0.014493 NaN 0.043478 0.072464 0.028986 0.057971 0.057971 0.028986 ... 0.057971 0.057971 0.086957 NaN NaN 0.043478 0.072464 0.043478 0.057971 0.014493
2010-12-02 1.0 NaN NaN NaN 0.012821 0.038462 0.025641 0.051282 0.038462 NaN ... 0.025641 0.025641 NaN NaN 0.038462 0.012821 0.025641 0.025641 0.012821 NaN
2010-12-03 1.0 NaN 0.033333 0.033333 NaN NaN 0.033333 NaN NaN NaN ... 0.066667 NaN 0.033333 0.066667 0.033333 0.033333 0.033333 NaN NaN NaN
2010-12-05 1.0 0.050847 0.033898 0.016949 0.067797 0.033898 NaN 0.084746 0.118644 0.050847 ... 0.118644 0.050847 0.033898 0.050847 0.033898 0.016949 NaN NaN NaN NaN
2010-12-06 1.0 NaN 0.035088 0.017544 0.017544 NaN 0.017544 0.052632 0.105263 0.035088 ... 0.017544 0.035088 0.070175 0.052632 0.017544 NaN NaN NaN NaN NaN

5 rows × 374 columns

CohortDay is under daily base. I try a monthly base.

In [187]:
online['CohortMonth'] = online['CohortDay'].values.astype('datetime64[M]')

Get the first day of month from Stack Overflow

In [189]:
online['CohortIndexMonth'] = \
    (invoice_year - cohort_year) * 12 + \
    (invoice_month - cohort_month) * 1 + \
    1
In [193]:
grouping = online.groupby(['CohortMonth', 'CohortIndexMonth'])
cohort_data_month = grouping['CustomerID'].apply(pd.Series.nunique).reset_index()
cohort_data_month.head()
Out[193]:
CohortMonth CohortIndexMonth CustomerID
0 2010-12-01 1 716
1 2010-12-01 2 246
2 2010-12-01 3 221
3 2010-12-01 4 251
4 2010-12-01 5 245
In [195]:
cohort_counts_month = cohort_data_month.pivot(index='CohortMonth', columns='CohortIndexMonth', values='CustomerID')
cohort_counts_month.head()
Out[195]:
CohortIndexMonth 1 2 3 4 5 6 7 8 9 10 11 12 13
CohortMonth
2010-12-01 716.0 246.0 221.0 251.0 245.0 285.0 249.0 236.0 240.0 265.0 254.0 348.0 172.0
2011-01-01 332.0 69.0 82.0 81.0 110.0 90.0 82.0 86.0 104.0 102.0 124.0 45.0 NaN
2011-02-01 316.0 58.0 57.0 83.0 85.0 74.0 80.0 83.0 86.0 95.0 28.0 NaN NaN
2011-03-01 388.0 63.0 100.0 76.0 83.0 67.0 98.0 85.0 107.0 38.0 NaN NaN NaN
2011-04-01 255.0 49.0 52.0 49.0 47.0 52.0 56.0 59.0 17.0 NaN NaN NaN NaN
In [196]:
cohort_sizes_month = cohort_counts_month.iloc[:,0]
retention_month = cohort_counts_month.divide(cohort_sizes_month, axis=0)
In [198]:
retention_month.head()
Out[198]:
CohortIndexMonth 1 2 3 4 5 6 7 8 9 10 11 12 13
CohortMonth
2010-12-01 1.0 0.343575 0.308659 0.350559 0.342179 0.398045 0.347765 0.329609 0.335196 0.370112 0.354749 0.486034 0.240223
2011-01-01 1.0 0.207831 0.246988 0.243976 0.331325 0.271084 0.246988 0.259036 0.313253 0.307229 0.373494 0.135542 NaN
2011-02-01 1.0 0.183544 0.180380 0.262658 0.268987 0.234177 0.253165 0.262658 0.272152 0.300633 0.088608 NaN NaN
2011-03-01 1.0 0.162371 0.257732 0.195876 0.213918 0.172680 0.252577 0.219072 0.275773 0.097938 NaN NaN NaN
2011-04-01 1.0 0.192157 0.203922 0.192157 0.184314 0.203922 0.219608 0.231373 0.066667 NaN NaN NaN NaN

We get the number of customer above, next we do the cohort for average price.

In [220]:
online
Out[220]:
InvoiceNo StockCode Description Quantity InvoiceDate UnitPrice CustomerID Country InvoiceDay CohortDay CohortIndex CohortMonth CohortIndexMonth
416792 572558 22745 POPPY'S PLAYHOUSE BEDROOM 6 2011-10-25 08:26:00 2.10 14286 United Kingdom 2011-10-25 2011-04-11 195 2011-04-01 7
482904 577485 23196 VINTAGE LEAF MAGNETIC NOTEPAD 1 2011-11-20 11:56:00 1.45 16360 United Kingdom 2011-11-20 2011-09-12 69 2011-09-01 3
263743 560034 23299 FOOD COVER WITH BEADS SET 2 6 2011-07-14 13:35:00 3.75 13933 United Kingdom 2011-07-14 2011-07-14 1 2011-07-01 1
495549 578307 72349B SET/6 PURPLE BUTTERFLY T-LIGHTS 1 2011-11-23 15:53:00 2.10 17290 United Kingdom 2011-11-23 2011-11-23 1 2011-11-01 1
204384 554656 21756 BATH BUILDING BLOCK WORD 3 2011-05-25 13:36:00 5.95 17663 United Kingdom 2011-05-25 2011-02-25 91 2011-02-01 4
125615 547051 22028 PENNY FARTHING BIRTHDAY CARD 12 2011-03-20 12:06:00 0.42 12902 United Kingdom 2011-03-20 2011-03-20 1 2011-03-01 1
483123 577493 20724 RED RETROSPOT CHARLOTTE BAG 10 2011-11-20 12:13:00 0.85 17323 United Kingdom 2011-11-20 2011-02-20 271 2011-02-01 10
449888 575143 23343 JUMBO BAG VINTAGE CHRISTMAS 10 2011-11-08 15:37:00 2.08 13643 United Kingdom 2011-11-08 2011-09-02 67 2011-09-01 3
127438 547223 22934 BAKING MOULD EASTER EGG WHITE CHOC 2 2011-03-21 15:10:00 2.95 12867 United Kingdom 2011-03-21 2010-12-14 103 2010-12-01 4
189515 553156 21212 PACK OF 72 RETROSPOT CAKE CASES 2 2011-05-13 14:58:00 0.55 17841 United Kingdom 2011-05-13 2010-12-01 168 2010-12-01 6
181007 552418 22693 GROW A FLYTRAP OR SUNFLOWER IN TIN 24 2011-05-09 12:42:00 1.25 13971 United Kingdom 2011-05-09 2011-02-27 73 2011-02-01 4
439284 574444 22086 PAPER CHAIN KIT 50'S CHRISTMAS 2 2011-11-04 11:41:00 2.95 18122 United Kingdom 2011-11-04 2011-07-12 113 2011-07-01 5
198992 554079 23092 LARGE ANTIQUE WHITE PHOTO FRAME 1 2011-05-22 11:41:00 7.90 14546 United Kingdom 2011-05-22 2011-04-05 48 2011-04-01 2
223228 556475 23322 LARGE WHITE HEART OF WICKER 6 2011-06-12 12:03:00 2.95 18109 United Kingdom 2011-06-12 2010-12-12 186 2010-12-01 7
355073 567905 23336 EGG FRYING PAN PINK 6 2011-09-22 16:37:00 2.08 12952 United Kingdom 2011-09-22 2011-09-22 1 2011-09-01 1
348881 567474 22897 OVEN MITT APPLES DESIGN 2 2011-09-20 12:53:00 1.45 13448 United Kingdom 2011-09-20 2010-12-01 295 2010-12-01 10
419995 572866 22384 LUNCH BAG PINK POLKADOT 1 2011-10-26 13:07:00 1.65 15005 United Kingdom 2011-10-26 2010-12-08 324 2010-12-01 11
309002 564060 23241 TREASURE TIN GYMKHANA DESIGN 1 2011-08-22 14:37:00 2.08 17073 United Kingdom 2011-08-22 2011-08-22 1 2011-08-01 1
270135 560547 84879 ASSORTED COLOUR BIRD ORNAMENT 8 2011-07-19 12:39:00 1.69 17419 United Kingdom 2011-07-19 2010-12-07 228 2010-12-01 8
321727 565198 22727 ALARM CLOCK BAKELIKE RED 4 2011-09-01 16:18:00 3.75 16729 United Kingdom 2011-09-01 2011-04-12 140 2011-04-01 6
232256 557311 21609 SET 12 LAVENDER BOTANICAL T-LIGHTS 2 2011-06-19 15:36:00 2.95 15700 United Kingdom 2011-06-19 2011-06-19 1 2011-06-01 1
116024 546182 85206A CREAM FELT EASTER EGG BASKET 6 2011-03-10 10:53:00 1.65 14334 United Kingdom 2011-03-10 2010-12-16 90 2010-12-01 4
300478 563198 22456 NATURAL SLATE CHALKBOARD LARGE 18 2011-08-14 10:38:00 4.25 15270 United Kingdom 2011-08-14 2011-02-02 193 2011-02-01 7
95854 544467 22077 6 RIBBONS RUSTIC CHARM 3 2011-02-20 15:51:00 1.65 14502 United Kingdom 2011-02-20 2011-01-30 21 2011-01-01 2
401992 571457 22114 HOT WATER BOTTLE TEA AND SYMPATHY 4 2011-10-17 14:01:00 4.25 12963 United Kingdom 2011-10-17 2010-12-09 314 2010-12-01 11
3754 536673 21803 CHRISTMAS TREE STAR DECORATION 36 2010-12-02 12:11:00 0.42 14060 United Kingdom 2010-12-02 2010-12-02 1 2010-12-01 1
294186 562688 22749 FELTCRAFT PRINCESS CHARLOTTE DOLL 2 2011-08-08 13:53:00 3.75 13869 United Kingdom 2011-08-08 2010-12-17 237 2010-12-01 9
14738 537614 22632 HAND WARMER RED RETROSPOT 7 2010-12-07 13:29:00 2.10 16904 United Kingdom 2010-12-07 2010-12-07 1 2010-12-01 1
514643 579712 22593 CHRISTMAS GINGHAM STAR 6 2011-11-30 14:17:00 0.85 15467 United Kingdom 2011-11-30 2011-07-10 141 2011-07-01 5
46727 540357 22382 LUNCH BAG SPACEBOY DESIGN 20 2011-01-06 15:14:00 1.65 13093 United Kingdom 2011-01-06 2010-12-01 41 2010-12-01 2
... ... ... ... ... ... ... ... ... ... ... ... ... ...
57703 541218 85180A RED HEARTS LIGHT CHAIN 12 2011-01-14 14:02:00 1.25 16638 United Kingdom 2011-01-14 2011-01-14 1 2011-01-01 1
283170 561695 22423 REGENCY CAKESTAND 3 TIER 6 2011-07-29 10:00:00 12.75 14005 United Kingdom 2011-07-29 2011-05-08 82 2011-05-01 3
156909 550183 22697 GREEN REGENCY TEACUP AND SAUCER 6 2011-04-14 17:58:00 2.95 14105 United Kingdom 2011-04-14 2011-04-14 1 2011-04-01 1
166902 550957 22417 PACK OF 60 SPACEBOY CAKE CASES 3 2011-04-21 16:37:00 0.55 18283 United Kingdom 2011-04-21 2011-01-06 106 2011-01-01 4
261718 559884 22608 PENS ASSORTED FUNKY JEWELED 72 2011-07-13 11:34:00 0.19 16843 United Kingdom 2011-07-13 2011-01-16 178 2011-01-01 7
331922 566043 22113 GREY HEART HOT WATER BOTTLE 4 2011-09-08 13:54:00 4.25 13769 United Kingdom 2011-09-08 2010-12-07 277 2010-12-01 10
501109 578818 23579 SNACK TRAY I LOVE LONDON 2 2011-11-25 13:47:00 1.95 17920 United Kingdom 2011-11-25 2010-12-01 360 2010-12-01 12
157132 550193 22796 PHOTO FRAME 3 CLASSIC HANGING 4 2011-04-15 09:27:00 9.95 13952 United Kingdom 2011-04-15 2011-04-15 1 2011-04-01 1
104294 545162 22212 FOUR HOOK WHITE LOVEBIRDS 6 2011-02-28 13:35:00 2.10 16187 United Kingdom 2011-02-28 2011-02-28 1 2011-02-01 1
195544 553721 22402 MAGNETS PACK OF 4 VINTAGE COLLAGE 12 2011-05-18 16:37:00 1.25 16570 United Kingdom 2011-05-18 2011-02-01 108 2011-02-01 4
333304 566193 22055 MINI CAKE STAND HANGING STRAWBERY 8 2011-09-09 13:37:00 1.65 14961 United Kingdom 2011-09-09 2010-12-14 271 2010-12-01 10
238893 557958 23301 GARDENERS KNEELING PAD KEEP CALM 2 2011-06-23 19:27:00 1.65 16161 United Kingdom 2011-06-23 2010-12-06 203 2010-12-01 7
21706 538113 85185B PINK HORSE SOCK PUPPET 6 2010-12-09 15:29:00 2.95 15811 United Kingdom 2010-12-09 2010-12-09 1 2010-12-01 1
146503 548991 21066 VINTAGE RED MUG 1 2011-04-05 13:48:00 1.25 16915 United Kingdom 2011-04-05 2011-04-05 1 2011-04-01 1
99103 544697 35637C PINK STRING CURTAIN WITH POLE 2 2011-02-23 09:44:00 5.95 13453 United Kingdom 2011-02-23 2011-02-23 1 2011-02-01 1
163719 550622 22714 CARD BIRTHDAY COWBOY 12 2011-04-19 13:49:00 0.42 13735 United Kingdom 2011-04-19 2011-04-19 1 2011-04-01 1
381606 569868 22150 3 STRIPEY MICE FELTCRAFT 6 2011-10-06 14:59:00 1.95 13018 United Kingdom 2011-10-06 2011-02-17 230 2011-02-01 9
31688 539007 22720 SET OF 3 CAKE TINS PANTRY DESIGN 1 2010-12-15 13:00:00 4.95 17894 United Kingdom 2010-12-15 2010-12-08 8 2010-12-01 1
468255 576405 23247 BISCUIT TIN 50'S CHRISTMAS 8 2011-11-15 11:14:00 2.89 13735 United Kingdom 2011-11-15 2011-04-19 207 2011-04-01 8
390376 570514 22090 PAPER BUNTING RETROSPOT 12 2011-10-11 10:36:00 2.95 13089 United Kingdom 2011-10-11 2010-12-05 312 2010-12-01 11
70850 542103 79321 CHILLI LIGHTS 4 2011-01-25 13:26:00 4.95 13198 United Kingdom 2011-01-25 2010-12-13 48 2010-12-01 2
158659 550305 22651 GENTLEMAN SHIRT REPAIR KIT 12 2011-04-17 10:27:00 0.85 17920 United Kingdom 2011-04-17 2010-12-01 142 2010-12-01 5
8213 537130 21181 PLEASE ONE PERSON METAL SIGN 1 2010-12-05 12:23:00 2.10 17796 United Kingdom 2010-12-05 2010-12-05 1 2010-12-01 1
532738 580996 21481 FAWN BLUE HOT WATER BOTTLE 2 2011-12-06 17:18:00 3.75 14578 United Kingdom 2011-12-06 2011-12-06 1 2011-12-01 1
126250 547069 22907 PACK OF 20 NAPKINS PANTRY DESIGN 1 2011-03-20 13:52:00 0.85 16710 United Kingdom 2011-03-20 2010-12-06 110 2010-12-01 4
448575 575063 22804 PINK HANGING HEART T-LIGHT HOLDER 1 2011-11-08 12:32:00 2.95 16764 United Kingdom 2011-11-08 2011-07-19 110 2011-07-01 5
19970 537963 21733 RED HANGING HEART T-LIGHT HOLDER 12 2010-12-09 11:30:00 2.95 13369 United Kingdom 2010-12-09 2010-12-09 1 2010-12-01 1
264154 560089 23238 SET OF 4 KNICK KNACK TINS LONDON 1 2011-07-14 16:45:00 4.15 12748 United Kingdom 2011-07-14 2010-12-05 225 2010-12-01 8
199822 554103 85123A WHITE HANGING HEART T-LIGHT HOLDER 2 2011-05-22 13:51:00 2.95 15555 United Kingdom 2011-05-22 2010-12-05 173 2010-12-01 6
312243 564342 22910 PAPER CHAIN KIT VINTAGE CHRISTMAS 2 2011-08-24 14:53:00 2.95 16340 United Kingdom 2011-08-24 2011-08-24 1 2011-08-01 1

70864 rows × 13 columns

In [226]:
average_quantity =  online.groupby(['CohortMonth','CohortIndexMonth'])['UnitPrice'].mean().reset_index() \
    .pivot(index='CohortMonth', columns='CohortIndexMonth', values='UnitPrice') \
    .round(1)
# online.groupby(['CohortMonth','CohortMonth'])['UnitPrice'].apply(pd.Series.mean)
average_quantity
Out[226]:
CohortIndexMonth 1 2 3 4 5 6 7 8 9 10 11 12 13
CohortMonth
2010-12-01 3.0 3.0 3.0 2.8 2.7 6.9 2.8 3.0 2.7 2.7 3.0 2.8 2.6
2011-01-01 3.2 3.1 3.0 3.0 3.1 3.0 3.0 2.5 2.7 2.9 2.6 2.0 NaN
2011-02-01 3.1 4.0 3.3 2.9 3.3 2.9 2.8 2.7 2.9 2.7 3.1 NaN NaN
2011-03-01 3.5 3.6 3.5 2.8 2.7 2.5 2.7 2.9 2.5 2.4 NaN NaN NaN
2011-04-01 3.3 4.4 3.4 2.6 2.8 2.8 2.8 2.6 2.6 NaN NaN NaN NaN
2011-05-01 3.1 2.8 2.5 2.7 2.5 2.3 2.7 2.3 NaN NaN NaN NaN NaN
2011-06-01 2.8 2.4 2.7 3.1 2.5 2.4 2.5 NaN NaN NaN NaN NaN NaN
2011-07-01 3.2 3.1 3.4 2.5 2.4 2.3 NaN NaN NaN NaN NaN NaN NaN
2011-08-01 2.9 3.7 5.4 6.9 4.2 NaN NaN NaN NaN NaN NaN NaN NaN
2011-09-01 2.9 3.1 3.0 2.6 NaN NaN NaN NaN NaN NaN NaN NaN NaN
2011-10-01 2.9 2.7 2.5 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
2011-11-01 2.5 2.1 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
2011-12-01 1.9 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN

Visualization

In [228]:
# Import seaborn package as sns
import seaborn as sns
import matplotlib.pyplot as plt

# Initialize an 8 by 6 inches plot figure
plt.figure(figsize=(8, 6))

# Add a title
plt.title('Average Spend by Monthly Cohorts')

# Create the heatmap
sns.heatmap(average_quantity, annot=True, cmap='Blues')
plt.show()