This notebook generates the dataset used in the FKLearn Tutorial notebook (FKLearn Tutorial.ipynb).

  • The FKLearn Tutorial notebook was presented at Nubank’s Data Science Meetup to introduce FKLearn; its goal is to give an overview of how and why you should use FKLearn.
[1]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
[2]:
from scipy.stats import truncnorm

def get_truncated_normal(mean=0, sd=1, low=0, upp=10):
    """Build a frozen normal distribution truncated to ``[low, upp]``.

    ``truncnorm`` takes its clipping bounds in standard-deviation units
    relative to ``loc``, so the absolute bounds are standardised first.

    Parameters
    ----------
    mean, sd : float
        Location and scale of the underlying (untruncated) normal.
    low, upp : float
        Absolute lower and upper truncation bounds.

    Returns
    -------
    A frozen ``truncnorm`` distribution; call ``.rvs(n)`` on it to sample.
    """
    lower_z = (low - mean) / sd
    upper_z = (upp - mean) / sd
    return truncnorm(lower_z, upper_z, loc=mean, scale=sd)
[3]:
# Seed NumPy's global RNG once, before any sampling, so that Restart & Run All
# regenerates the same dataset. The later cells sample either through
# np.random directly or through scipy.stats frozen distributions, which fall
# back to NumPy's global RandomState when no random_state is passed.
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)

# Customer ids 0..9999 and month indices 1..23 (the range end is exclusive).
ids = range(0, 10000)
months = range(1, 24)
[4]:
# One row per (id, month) pair, ordered id-major: every month of id 0 first,
# then every month of id 1, and so on.
id_grid, month_grid = np.meshgrid(ids, months)
unique_entries = np.stack([id_grid, month_grid]).T.reshape(-1, 2)
[5]:
unique_entries.shape
[5]:
(230000, 2)
[6]:
# Preview how a power-law factor month ** 0.1 grows over the month range.
# Display only: nothing is assigned here.
np.array([months]) ** 0.1
[6]:
array([[1.        , 1.07177346, 1.11612317, 1.14869835, 1.17461894,
        1.1962312 , 1.21481404, 1.23114441, 1.24573094, 1.25892541,
        1.27098162, 1.28208885, 1.29239222, 1.30200545, 1.31101942,
        1.31950791, 1.32753167, 1.33514136, 1.34237965, 1.34928285,
        1.35588211, 1.36220437, 1.36827308]])
[7]:

# Draw one "true" income per customer from a normal(5000, 2000) truncated to
# [300, 20000], then repeat it so every month row of a customer shares it.
X = get_truncated_normal(5000, 2000, 300, 20000)
income_by_id = X.rvs(len(ids))
income_by_id = np.repeat(income_by_id, len(months))

# Despite its name, this flag is True for rows that KEEP the real income
# (probability 0.95); the remaining ~5% of rows become data-entry errors.
income_wrong_entry = np.random.binomial(1, 1 - 0.05, unique_entries.shape[0]).astype(bool)

# Rows flagged False receive the sentinel 9999999. The reshape(1, -1) makes
# the result a (1, n_rows) row vector rather than a flat array.
income_array = np.where(income_wrong_entry, income_by_id.reshape(1, -1), 9999999)
[8]:
income_array.shape
[8]:
(1, 230000)
[9]:
# Histogram of the incomes before the sentinel is applied, in 500-wide bins.
# income_by_id has already been repeated once per month, so every customer is
# counted len(months) times.
income_bins = range(0, 20000, 500)
plt.hist(income_by_id, bins=income_bins)
[9]:
(array([8.5100e+02, 2.4150e+03, 3.8180e+03, 5.8880e+03, 9.3610e+03,
        1.2420e+04, 1.6031e+04, 1.9228e+04, 2.1091e+04, 2.3276e+04,
        2.2448e+04, 2.1942e+04, 2.0263e+04, 1.4789e+04, 1.2236e+04,
        9.2230e+03, 5.8420e+03, 3.9100e+03, 2.3230e+03, 1.2880e+03,
        7.3600e+02, 2.7600e+02, 9.2000e+01, 1.8400e+02, 2.3000e+01,
        2.3000e+01, 2.3000e+01, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00]),
 array([    0,   500,  1000,  1500,  2000,  2500,  3000,  3500,  4000,
         4500,  5000,  5500,  6000,  6500,  7000,  7500,  8000,  8500,
         9000,  9500, 10000, 10500, 11000, 11500, 12000, 12500, 13000,
        13500, 14000, 14500, 15000, 15500, 16000, 16500, 17000, 17500,
        18000, 18500, 19000, 19500]),
 <a list of 39 Patch objects>)
../_images/examples_fklearn_overview_dataset_generation_9_1.png
[10]:
# Each customer gets a sign-up date drawn uniformly from the len(months) * 31
# days starting at 2017-01-01; the date is then repeated for every month row.
first_day = np.datetime64("2017-01-01")
day_offsets = np.random.choice(len(months) * 31, len(ids))
customer_creation_date = np.repeat(
    np.array([first_day + np.timedelta64(int(offset), 'D') for offset in day_offsets]),
    len(months),
)
[11]:
# Brand labels and the multiplier attached to each brand (same order).
phone_branches = ["samsung", "motorola", "iphone", "lg"]
phone_factor = [0.7, 0.3, 0.9, 0.45]

# Sample one brand index per customer with unequal probabilities.
random_phone = np.random.choice(4, len(ids), p=[0.15, 0.3, 0.25, 0.3])

# Look up label and multiplier per customer, then repeat for every month row.
cellphone_branch = np.repeat([phone_branches[idx] for idx in random_phone], len(months))
cellphone_factor = np.repeat([phone_factor[idx] for idx in random_phone], len(months))
[12]:
cellphone_factor
[12]:
array([0.45, 0.45, 0.45, ..., 0.3 , 0.3 , 0.3 ])
[13]:
# Despite its name, bureau_missing is True where the score is PRESENT
# (probability 0.9); the other ~10% of rows end up as NaN.
bureau_missing = np.random.binomial(1, 1 - 0.1, unique_entries.shape[0]).astype(bool)

# Raw scores: normal(500, 250) truncated to [0, 1000], one draw per row.
Y = get_truncated_normal(500, 250, 0, 1000)
raw_bureau_score = Y.rvs(unique_entries.shape[0])

# month ** 0.2, tiled once per customer so it lines up with the id-major rows.
monthly_factor = np.tile(np.power(np.array(months), 0.2), len(ids))

# Dividing by the factor shrinks scores as the month index grows; NaN stays NaN.
bureau_score = np.where(bureau_missing, raw_bureau_score, np.nan) / monthly_factor
[14]:
bureau_score
[14]:
array([395.94580788, 415.29087644, 159.24609131, ..., 433.25966177,
       297.1819245 ,          nan])
[15]:
# Distribution of the month-adjusted bureau scores in 25-wide bins.
score_bins = range(0, 1000, 25)
plt.hist(bureau_score, bins=score_bins)
[15]:
(array([ 2219.,  2924.,  3928.,  4948.,  6192.,  7456.,  8735., 10123.,
        11212., 11909., 12934., 13351., 12961., 12877., 12282., 11558.,
        10431.,  9236.,  8037.,  7139.,  5836.,  4894.,  3652.,  2821.,
         2063.,  1654.,  1256.,  1029.,   817.,   572.,   480.,   375.,
          275.,   240.,   206.,   132.,    78.,    86.,    69.]),
 array([  0,  25,  50,  75, 100, 125, 150, 175, 200, 225, 250, 275, 300,
        325, 350, 375, 400, 425, 450, 475, 500, 525, 550, 575, 600, 625,
        650, 675, 700, 725, 750, 775, 800, 825, 850, 875, 900, 925, 950,
        975]),
 <a list of 39 Patch objects>)
../_images/examples_fklearn_overview_dataset_generation_15_1.png
[16]:
# One normal(500, 200) draw per customer, held constant across that customer's
# month rows. The draw is not clipped, so negative values can occur.
spend_per_customer = np.random.normal(500, 200, len(ids))
willingness_to_spend = np.repeat(spend_per_customer, len(months))
[17]:
willingness_to_spend
[17]:
array([933.87350032, 933.87350032, 933.87350032, ..., 238.32311792,
       238.32311792, 238.32311792])
[18]:
# Distribution of willingness_to_spend in 50-wide bins, including the
# negative range.
spend_bins = range(-1000, 1500, 50)
plt.hist(willingness_to_spend, bins=spend_bins)
[18]:
(array([0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 2.3000e+01, 0.0000e+00,
        2.3000e+01, 4.6000e+01, 2.3000e+02, 4.6000e+02, 6.2100e+02,
        1.1730e+03, 2.4610e+03, 4.1400e+03, 6.0030e+03, 7.9580e+03,
        1.1868e+04, 1.4789e+04, 1.9044e+04, 2.0493e+04, 2.3276e+04,
        2.3184e+04, 2.2379e+04, 1.8860e+04, 1.5571e+04, 1.2167e+04,
        9.5680e+03, 5.9570e+03, 4.3930e+03, 2.2540e+03, 1.4030e+03,
        9.6600e+02, 3.9100e+02, 1.8400e+02, 2.3000e+01, 6.9000e+01,
        0.0000e+00, 2.3000e+01, 0.0000e+00, 0.0000e+00]),
 array([-1000,  -950,  -900,  -850,  -800,  -750,  -700,  -650,  -600,
         -550,  -500,  -450,  -400,  -350,  -300,  -250,  -200,  -150,
         -100,   -50,     0,    50,   100,   150,   200,   250,   300,
          350,   400,   450,   500,   550,   600,   650,   700,   750,
          800,   850,   900,   950,  1000,  1050,  1100,  1150,  1200,
         1250,  1300,  1350,  1400,  1450]),
 <a list of 49 Patch objects>)
../_images/examples_fklearn_overview_dataset_generation_18_1.png
[31]:
# Independent normal(1000, 100) draws, one per (id, month) row.
n_rows = unique_entries.shape[0]
noise_feature = np.random.normal(1000, 100, n_rows)
[32]:
def _min_max(values):
    """Rescale an array linearly so its minimum maps to 0 and its maximum to 1."""
    return (values - values.min()) / (values.max() - values.min())


# Four drivers of spend, each rescaled to [0, 1] before being summed.
a_norm = _min_max(willingness_to_spend)
# NOTE(review): income_array still contains the 9999999 sentinel rows, so the
# maximum is the sentinel and genuine incomes scale to values very close to 0,
# while sentinel rows get the full 1.0 — confirm this is intended.
b_norm = _min_max(income_array)
c_norm = _min_max(cellphone_factor * willingness_to_spend)
# Missing bureau scores are replaced by 300.0 for this calculation only.
d_norm = _min_max(np.where(np.isnan(bureau_score), 300.0, bureau_score))

# Multiplicative noise: normal(2000, 100) truncated to [0, 50000], one draw per row.
W = get_truncated_normal(2000, 100, 0, 50000)
spend = (a_norm + b_norm + c_norm + d_norm) * W.rvs(unique_entries.shape[0])
[21]:
spend
[21]:
array([[6476.47307951, 4740.97909678, 3348.94742391, ..., 2367.47238387,
        4354.480922  , 3508.97334522]])
[22]:
spend.shape
[22]:
(1, 230000)
[23]:
income_array.shape
[23]:
(1, 230000)
[33]:
# Assemble one row per (id, month) and keep only the months on or after the
# customer's sign-up date.
initial_df = (
    pd.DataFrame(unique_entries, columns=["id", "month"])
    .assign(
        # income_array and spend are (1, n_rows) row vectors; flatten them so
        # each becomes an ordinary 1-D column.
        income=np.ravel(income_array),
        created_at=customer_creation_date,
        phone_type=cellphone_branch,
        bureau_score=bureau_score,
        spend_desire=willingness_to_spend,
        random_noise=noise_feature,
        monthly_spend=np.ravel(spend),
        # Every month is approximated as 31 days counted from 2017-01-01. The
        # offset is built as an explicit day Timedelta because pandas >= 1.0
        # raises TypeError when plain integers are added to a datetime.
        month_date=lambda df: pd.Timestamp("2017-01-01")
        + pd.to_timedelta(df.month * 31, unit="D"),
    )
    # Drop the months that fall before the customer was created.
    .loc[lambda df: df.month_date >= df.created_at]
)

[34]:
# Mean bureau score per month. groupby returns the months in ascending order,
# which matches the sorted x values.
mean_score_by_month = initial_df.groupby("month").agg({"bureau_score": "mean"})
plt.plot(sorted(initial_df.month.unique()), mean_score_by_month)
[34]:
[<matplotlib.lines.Line2D at 0x1a24b54940>]
../_images/examples_fklearn_overview_dataset_generation_25_1.png
[35]:
initial_df
[35]:
id month income created_at phone_type bureau_score spend_desire random_noise monthly_spend month_date
10 0 11 2.662664e+03 2017-11-24 lg NaN 933.873500 1139.988267 3044.984797 2017-12-08
11 0 12 2.662664e+03 2017-11-24 lg 259.050372 933.873500 949.164731 2911.910942 2018-01-08
12 0 13 2.662664e+03 2017-11-24 lg 295.523138 933.873500 829.196293 2867.779678 2018-02-08
13 0 14 2.662664e+03 2017-11-24 lg 260.729691 933.873500 1001.109359 3160.060523 2018-03-11
14 0 15 2.662664e+03 2017-11-24 lg 396.889624 933.873500 997.938315 3420.741792 2018-04-11
15 0 16 2.662664e+03 2017-11-24 lg 78.479225 933.873500 1032.025854 2484.013650 2018-05-12
16 0 17 2.662664e+03 2017-11-24 lg 79.617325 933.873500 969.913448 2454.602869 2018-06-12
17 0 18 2.662664e+03 2017-11-24 lg 177.472256 933.873500 766.796672 2803.507917 2018-07-13
18 0 19 2.662664e+03 2017-11-24 lg 189.046991 933.873500 907.258777 2580.205489 2018-08-13
19 0 20 2.662664e+03 2017-11-24 lg 296.367569 933.873500 1063.450859 3232.765577 2018-09-13
20 0 21 2.662664e+03 2017-11-24 lg 106.914660 933.873500 1095.115641 2722.341319 2018-10-14
21 0 22 2.662664e+03 2017-11-24 lg 111.076808 933.873500 964.705725 2334.885221 2018-11-14
22 0 23 2.662664e+03 2017-11-24 lg 204.017107 933.873500 999.699641 2862.418232 2018-12-15
44 1 22 3.919847e+03 2018-10-31 samsung 51.622959 661.945869 716.221137 2415.757986 2018-11-14
45 1 23 3.919847e+03 2018-10-31 samsung 335.759979 661.945869 817.390238 2539.717872 2018-12-15
59 2 14 7.822244e+03 2018-02-19 lg NaN 538.113849 934.655776 2215.079016 2018-03-11
60 2 15 7.822244e+03 2018-02-19 lg 140.761112 538.113849 834.796711 1855.797279 2018-04-11
61 2 16 7.822244e+03 2018-02-19 lg 191.061416 538.113849 999.442880 2135.130741 2018-05-12
62 2 17 7.822244e+03 2018-02-19 lg 258.865637 538.113849 1006.651873 2278.116638 2018-06-12
63 2 18 7.822244e+03 2018-02-19 lg 233.545889 538.113849 1053.284574 2277.614906 2018-07-13
64 2 19 7.822244e+03 2018-02-19 lg 74.862513 538.113849 1124.678558 1797.577610 2018-08-13
65 2 20 7.822244e+03 2018-02-19 lg 331.113763 538.113849 1082.653241 2380.970903 2018-09-13
66 2 21 7.822244e+03 2018-02-19 lg 313.764030 538.113849 851.352016 2384.972242 2018-10-14
67 2 22 7.822244e+03 2018-02-19 lg NaN 538.113849 946.388351 2334.708744 2018-11-14
68 2 23 7.822244e+03 2018-02-19 lg 264.382012 538.113849 1078.967846 2324.311034 2018-12-15
82 3 14 6.710126e+03 2018-02-26 motorola 336.382952 219.718444 1086.431562 1576.195080 2018-03-11
83 3 15 6.710126e+03 2018-02-26 motorola 294.471077 219.718444 977.412880 1569.622583 2018-04-11
84 3 16 6.710126e+03 2018-02-26 motorola 217.297753 219.718444 832.223156 1388.277215 2018-05-12
85 3 17 6.710126e+03 2018-02-26 motorola 233.320385 219.718444 1100.760524 1489.505004 2018-06-12
86 3 18 6.710126e+03 2018-02-26 motorola NaN 219.718444 956.273725 1557.915921 2018-07-13
... ... ... ... ... ... ... ... ... ... ...
229966 9998 13 4.302987e+03 2017-11-20 lg 394.733451 741.394048 865.889465 2787.969664 2018-02-08
229967 9998 14 4.302987e+03 2017-11-20 lg 103.497621 741.394048 1016.677574 2439.669647 2018-03-11
229968 9998 15 4.302987e+03 2017-11-20 lg 324.048521 741.394048 1008.213981 2620.098144 2018-04-11
229969 9998 16 4.302987e+03 2017-11-20 lg 285.229650 741.394048 942.462260 2596.108281 2018-05-12
229970 9998 17 9.999999e+06 2017-11-20 lg 279.223919 741.394048 936.814390 4387.542945 2018-06-12
229971 9998 18 4.302987e+03 2017-11-20 lg 434.538705 741.394048 1104.925121 2856.148411 2018-07-13
229972 9998 19 4.302987e+03 2017-11-20 lg 228.848144 741.394048 991.911761 2430.254018 2018-08-13
229973 9998 20 4.302987e+03 2017-11-20 lg 222.192479 741.394048 895.700387 2713.008033 2018-09-13
229974 9998 21 4.302987e+03 2017-11-20 lg 472.168078 741.394048 999.813229 2877.605278 2018-10-14
229975 9998 22 4.302987e+03 2017-11-20 lg 315.973050 741.394048 1007.066609 2742.294489 2018-11-14
229976 9998 23 4.302987e+03 2017-11-20 lg 351.939005 741.394048 1024.544743 2690.467294 2018-12-15
229981 9999 5 5.870300e+03 2017-05-14 motorola 535.589626 238.323118 928.305709 2131.044250 2017-06-05
229982 9999 6 5.870300e+03 2017-05-14 motorola 495.595018 238.323118 956.751764 2199.817043 2017-07-06
229983 9999 7 5.870300e+03 2017-05-14 motorola 283.991075 238.323118 939.329302 1663.512745 2017-08-06
229984 9999 8 5.870300e+03 2017-05-14 motorola 322.510656 238.323118 1192.457400 1720.711685 2017-09-06
229985 9999 9 5.870300e+03 2017-05-14 motorola 219.439185 238.323118 863.324305 1427.658077 2017-10-07
229986 9999 10 5.870300e+03 2017-05-14 motorola 573.838047 238.323118 992.516111 2145.163885 2017-11-07
229987 9999 11 5.870300e+03 2017-05-14 motorola 298.403830 238.323118 1007.786207 1652.139410 2017-12-08
229988 9999 12 5.870300e+03 2017-05-14 motorola 381.954312 238.323118 997.821963 1668.924825 2018-01-08
229989 9999 13 5.870300e+03 2017-05-14 motorola 444.736416 238.323118 907.488679 2013.678091 2018-02-08
229990 9999 14 5.870300e+03 2017-05-14 motorola 348.825753 238.323118 1017.164196 1857.592133 2018-03-11
229991 9999 15 5.870300e+03 2017-05-14 motorola 129.942796 238.323118 973.744805 1198.114179 2018-04-11
229992 9999 16 9.999999e+06 2017-05-14 motorola 223.670064 238.323118 998.105372 3618.688650 2018-05-12
229993 9999 17 5.870300e+03 2017-05-14 motorola 307.816191 238.323118 1031.266520 1651.539584 2018-06-12
229994 9999 18 5.870300e+03 2017-05-14 motorola 246.119892 238.323118 1023.319029 1583.706825 2018-07-13
229995 9999 19 5.870300e+03 2017-05-14 motorola 122.151234 238.323118 1155.941675 1360.819971 2018-08-13
229996 9999 20 5.870300e+03 2017-05-14 motorola 348.065944 238.323118 1004.350395 1685.613058 2018-09-13
229997 9999 21 5.870300e+03 2017-05-14 motorola 433.259662 238.323118 1129.281883 1870.543429 2018-10-14
229998 9999 22 5.870300e+03 2017-05-14 motorola 297.181925 238.323118 1130.956199 1569.323027 2018-11-14
229999 9999 23 5.870300e+03 2017-05-14 motorola NaN 238.323118 1066.198167 1806.356185 2018-12-15

119957 rows × 10 columns

[36]:
# Column-wise groupby operations are used instead of
# groupby("id").apply(lambda df: df.assign(...)): apply runs a Python
# frame-level assign for every customer, and it relies on the grouping column
# being passed to the applied function, which pandas 2.2 deprecates.

# Keep the rows grouped by id, as apply would; mergesort is stable, so the
# month order inside each customer is preserved.
spend_history_df = initial_df.sort_values("id", kind="mergesort")

# Mean of the current and previous row's spend, restarted for every customer
# (the first row of each customer has no predecessor, hence NaN).
rolling_avg_spend = (
    spend_history_df.groupby("id")["monthly_spend"]
    .transform(lambda monthly: monthly.rolling(2).mean())
)

with_target_df = (
    spend_history_df
    .assign(
        avg_last_2_months_spend=rolling_avg_spend,
        # Target is that same average two rows ahead within the customer, i.e.
        # the mean spend of the next two rows; NaN for each customer's last two rows.
        target=rolling_avg_spend.groupby(spend_history_df["id"]).shift(-2),
    )
    .reset_index(drop=True)
)
[37]:
with_target_df
[37]:
id month income created_at phone_type bureau_score spend_desire random_noise monthly_spend month_date avg_last_2_months_spend target
0 0 11 2.662664e+03 2017-11-24 lg NaN 933.873500 1139.988267 3044.984797 2017-12-08 NaN 2889.845310
1 0 12 2.662664e+03 2017-11-24 lg 259.050372 933.873500 949.164731 2911.910942 2018-01-08 2978.447870 3013.920100
2 0 13 2.662664e+03 2017-11-24 lg 295.523138 933.873500 829.196293 2867.779678 2018-02-08 2889.845310 3290.401158
3 0 14 2.662664e+03 2017-11-24 lg 260.729691 933.873500 1001.109359 3160.060523 2018-03-11 3013.920100 2952.377721
4 0 15 2.662664e+03 2017-11-24 lg 396.889624 933.873500 997.938315 3420.741792 2018-04-11 3290.401158 2469.308259
5 0 16 2.662664e+03 2017-11-24 lg 78.479225 933.873500 1032.025854 2484.013650 2018-05-12 2952.377721 2629.055393
6 0 17 2.662664e+03 2017-11-24 lg 79.617325 933.873500 969.913448 2454.602869 2018-06-12 2469.308259 2691.856703
7 0 18 2.662664e+03 2017-11-24 lg 177.472256 933.873500 766.796672 2803.507917 2018-07-13 2629.055393 2906.485533
8 0 19 2.662664e+03 2017-11-24 lg 189.046991 933.873500 907.258777 2580.205489 2018-08-13 2691.856703 2977.553448
9 0 20 2.662664e+03 2017-11-24 lg 296.367569 933.873500 1063.450859 3232.765577 2018-09-13 2906.485533 2528.613270
10 0 21 2.662664e+03 2017-11-24 lg 106.914660 933.873500 1095.115641 2722.341319 2018-10-14 2977.553448 2598.651727
11 0 22 2.662664e+03 2017-11-24 lg 111.076808 933.873500 964.705725 2334.885221 2018-11-14 2528.613270 NaN
12 0 23 2.662664e+03 2017-11-24 lg 204.017107 933.873500 999.699641 2862.418232 2018-12-15 2598.651727 NaN
13 1 22 3.919847e+03 2018-10-31 samsung 51.622959 661.945869 716.221137 2415.757986 2018-11-14 NaN NaN
14 1 23 3.919847e+03 2018-10-31 samsung 335.759979 661.945869 817.390238 2539.717872 2018-12-15 2477.737929 NaN
15 2 14 7.822244e+03 2018-02-19 lg NaN 538.113849 934.655776 2215.079016 2018-03-11 NaN 1995.464010
16 2 15 7.822244e+03 2018-02-19 lg 140.761112 538.113849 834.796711 1855.797279 2018-04-11 2035.438148 2206.623689
17 2 16 7.822244e+03 2018-02-19 lg 191.061416 538.113849 999.442880 2135.130741 2018-05-12 1995.464010 2277.865772
18 2 17 7.822244e+03 2018-02-19 lg 258.865637 538.113849 1006.651873 2278.116638 2018-06-12 2206.623689 2037.596258
19 2 18 7.822244e+03 2018-02-19 lg 233.545889 538.113849 1053.284574 2277.614906 2018-07-13 2277.865772 2089.274257
20 2 19 7.822244e+03 2018-02-19 lg 74.862513 538.113849 1124.678558 1797.577610 2018-08-13 2037.596258 2382.971573
21 2 20 7.822244e+03 2018-02-19 lg 331.113763 538.113849 1082.653241 2380.970903 2018-09-13 2089.274257 2359.840493
22 2 21 7.822244e+03 2018-02-19 lg 313.764030 538.113849 851.352016 2384.972242 2018-10-14 2382.971573 2329.509889
23 2 22 7.822244e+03 2018-02-19 lg NaN 538.113849 946.388351 2334.708744 2018-11-14 2359.840493 NaN
24 2 23 7.822244e+03 2018-02-19 lg 264.382012 538.113849 1078.967846 2324.311034 2018-12-15 2329.509889 NaN
25 3 14 6.710126e+03 2018-02-26 motorola 336.382952 219.718444 1086.431562 1576.195080 2018-03-11 NaN 1478.949899
26 3 15 6.710126e+03 2018-02-26 motorola 294.471077 219.718444 977.412880 1569.622583 2018-04-11 1572.908831 1438.891110
27 3 16 6.710126e+03 2018-02-26 motorola 217.297753 219.718444 832.223156 1388.277215 2018-05-12 1478.949899 1523.710463
28 3 17 6.710126e+03 2018-02-26 motorola 233.320385 219.718444 1100.760524 1489.505004 2018-06-12 1438.891110 1409.380176
29 3 18 6.710126e+03 2018-02-26 motorola NaN 219.718444 956.273725 1557.915921 2018-07-13 1523.710463 1282.434410
... ... ... ... ... ... ... ... ... ... ... ... ...
119927 9998 13 4.302987e+03 2017-11-20 lg 394.733451 741.394048 865.889465 2787.969664 2018-02-08 2741.752199 2529.883896
119928 9998 14 4.302987e+03 2017-11-20 lg 103.497621 741.394048 1016.677574 2439.669647 2018-03-11 2613.819656 2608.103212
119929 9998 15 4.302987e+03 2017-11-20 lg 324.048521 741.394048 1008.213981 2620.098144 2018-04-11 2529.883896 3491.825613
119930 9998 16 4.302987e+03 2017-11-20 lg 285.229650 741.394048 942.462260 2596.108281 2018-05-12 2608.103212 3621.845678
119931 9998 17 9.999999e+06 2017-11-20 lg 279.223919 741.394048 936.814390 4387.542945 2018-06-12 3491.825613 2643.201214
119932 9998 18 4.302987e+03 2017-11-20 lg 434.538705 741.394048 1104.925121 2856.148411 2018-07-13 3621.845678 2571.631026
119933 9998 19 4.302987e+03 2017-11-20 lg 228.848144 741.394048 991.911761 2430.254018 2018-08-13 2643.201214 2795.306656
119934 9998 20 4.302987e+03 2017-11-20 lg 222.192479 741.394048 895.700387 2713.008033 2018-09-13 2571.631026 2809.949884
119935 9998 21 4.302987e+03 2017-11-20 lg 472.168078 741.394048 999.813229 2877.605278 2018-10-14 2795.306656 2716.380892
119936 9998 22 4.302987e+03 2017-11-20 lg 315.973050 741.394048 1007.066609 2742.294489 2018-11-14 2809.949884 NaN
119937 9998 23 4.302987e+03 2017-11-20 lg 351.939005 741.394048 1024.544743 2690.467294 2018-12-15 2716.380892 NaN
119938 9999 5 5.870300e+03 2017-05-14 motorola 535.589626 238.323118 928.305709 2131.044250 2017-06-05 NaN 1931.664894
119939 9999 6 5.870300e+03 2017-05-14 motorola 495.595018 238.323118 956.751764 2199.817043 2017-07-06 2165.430646 1692.112215
119940 9999 7 5.870300e+03 2017-05-14 motorola 283.991075 238.323118 939.329302 1663.512745 2017-08-06 1931.664894 1574.184881
119941 9999 8 5.870300e+03 2017-05-14 motorola 322.510656 238.323118 1192.457400 1720.711685 2017-09-06 1692.112215 1786.410981
119942 9999 9 5.870300e+03 2017-05-14 motorola 219.439185 238.323118 863.324305 1427.658077 2017-10-07 1574.184881 1898.651647
119943 9999 10 5.870300e+03 2017-05-14 motorola 573.838047 238.323118 992.516111 2145.163885 2017-11-07 1786.410981 1660.532118
119944 9999 11 5.870300e+03 2017-05-14 motorola 298.403830 238.323118 1007.786207 1652.139410 2017-12-08 1898.651647 1841.301458
119945 9999 12 5.870300e+03 2017-05-14 motorola 381.954312 238.323118 997.821963 1668.924825 2018-01-08 1660.532118 1935.635112
119946 9999 13 5.870300e+03 2017-05-14 motorola 444.736416 238.323118 907.488679 2013.678091 2018-02-08 1841.301458 1527.853156
119947 9999 14 5.870300e+03 2017-05-14 motorola 348.825753 238.323118 1017.164196 1857.592133 2018-03-11 1935.635112 2408.401414
119948 9999 15 5.870300e+03 2017-05-14 motorola 129.942796 238.323118 973.744805 1198.114179 2018-04-11 1527.853156 2635.114117
119949 9999 16 9.999999e+06 2017-05-14 motorola 223.670064 238.323118 998.105372 3618.688650 2018-05-12 2408.401414 1617.623205
119950 9999 17 5.870300e+03 2017-05-14 motorola 307.816191 238.323118 1031.266520 1651.539584 2018-06-12 2635.114117 1472.263398
119951 9999 18 5.870300e+03 2017-05-14 motorola 246.119892 238.323118 1023.319029 1583.706825 2018-07-13 1617.623205 1523.216514
119952 9999 19 5.870300e+03 2017-05-14 motorola 122.151234 238.323118 1155.941675 1360.819971 2018-08-13 1472.263398 1778.078244
119953 9999 20 5.870300e+03 2017-05-14 motorola 348.065944 238.323118 1004.350395 1685.613058 2018-09-13 1523.216514 1719.933228
119954 9999 21 5.870300e+03 2017-05-14 motorola 433.259662 238.323118 1129.281883 1870.543429 2018-10-14 1778.078244 1687.839606
119955 9999 22 5.870300e+03 2017-05-14 motorola 297.181925 238.323118 1130.956199 1569.323027 2018-11-14 1719.933228 NaN
119956 9999 23 5.870300e+03 2017-05-14 motorola NaN 238.323118 1066.198167 1806.356185 2018-12-15 1687.839606 NaN

119957 rows × 12 columns

[30]:
# Write the final dataset to disk without the row index.
output_path = "fklearn-tutorial-input-dataset.csv"
with_target_df.to_csv(output_path, index=False)
[ ]: