This notebook generates the dataset used in the FKLearn Tutorial.ipynb notebook.
- The FKLearn Tutorial notebook was used to introduce FKLearn at Nubank’s Data Science Meetup; its goal was to give an overview of how and why you should use FKLearn.
[1]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
[2]:
from scipy.stats import truncnorm
def get_truncated_normal(mean=0, sd=1, low=0, upp=10):
    """Build a frozen normal distribution truncated to ``[low, upp]``.

    ``scipy.stats.truncnorm`` takes its clip points in standard-deviation
    units relative to ``loc``, so the bounds are standardised here before
    being passed on.

    Parameters
    ----------
    mean : float
        Location of the underlying (untruncated) normal.
    sd : float
        Scale of the underlying normal; must be strictly positive.
    low, upp : float
        Lower and upper truncation bounds, in the same units as ``mean``;
        ``low`` must be strictly smaller than ``upp``.

    Returns
    -------
    Frozen ``truncnorm`` distribution; call ``.rvs(size)`` to draw samples.

    Raises
    ------
    ValueError
        If ``sd`` is not strictly positive or if ``low >= upp``.
    """
    # Fail early with a clear message instead of a ZeroDivisionError (sd == 0)
    # or an unusable distribution (negative sd / empty interval).
    if np.any(np.asarray(sd) <= 0):
        raise ValueError("sd must be strictly positive, got {!r}".format(sd))
    if np.any(np.asarray(low) >= np.asarray(upp)):
        raise ValueError(
            "low must be smaller than upp, got low={!r}, upp={!r}".format(low, upp))
    return truncnorm(
        (low - mean) / sd, (upp - mean) / sd, loc=mean, scale=sd)
[3]:
# Fix the global seed so that "Restart & Run All" regenerates the exact same
# dataset. Both the np.random.* calls and the scipy frozen-distribution .rvs()
# calls in this notebook draw from NumPy's global random state when no
# random_state is passed.
SEED = 42
np.random.seed(SEED)

# One row is generated for every (customer id, month) pair.
ids = range(0, 10000)  # 10,000 customer ids: 0..9999
months = range(1, 24)  # 23 month indices: 1..23 (upper bound is exclusive)
[4]:
# Every (id, month) combination as a two-column integer array, ordered by id
# first and month second: (0, 1), (0, 2), ..., (0, 23), (1, 1), ...
unique_entries = np.column_stack(
    (np.repeat(ids, len(months)), np.tile(months, len(ids)))
)
[5]:
unique_entries.shape
[5]:
(230000, 2)
[6]:
# Exploratory preview of how month ** 0.1 grows across the month indices.
# NOTE(review): the monthly factor that is later applied to bureau_score in
# this notebook uses an exponent of 0.2, not 0.1, so this preview does not
# show the curve that is actually used.
np.power(np.array([months]), 0.1)
[6]:
array([[1. , 1.07177346, 1.11612317, 1.14869835, 1.17461894,
1.1962312 , 1.21481404, 1.23114441, 1.24573094, 1.25892541,
1.27098162, 1.28208885, 1.29239222, 1.30200545, 1.31101942,
1.31950791, 1.32753167, 1.33514136, 1.34237965, 1.34928285,
1.35588211, 1.36220437, 1.36827308]])
[7]:
# One income per customer, drawn from a normal(5000, 2000) truncated to
# [300, 20000] and repeated so every monthly row of a customer carries it.
X = get_truncated_normal(5000, 2000, 300, 20000)
income_by_id = np.repeat(X.rvs(len(ids)), len(months))

# Despite its name, `income_wrong_entry` is True (p = 0.95) for the rows that
# KEEP their real income; the remaining rows are overwritten with 9999999.
income_wrong_entry = np.random.binomial(1, 1 - 0.05, unique_entries.shape[0]).astype(bool)

# The incomes are turned into a row vector first, so income_array has shape
# (1, n_rows) rather than (n_rows,).
income_array = np.where(income_wrong_entry, income_by_id[np.newaxis, :], 9999999)
[8]:
income_array.shape
[8]:
(1, 230000)
[9]:
# Histogram of income in 500-wide bins. income_by_id has already been repeated
# once per month at this point, so each customer is counted len(months) times.
# range(0, 20000, 500) ends at 19500, so values above 19500 are not drawn.
plt.hist(income_by_id, bins = range(0, 20000, 500))
[9]:
(array([8.5100e+02, 2.4150e+03, 3.8180e+03, 5.8880e+03, 9.3610e+03,
1.2420e+04, 1.6031e+04, 1.9228e+04, 2.1091e+04, 2.3276e+04,
2.2448e+04, 2.1942e+04, 2.0263e+04, 1.4789e+04, 1.2236e+04,
9.2230e+03, 5.8420e+03, 3.9100e+03, 2.3230e+03, 1.2880e+03,
7.3600e+02, 2.7600e+02, 9.2000e+01, 1.8400e+02, 2.3000e+01,
2.3000e+01, 2.3000e+01, 0.0000e+00, 0.0000e+00, 0.0000e+00,
0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00]),
array([ 0, 500, 1000, 1500, 2000, 2500, 3000, 3500, 4000,
4500, 5000, 5500, 6000, 6500, 7000, 7500, 8000, 8500,
9000, 9500, 10000, 10500, 11000, 11500, 12000, 12500, 13000,
13500, 14000, 14500, 15000, 15500, 16000, 16500, 17000, 17500,
18000, 18500, 19000, 19500]),
<a list of 39 Patch objects>)
[10]:
# One creation date per customer: 2017-01-01 plus a uniformly drawn offset of
# 0 .. len(months) * 31 - 1 days, repeated for each of the customer's rows.
signup_offsets = np.random.choice(len(months) * 31, len(ids)).astype("timedelta64[D]")
customer_creation_date = np.repeat(np.datetime64("2017-01-01") + signup_offsets, len(months))
[11]:
# Phone brands and the spend multiplier attached to each brand (same order).
phone_branches = ["samsung", "motorola", "iphone", "lg"]
phone_factor = [0.7, 0.3, 0.9, 0.45]

# One brand index per customer, drawn with the given probabilities.
random_phone = np.random.choice(4, len(ids), p=[0.15, 0.3, 0.25, 0.3])

# Look the brand / factor up by index and repeat it for every monthly row.
cellphone_branch = np.repeat(np.array(phone_branches)[random_phone], len(months))
cellphone_factor = np.repeat(np.array(phone_factor)[random_phone], len(months))
[12]:
cellphone_factor
[12]:
array([0.45, 0.45, 0.45, ..., 0.3 , 0.3 , 0.3 ])
[13]:
# Despite its name, `bureau_missing` is True (p = 0.9) where the score is
# PRESENT; the remaining rows become NaN below.
bureau_missing = np.random.binomial(1, 1 - 0.1, unique_entries.shape[0]).astype(bool)

# Raw scores: normal(500, 250) truncated to [0, 1000], one draw per row.
Y = get_truncated_normal(500, 250, 0, 1000)
raw_bureau_score = Y.rvs(unique_entries.shape[0])

# month ** 0.2, tiled per customer so it lines up with the id-major row order;
# dividing by it makes scores shrink as the month index grows.
monthly_factor = np.tile(np.array(months) ** 0.2, len(ids))
bureau_score = np.where(bureau_missing, raw_bureau_score, np.nan) / monthly_factor
[14]:
bureau_score
[14]:
array([395.94580788, 415.29087644, 159.24609131, ..., 433.25966177,
297.1819245 , nan])
[15]:
# Histogram of the month-adjusted bureau score in 25-wide bins. bureau_score
# holds NaN for the rows without a score. range(0, 1000, 25) ends at 975, so
# values above 975 are not drawn.
plt.hist(bureau_score, bins = range(0, 1000, 25))
[15]:
(array([ 2219., 2924., 3928., 4948., 6192., 7456., 8735., 10123.,
11212., 11909., 12934., 13351., 12961., 12877., 12282., 11558.,
10431., 9236., 8037., 7139., 5836., 4894., 3652., 2821.,
2063., 1654., 1256., 1029., 817., 572., 480., 375.,
275., 240., 206., 132., 78., 86., 69.]),
array([ 0, 25, 50, 75, 100, 125, 150, 175, 200, 225, 250, 275, 300,
325, 350, 375, 400, 425, 450, 475, 500, 525, 550, 575, 600, 625,
650, 675, 700, 725, 750, 775, 800, 825, 850, 875, 900, 925, 950,
975]),
<a list of 39 Patch objects>)
[16]:
# One latent "willingness to spend" per customer from an untruncated
# normal(500, 200), repeated across that customer's monthly rows.
willingness_to_spend = np.random.normal(loc=500, scale=200, size=len(ids)).repeat(len(months))
[17]:
willingness_to_spend
[17]:
array([933.87350032, 933.87350032, 933.87350032, ..., 238.32311792,
238.32311792, 238.32311792])
[18]:
# Histogram of willingness_to_spend in 50-wide bins. The values come from an
# untruncated normal, so the bins start below zero to show negative draws.
plt.hist(willingness_to_spend, bins = range(-1000, 1500, 50))
[18]:
(array([0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
0.0000e+00, 0.0000e+00, 0.0000e+00, 2.3000e+01, 0.0000e+00,
2.3000e+01, 4.6000e+01, 2.3000e+02, 4.6000e+02, 6.2100e+02,
1.1730e+03, 2.4610e+03, 4.1400e+03, 6.0030e+03, 7.9580e+03,
1.1868e+04, 1.4789e+04, 1.9044e+04, 2.0493e+04, 2.3276e+04,
2.3184e+04, 2.2379e+04, 1.8860e+04, 1.5571e+04, 1.2167e+04,
9.5680e+03, 5.9570e+03, 4.3930e+03, 2.2540e+03, 1.4030e+03,
9.6600e+02, 3.9100e+02, 1.8400e+02, 2.3000e+01, 6.9000e+01,
0.0000e+00, 2.3000e+01, 0.0000e+00, 0.0000e+00]),
array([-1000, -950, -900, -850, -800, -750, -700, -650, -600,
-550, -500, -450, -400, -350, -300, -250, -200, -150,
-100, -50, 0, 50, 100, 150, 200, 250, 300,
350, 400, 450, 500, 550, 600, 650, 700, 750,
800, 850, 900, 950, 1000, 1050, 1100, 1150, 1200,
1250, 1300, 1350, 1400, 1450]),
<a list of 49 Patch objects>)
[31]:
# Pure-noise column: an independent normal(1000, 100) draw per row that does
# not enter the spend formula.
noise_feature = np.random.normal(loc=1000, scale=100, size=len(unique_entries))
[32]:
def _min_max(values):
    """Rescale `values` linearly so its minimum maps to 0 and its maximum to 1."""
    return (values - values.min()) / (values.max() - values.min())


# Four drivers of spend, each min-max scaled before being summed.
a = willingness_to_spend
a_norm = _min_max(a)

# NOTE(review): income_array still contains the 9999999 placeholder rows, so
# the maximum used for scaling is that placeholder whenever at least one row
# was overwritten; genuine incomes (<= 20000) then land very close to 0 while
# placeholder rows get the full weight of 1.
b = income_array
b_norm = _min_max(b)

c = cellphone_factor * willingness_to_spend
c_norm = _min_max(c)

# Missing bureau scores are replaced by 300.0 for the spend formula only;
# bureau_score itself keeps its NaNs.
d = np.where(np.isnan(bureau_score), 300.0, bureau_score)
d_norm = _min_max(d)

# Per-row multiplier: normal(2000, 100) truncated to [0, 50000].
W = get_truncated_normal(2000, 100, 0, 50000)
spend = (a_norm + b_norm + c_norm + d_norm) * W.rvs(unique_entries.shape[0])
[21]:
spend
[21]:
array([[6476.47307951, 4740.97909678, 3348.94742391, ..., 2367.47238387,
4354.480922 , 3508.97334522]])
[22]:
spend.shape
[22]:
(1, 230000)
[23]:
income_array.shape
[23]:
(1, 230000)
[33]:
# Assemble the per-(id, month) frame and keep only the rows whose month date
# is on or after the customer's creation date.
initial_df = (
    pd.DataFrame(unique_entries, columns=["id", "month"])
    .assign(
        # income_array and spend have shape (1, n_rows); ravel() flattens them
        # to plain 1-D columns (and is a no-op if they are already 1-D).
        income=np.ravel(income_array),
        created_at=customer_creation_date,
        phone_type=cellphone_branch,
        bureau_score=bureau_score,
        spend_desire=willingness_to_spend,
        random_noise=noise_feature,
        monthly_spend=np.ravel(spend),
        # Month m is dated 2017-01-01 + 31 * m days. The offset is converted
        # to a timedelta explicitly: adding a bare integer Series to a
        # datetime is rejected by pandas >= 1.0.
        month_date=lambda df: pd.Timestamp("2017-01-01")
        + pd.to_timedelta(df.month * 31, unit="D"),
    )
    .loc[lambda df: df.month_date >= df.created_at]
)
[34]:
# Mean bureau score per month, plotted against the month index
# (groupby returns the months in ascending order).
mean_bureau_by_month = initial_df.groupby("month").bureau_score.mean()
plt.plot(mean_bureau_by_month.index, mean_bureau_by_month.values)
[34]:
[<matplotlib.lines.Line2D at 0x1a24b54940>]
[35]:
initial_df
[35]:
id | month | income | created_at | phone_type | bureau_score | spend_desire | random_noise | monthly_spend | month_date | |
---|---|---|---|---|---|---|---|---|---|---|
10 | 0 | 11 | 2.662664e+03 | 2017-11-24 | lg | NaN | 933.873500 | 1139.988267 | 3044.984797 | 2017-12-08 |
11 | 0 | 12 | 2.662664e+03 | 2017-11-24 | lg | 259.050372 | 933.873500 | 949.164731 | 2911.910942 | 2018-01-08 |
12 | 0 | 13 | 2.662664e+03 | 2017-11-24 | lg | 295.523138 | 933.873500 | 829.196293 | 2867.779678 | 2018-02-08 |
13 | 0 | 14 | 2.662664e+03 | 2017-11-24 | lg | 260.729691 | 933.873500 | 1001.109359 | 3160.060523 | 2018-03-11 |
14 | 0 | 15 | 2.662664e+03 | 2017-11-24 | lg | 396.889624 | 933.873500 | 997.938315 | 3420.741792 | 2018-04-11 |
15 | 0 | 16 | 2.662664e+03 | 2017-11-24 | lg | 78.479225 | 933.873500 | 1032.025854 | 2484.013650 | 2018-05-12 |
16 | 0 | 17 | 2.662664e+03 | 2017-11-24 | lg | 79.617325 | 933.873500 | 969.913448 | 2454.602869 | 2018-06-12 |
17 | 0 | 18 | 2.662664e+03 | 2017-11-24 | lg | 177.472256 | 933.873500 | 766.796672 | 2803.507917 | 2018-07-13 |
18 | 0 | 19 | 2.662664e+03 | 2017-11-24 | lg | 189.046991 | 933.873500 | 907.258777 | 2580.205489 | 2018-08-13 |
19 | 0 | 20 | 2.662664e+03 | 2017-11-24 | lg | 296.367569 | 933.873500 | 1063.450859 | 3232.765577 | 2018-09-13 |
20 | 0 | 21 | 2.662664e+03 | 2017-11-24 | lg | 106.914660 | 933.873500 | 1095.115641 | 2722.341319 | 2018-10-14 |
21 | 0 | 22 | 2.662664e+03 | 2017-11-24 | lg | 111.076808 | 933.873500 | 964.705725 | 2334.885221 | 2018-11-14 |
22 | 0 | 23 | 2.662664e+03 | 2017-11-24 | lg | 204.017107 | 933.873500 | 999.699641 | 2862.418232 | 2018-12-15 |
44 | 1 | 22 | 3.919847e+03 | 2018-10-31 | samsung | 51.622959 | 661.945869 | 716.221137 | 2415.757986 | 2018-11-14 |
45 | 1 | 23 | 3.919847e+03 | 2018-10-31 | samsung | 335.759979 | 661.945869 | 817.390238 | 2539.717872 | 2018-12-15 |
59 | 2 | 14 | 7.822244e+03 | 2018-02-19 | lg | NaN | 538.113849 | 934.655776 | 2215.079016 | 2018-03-11 |
60 | 2 | 15 | 7.822244e+03 | 2018-02-19 | lg | 140.761112 | 538.113849 | 834.796711 | 1855.797279 | 2018-04-11 |
61 | 2 | 16 | 7.822244e+03 | 2018-02-19 | lg | 191.061416 | 538.113849 | 999.442880 | 2135.130741 | 2018-05-12 |
62 | 2 | 17 | 7.822244e+03 | 2018-02-19 | lg | 258.865637 | 538.113849 | 1006.651873 | 2278.116638 | 2018-06-12 |
63 | 2 | 18 | 7.822244e+03 | 2018-02-19 | lg | 233.545889 | 538.113849 | 1053.284574 | 2277.614906 | 2018-07-13 |
64 | 2 | 19 | 7.822244e+03 | 2018-02-19 | lg | 74.862513 | 538.113849 | 1124.678558 | 1797.577610 | 2018-08-13 |
65 | 2 | 20 | 7.822244e+03 | 2018-02-19 | lg | 331.113763 | 538.113849 | 1082.653241 | 2380.970903 | 2018-09-13 |
66 | 2 | 21 | 7.822244e+03 | 2018-02-19 | lg | 313.764030 | 538.113849 | 851.352016 | 2384.972242 | 2018-10-14 |
67 | 2 | 22 | 7.822244e+03 | 2018-02-19 | lg | NaN | 538.113849 | 946.388351 | 2334.708744 | 2018-11-14 |
68 | 2 | 23 | 7.822244e+03 | 2018-02-19 | lg | 264.382012 | 538.113849 | 1078.967846 | 2324.311034 | 2018-12-15 |
82 | 3 | 14 | 6.710126e+03 | 2018-02-26 | motorola | 336.382952 | 219.718444 | 1086.431562 | 1576.195080 | 2018-03-11 |
83 | 3 | 15 | 6.710126e+03 | 2018-02-26 | motorola | 294.471077 | 219.718444 | 977.412880 | 1569.622583 | 2018-04-11 |
84 | 3 | 16 | 6.710126e+03 | 2018-02-26 | motorola | 217.297753 | 219.718444 | 832.223156 | 1388.277215 | 2018-05-12 |
85 | 3 | 17 | 6.710126e+03 | 2018-02-26 | motorola | 233.320385 | 219.718444 | 1100.760524 | 1489.505004 | 2018-06-12 |
86 | 3 | 18 | 6.710126e+03 | 2018-02-26 | motorola | NaN | 219.718444 | 956.273725 | 1557.915921 | 2018-07-13 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
229966 | 9998 | 13 | 4.302987e+03 | 2017-11-20 | lg | 394.733451 | 741.394048 | 865.889465 | 2787.969664 | 2018-02-08 |
229967 | 9998 | 14 | 4.302987e+03 | 2017-11-20 | lg | 103.497621 | 741.394048 | 1016.677574 | 2439.669647 | 2018-03-11 |
229968 | 9998 | 15 | 4.302987e+03 | 2017-11-20 | lg | 324.048521 | 741.394048 | 1008.213981 | 2620.098144 | 2018-04-11 |
229969 | 9998 | 16 | 4.302987e+03 | 2017-11-20 | lg | 285.229650 | 741.394048 | 942.462260 | 2596.108281 | 2018-05-12 |
229970 | 9998 | 17 | 9.999999e+06 | 2017-11-20 | lg | 279.223919 | 741.394048 | 936.814390 | 4387.542945 | 2018-06-12 |
229971 | 9998 | 18 | 4.302987e+03 | 2017-11-20 | lg | 434.538705 | 741.394048 | 1104.925121 | 2856.148411 | 2018-07-13 |
229972 | 9998 | 19 | 4.302987e+03 | 2017-11-20 | lg | 228.848144 | 741.394048 | 991.911761 | 2430.254018 | 2018-08-13 |
229973 | 9998 | 20 | 4.302987e+03 | 2017-11-20 | lg | 222.192479 | 741.394048 | 895.700387 | 2713.008033 | 2018-09-13 |
229974 | 9998 | 21 | 4.302987e+03 | 2017-11-20 | lg | 472.168078 | 741.394048 | 999.813229 | 2877.605278 | 2018-10-14 |
229975 | 9998 | 22 | 4.302987e+03 | 2017-11-20 | lg | 315.973050 | 741.394048 | 1007.066609 | 2742.294489 | 2018-11-14 |
229976 | 9998 | 23 | 4.302987e+03 | 2017-11-20 | lg | 351.939005 | 741.394048 | 1024.544743 | 2690.467294 | 2018-12-15 |
229981 | 9999 | 5 | 5.870300e+03 | 2017-05-14 | motorola | 535.589626 | 238.323118 | 928.305709 | 2131.044250 | 2017-06-05 |
229982 | 9999 | 6 | 5.870300e+03 | 2017-05-14 | motorola | 495.595018 | 238.323118 | 956.751764 | 2199.817043 | 2017-07-06 |
229983 | 9999 | 7 | 5.870300e+03 | 2017-05-14 | motorola | 283.991075 | 238.323118 | 939.329302 | 1663.512745 | 2017-08-06 |
229984 | 9999 | 8 | 5.870300e+03 | 2017-05-14 | motorola | 322.510656 | 238.323118 | 1192.457400 | 1720.711685 | 2017-09-06 |
229985 | 9999 | 9 | 5.870300e+03 | 2017-05-14 | motorola | 219.439185 | 238.323118 | 863.324305 | 1427.658077 | 2017-10-07 |
229986 | 9999 | 10 | 5.870300e+03 | 2017-05-14 | motorola | 573.838047 | 238.323118 | 992.516111 | 2145.163885 | 2017-11-07 |
229987 | 9999 | 11 | 5.870300e+03 | 2017-05-14 | motorola | 298.403830 | 238.323118 | 1007.786207 | 1652.139410 | 2017-12-08 |
229988 | 9999 | 12 | 5.870300e+03 | 2017-05-14 | motorola | 381.954312 | 238.323118 | 997.821963 | 1668.924825 | 2018-01-08 |
229989 | 9999 | 13 | 5.870300e+03 | 2017-05-14 | motorola | 444.736416 | 238.323118 | 907.488679 | 2013.678091 | 2018-02-08 |
229990 | 9999 | 14 | 5.870300e+03 | 2017-05-14 | motorola | 348.825753 | 238.323118 | 1017.164196 | 1857.592133 | 2018-03-11 |
229991 | 9999 | 15 | 5.870300e+03 | 2017-05-14 | motorola | 129.942796 | 238.323118 | 973.744805 | 1198.114179 | 2018-04-11 |
229992 | 9999 | 16 | 9.999999e+06 | 2017-05-14 | motorola | 223.670064 | 238.323118 | 998.105372 | 3618.688650 | 2018-05-12 |
229993 | 9999 | 17 | 5.870300e+03 | 2017-05-14 | motorola | 307.816191 | 238.323118 | 1031.266520 | 1651.539584 | 2018-06-12 |
229994 | 9999 | 18 | 5.870300e+03 | 2017-05-14 | motorola | 246.119892 | 238.323118 | 1023.319029 | 1583.706825 | 2018-07-13 |
229995 | 9999 | 19 | 5.870300e+03 | 2017-05-14 | motorola | 122.151234 | 238.323118 | 1155.941675 | 1360.819971 | 2018-08-13 |
229996 | 9999 | 20 | 5.870300e+03 | 2017-05-14 | motorola | 348.065944 | 238.323118 | 1004.350395 | 1685.613058 | 2018-09-13 |
229997 | 9999 | 21 | 5.870300e+03 | 2017-05-14 | motorola | 433.259662 | 238.323118 | 1129.281883 | 1870.543429 | 2018-10-14 |
229998 | 9999 | 22 | 5.870300e+03 | 2017-05-14 | motorola | 297.181925 | 238.323118 | 1130.956199 | 1569.323027 | 2018-11-14 |
229999 | 9999 | 23 | 5.870300e+03 | 2017-05-14 | motorola | NaN | 238.323118 | 1066.198167 | 1806.356185 | 2018-12-15 |
119957 rows × 10 columns
[36]:
def _with_spend_features(customer_df):
    """Add the trailing spend average and the target to one customer's rows.

    `avg_last_2_months_spend` is the mean of `monthly_spend` over the current
    and the previous row (NaN on the first row). `target` is that same
    average read two rows further down, i.e. the mean spend of the next two
    rows (NaN on the last two rows).
    """
    return customer_df.assign(
        avg_last_2_months_spend=lambda x: x.monthly_spend.rolling(2).mean(),
        target=lambda x: x.avg_last_2_months_spend.shift(-2),
    )


# Apply per customer so neither the rolling window nor the shift crosses from
# one id into the next, then flatten back to a simple 0..n-1 index.
with_target_df = (
    initial_df
    .groupby("id", as_index=False)
    .apply(_with_spend_features)
    .reset_index(drop=True)
)
[37]:
with_target_df
[37]:
id | month | income | created_at | phone_type | bureau_score | spend_desire | random_noise | monthly_spend | month_date | avg_last_2_months_spend | target | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0 | 11 | 2.662664e+03 | 2017-11-24 | lg | NaN | 933.873500 | 1139.988267 | 3044.984797 | 2017-12-08 | NaN | 2889.845310 |
1 | 0 | 12 | 2.662664e+03 | 2017-11-24 | lg | 259.050372 | 933.873500 | 949.164731 | 2911.910942 | 2018-01-08 | 2978.447870 | 3013.920100 |
2 | 0 | 13 | 2.662664e+03 | 2017-11-24 | lg | 295.523138 | 933.873500 | 829.196293 | 2867.779678 | 2018-02-08 | 2889.845310 | 3290.401158 |
3 | 0 | 14 | 2.662664e+03 | 2017-11-24 | lg | 260.729691 | 933.873500 | 1001.109359 | 3160.060523 | 2018-03-11 | 3013.920100 | 2952.377721 |
4 | 0 | 15 | 2.662664e+03 | 2017-11-24 | lg | 396.889624 | 933.873500 | 997.938315 | 3420.741792 | 2018-04-11 | 3290.401158 | 2469.308259 |
5 | 0 | 16 | 2.662664e+03 | 2017-11-24 | lg | 78.479225 | 933.873500 | 1032.025854 | 2484.013650 | 2018-05-12 | 2952.377721 | 2629.055393 |
6 | 0 | 17 | 2.662664e+03 | 2017-11-24 | lg | 79.617325 | 933.873500 | 969.913448 | 2454.602869 | 2018-06-12 | 2469.308259 | 2691.856703 |
7 | 0 | 18 | 2.662664e+03 | 2017-11-24 | lg | 177.472256 | 933.873500 | 766.796672 | 2803.507917 | 2018-07-13 | 2629.055393 | 2906.485533 |
8 | 0 | 19 | 2.662664e+03 | 2017-11-24 | lg | 189.046991 | 933.873500 | 907.258777 | 2580.205489 | 2018-08-13 | 2691.856703 | 2977.553448 |
9 | 0 | 20 | 2.662664e+03 | 2017-11-24 | lg | 296.367569 | 933.873500 | 1063.450859 | 3232.765577 | 2018-09-13 | 2906.485533 | 2528.613270 |
10 | 0 | 21 | 2.662664e+03 | 2017-11-24 | lg | 106.914660 | 933.873500 | 1095.115641 | 2722.341319 | 2018-10-14 | 2977.553448 | 2598.651727 |
11 | 0 | 22 | 2.662664e+03 | 2017-11-24 | lg | 111.076808 | 933.873500 | 964.705725 | 2334.885221 | 2018-11-14 | 2528.613270 | NaN |
12 | 0 | 23 | 2.662664e+03 | 2017-11-24 | lg | 204.017107 | 933.873500 | 999.699641 | 2862.418232 | 2018-12-15 | 2598.651727 | NaN |
13 | 1 | 22 | 3.919847e+03 | 2018-10-31 | samsung | 51.622959 | 661.945869 | 716.221137 | 2415.757986 | 2018-11-14 | NaN | NaN |
14 | 1 | 23 | 3.919847e+03 | 2018-10-31 | samsung | 335.759979 | 661.945869 | 817.390238 | 2539.717872 | 2018-12-15 | 2477.737929 | NaN |
15 | 2 | 14 | 7.822244e+03 | 2018-02-19 | lg | NaN | 538.113849 | 934.655776 | 2215.079016 | 2018-03-11 | NaN | 1995.464010 |
16 | 2 | 15 | 7.822244e+03 | 2018-02-19 | lg | 140.761112 | 538.113849 | 834.796711 | 1855.797279 | 2018-04-11 | 2035.438148 | 2206.623689 |
17 | 2 | 16 | 7.822244e+03 | 2018-02-19 | lg | 191.061416 | 538.113849 | 999.442880 | 2135.130741 | 2018-05-12 | 1995.464010 | 2277.865772 |
18 | 2 | 17 | 7.822244e+03 | 2018-02-19 | lg | 258.865637 | 538.113849 | 1006.651873 | 2278.116638 | 2018-06-12 | 2206.623689 | 2037.596258 |
19 | 2 | 18 | 7.822244e+03 | 2018-02-19 | lg | 233.545889 | 538.113849 | 1053.284574 | 2277.614906 | 2018-07-13 | 2277.865772 | 2089.274257 |
20 | 2 | 19 | 7.822244e+03 | 2018-02-19 | lg | 74.862513 | 538.113849 | 1124.678558 | 1797.577610 | 2018-08-13 | 2037.596258 | 2382.971573 |
21 | 2 | 20 | 7.822244e+03 | 2018-02-19 | lg | 331.113763 | 538.113849 | 1082.653241 | 2380.970903 | 2018-09-13 | 2089.274257 | 2359.840493 |
22 | 2 | 21 | 7.822244e+03 | 2018-02-19 | lg | 313.764030 | 538.113849 | 851.352016 | 2384.972242 | 2018-10-14 | 2382.971573 | 2329.509889 |
23 | 2 | 22 | 7.822244e+03 | 2018-02-19 | lg | NaN | 538.113849 | 946.388351 | 2334.708744 | 2018-11-14 | 2359.840493 | NaN |
24 | 2 | 23 | 7.822244e+03 | 2018-02-19 | lg | 264.382012 | 538.113849 | 1078.967846 | 2324.311034 | 2018-12-15 | 2329.509889 | NaN |
25 | 3 | 14 | 6.710126e+03 | 2018-02-26 | motorola | 336.382952 | 219.718444 | 1086.431562 | 1576.195080 | 2018-03-11 | NaN | 1478.949899 |
26 | 3 | 15 | 6.710126e+03 | 2018-02-26 | motorola | 294.471077 | 219.718444 | 977.412880 | 1569.622583 | 2018-04-11 | 1572.908831 | 1438.891110 |
27 | 3 | 16 | 6.710126e+03 | 2018-02-26 | motorola | 217.297753 | 219.718444 | 832.223156 | 1388.277215 | 2018-05-12 | 1478.949899 | 1523.710463 |
28 | 3 | 17 | 6.710126e+03 | 2018-02-26 | motorola | 233.320385 | 219.718444 | 1100.760524 | 1489.505004 | 2018-06-12 | 1438.891110 | 1409.380176 |
29 | 3 | 18 | 6.710126e+03 | 2018-02-26 | motorola | NaN | 219.718444 | 956.273725 | 1557.915921 | 2018-07-13 | 1523.710463 | 1282.434410 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
119927 | 9998 | 13 | 4.302987e+03 | 2017-11-20 | lg | 394.733451 | 741.394048 | 865.889465 | 2787.969664 | 2018-02-08 | 2741.752199 | 2529.883896 |
119928 | 9998 | 14 | 4.302987e+03 | 2017-11-20 | lg | 103.497621 | 741.394048 | 1016.677574 | 2439.669647 | 2018-03-11 | 2613.819656 | 2608.103212 |
119929 | 9998 | 15 | 4.302987e+03 | 2017-11-20 | lg | 324.048521 | 741.394048 | 1008.213981 | 2620.098144 | 2018-04-11 | 2529.883896 | 3491.825613 |
119930 | 9998 | 16 | 4.302987e+03 | 2017-11-20 | lg | 285.229650 | 741.394048 | 942.462260 | 2596.108281 | 2018-05-12 | 2608.103212 | 3621.845678 |
119931 | 9998 | 17 | 9.999999e+06 | 2017-11-20 | lg | 279.223919 | 741.394048 | 936.814390 | 4387.542945 | 2018-06-12 | 3491.825613 | 2643.201214 |
119932 | 9998 | 18 | 4.302987e+03 | 2017-11-20 | lg | 434.538705 | 741.394048 | 1104.925121 | 2856.148411 | 2018-07-13 | 3621.845678 | 2571.631026 |
119933 | 9998 | 19 | 4.302987e+03 | 2017-11-20 | lg | 228.848144 | 741.394048 | 991.911761 | 2430.254018 | 2018-08-13 | 2643.201214 | 2795.306656 |
119934 | 9998 | 20 | 4.302987e+03 | 2017-11-20 | lg | 222.192479 | 741.394048 | 895.700387 | 2713.008033 | 2018-09-13 | 2571.631026 | 2809.949884 |
119935 | 9998 | 21 | 4.302987e+03 | 2017-11-20 | lg | 472.168078 | 741.394048 | 999.813229 | 2877.605278 | 2018-10-14 | 2795.306656 | 2716.380892 |
119936 | 9998 | 22 | 4.302987e+03 | 2017-11-20 | lg | 315.973050 | 741.394048 | 1007.066609 | 2742.294489 | 2018-11-14 | 2809.949884 | NaN |
119937 | 9998 | 23 | 4.302987e+03 | 2017-11-20 | lg | 351.939005 | 741.394048 | 1024.544743 | 2690.467294 | 2018-12-15 | 2716.380892 | NaN |
119938 | 9999 | 5 | 5.870300e+03 | 2017-05-14 | motorola | 535.589626 | 238.323118 | 928.305709 | 2131.044250 | 2017-06-05 | NaN | 1931.664894 |
119939 | 9999 | 6 | 5.870300e+03 | 2017-05-14 | motorola | 495.595018 | 238.323118 | 956.751764 | 2199.817043 | 2017-07-06 | 2165.430646 | 1692.112215 |
119940 | 9999 | 7 | 5.870300e+03 | 2017-05-14 | motorola | 283.991075 | 238.323118 | 939.329302 | 1663.512745 | 2017-08-06 | 1931.664894 | 1574.184881 |
119941 | 9999 | 8 | 5.870300e+03 | 2017-05-14 | motorola | 322.510656 | 238.323118 | 1192.457400 | 1720.711685 | 2017-09-06 | 1692.112215 | 1786.410981 |
119942 | 9999 | 9 | 5.870300e+03 | 2017-05-14 | motorola | 219.439185 | 238.323118 | 863.324305 | 1427.658077 | 2017-10-07 | 1574.184881 | 1898.651647 |
119943 | 9999 | 10 | 5.870300e+03 | 2017-05-14 | motorola | 573.838047 | 238.323118 | 992.516111 | 2145.163885 | 2017-11-07 | 1786.410981 | 1660.532118 |
119944 | 9999 | 11 | 5.870300e+03 | 2017-05-14 | motorola | 298.403830 | 238.323118 | 1007.786207 | 1652.139410 | 2017-12-08 | 1898.651647 | 1841.301458 |
119945 | 9999 | 12 | 5.870300e+03 | 2017-05-14 | motorola | 381.954312 | 238.323118 | 997.821963 | 1668.924825 | 2018-01-08 | 1660.532118 | 1935.635112 |
119946 | 9999 | 13 | 5.870300e+03 | 2017-05-14 | motorola | 444.736416 | 238.323118 | 907.488679 | 2013.678091 | 2018-02-08 | 1841.301458 | 1527.853156 |
119947 | 9999 | 14 | 5.870300e+03 | 2017-05-14 | motorola | 348.825753 | 238.323118 | 1017.164196 | 1857.592133 | 2018-03-11 | 1935.635112 | 2408.401414 |
119948 | 9999 | 15 | 5.870300e+03 | 2017-05-14 | motorola | 129.942796 | 238.323118 | 973.744805 | 1198.114179 | 2018-04-11 | 1527.853156 | 2635.114117 |
119949 | 9999 | 16 | 9.999999e+06 | 2017-05-14 | motorola | 223.670064 | 238.323118 | 998.105372 | 3618.688650 | 2018-05-12 | 2408.401414 | 1617.623205 |
119950 | 9999 | 17 | 5.870300e+03 | 2017-05-14 | motorola | 307.816191 | 238.323118 | 1031.266520 | 1651.539584 | 2018-06-12 | 2635.114117 | 1472.263398 |
119951 | 9999 | 18 | 5.870300e+03 | 2017-05-14 | motorola | 246.119892 | 238.323118 | 1023.319029 | 1583.706825 | 2018-07-13 | 1617.623205 | 1523.216514 |
119952 | 9999 | 19 | 5.870300e+03 | 2017-05-14 | motorola | 122.151234 | 238.323118 | 1155.941675 | 1360.819971 | 2018-08-13 | 1472.263398 | 1778.078244 |
119953 | 9999 | 20 | 5.870300e+03 | 2017-05-14 | motorola | 348.065944 | 238.323118 | 1004.350395 | 1685.613058 | 2018-09-13 | 1523.216514 | 1719.933228 |
119954 | 9999 | 21 | 5.870300e+03 | 2017-05-14 | motorola | 433.259662 | 238.323118 | 1129.281883 | 1870.543429 | 2018-10-14 | 1778.078244 | 1687.839606 |
119955 | 9999 | 22 | 5.870300e+03 | 2017-05-14 | motorola | 297.181925 | 238.323118 | 1130.956199 | 1569.323027 | 2018-11-14 | 1719.933228 | NaN |
119956 | 9999 | 23 | 5.870300e+03 | 2017-05-14 | motorola | NaN | 238.323118 | 1066.198167 | 1806.356185 | 2018-12-15 | 1687.839606 | NaN |
119957 rows × 12 columns
[30]:
# Write the final dataset to CSV (index column omitted).
# NOTE(review): this cell's recorded execution count (30) is lower than that
# of the cells that build with_target_df (36-37), so the saved file may
# predate the last rebuild of the frame. Re-run the notebook top to bottom
# before relying on the CSV.
with_target_df.to_csv("fklearn-tutorial-input-dataset.csv", index=False)
[ ]: