# Read-in training datasettrain=pd.read_csv('train.csv')
print("The sizing of training data: {}".format(train.shape))
print("First 5 records: \n")
print(train.head(5))
# This will list the entire column name# print(list(train))target=train["label"]
train=train.drop("label",1)
# Understand more on the dataprint("Check the label of training data:\n{}".format(target.value_counts(sort=True)))
# What about the actual pixel variables?figure(figsize(10,10))
fordigit_numinrange(0,4):
subplot(2,2,digit_num+1)
plt.hist(train.iloc[digit_num])
suptitle("Before transform")
# Normalize the training datatrain/=255figure(figsize(10,10))
fordigit_numinrange(0,4):
subplot(2,2,digit_num+1)
plt.hist(train.iloc[digit_num])
suptitle("After transform")
Check the label of training data:
1 4684
7 4401
3 4351
9 4188
2 4177
6 4137
0 4132
4 4072
8 4063
5 3795
Name: label, dtype: int64
# Convert pixel from 784x1 (1 dimension) to 28x28 (2 dimensions)figure(figsize(5,5))
fordigit_numinrange(0,25):
subplot(5,5,digit_num+1)
grid_data=train.iloc[digit_num].as_matrix().reshape(28,28)
plt.imshow(grid_data, interpolation="none", cmap="bone")
xticks([])
yticks([])
### Create function to evaluate the score of each classification modeldefeval_model_classifier(model, data, target, split_ratio):
trainX, testX, trainY, testY=train_test_split(data, target, train_size=split_ratio, random_state=0)
model.fit(trainX, trainY)
returnmodel.score(testX,testY)
### 1st round: RandomForestClassification# Initialise valuesnum_estimators_array=np.array([1,5,10,50,100,200,500])
num_smpl=10# Test run the model according to samples_numbernum_grid=len(num_estimators_array)
score_array_mu=np.zeros(num_grid) # Keep meanscore_array_sigma=np.zeros(num_grid) # Keep Standard deviation j=0print("{}: RandomForestClassification Starts!".format(now()))
forn_estimatorsinnum_estimators_array:
score_array=np.zeros(num_smpl) # Initializeforiinrange(0,num_smpl):
rf_class=RandomForestClassifier(n_estimators=n_estimators, n_jobs=1, criterion="gini")
score_array[i] =eval_model_classifier(rf_class, train.iloc[0:1000], target.iloc[0:1000], 0.8)
print("{}: Try {} with n_estimators = {} and score = {}".format(now(), i, n_estimators, score_array[i]))
score_array_mu[j], score_array_sigma[j] =mean(score_array), std(score_array)
j=j+1print("{}: RandomForestClassification Done!".format(now()))
03/03/17 16:04:05: RandomForestClassification Starts!
03/03/17 16:04:05: Try 0 with n_estimators = 1 and score = 0.49
03/03/17 16:04:05: Try 1 with n_estimators = 1 and score = 0.58
03/03/17 16:04:05: Try 2 with n_estimators = 1 and score = 0.525
03/03/17 16:04:05: Try 3 with n_estimators = 1 and score = 0.51
03/03/17 16:04:05: Try 4 with n_estimators = 1 and score = 0.59
03/03/17 16:04:05: Try 5 with n_estimators = 1 and score = 0.565
03/03/17 16:04:05: Try 6 with n_estimators = 1 and score = 0.475
03/03/17 16:04:05: Try 7 with n_estimators = 1 and score = 0.57
03/03/17 16:04:05: Try 8 with n_estimators = 1 and score = 0.51
03/03/17 16:04:05: Try 9 with n_estimators = 1 and score = 0.48
03/03/17 16:04:05: Try 0 with n_estimators = 5 and score = 0.685
03/03/17 16:04:05: Try 1 with n_estimators = 5 and score = 0.73
03/03/17 16:04:05: Try 2 with n_estimators = 5 and score = 0.655
03/03/17 16:04:05: Try 3 with n_estimators = 5 and score = 0.655
03/03/17 16:04:05: Try 4 with n_estimators = 5 and score = 0.725
03/03/17 16:04:06: Try 5 with n_estimators = 5 and score = 0.74
03/03/17 16:04:06: Try 6 with n_estimators = 5 and score = 0.74
03/03/17 16:04:06: Try 7 with n_estimators = 5 and score = 0.68
03/03/17 16:04:06: Try 8 with n_estimators = 5 and score = 0.725
03/03/17 16:04:06: Try 9 with n_estimators = 5 and score = 0.64
03/03/17 16:04:06: Try 0 with n_estimators = 10 and score = 0.83
03/03/17 16:04:06: Try 1 with n_estimators = 10 and score = 0.83
03/03/17 16:04:06: Try 2 with n_estimators = 10 and score = 0.76
03/03/17 16:04:06: Try 3 with n_estimators = 10 and score = 0.785
03/03/17 16:04:06: Try 4 with n_estimators = 10 and score = 0.815
03/03/17 16:04:06: Try 5 with n_estimators = 10 and score = 0.76
03/03/17 16:04:06: Try 6 with n_estimators = 10 and score = 0.815
03/03/17 16:04:06: Try 7 with n_estimators = 10 and score = 0.775
03/03/17 16:04:06: Try 8 with n_estimators = 10 and score = 0.795
03/03/17 16:04:06: Try 9 with n_estimators = 10 and score = 0.8
03/03/17 16:04:07: Try 0 with n_estimators = 50 and score = 0.87
03/03/17 16:04:07: Try 1 with n_estimators = 50 and score = 0.875
03/03/17 16:04:07: Try 2 with n_estimators = 50 and score = 0.885
03/03/17 16:04:07: Try 3 with n_estimators = 50 and score = 0.865
03/03/17 16:04:08: Try 4 with n_estimators = 50 and score = 0.87
03/03/17 16:04:08: Try 5 with n_estimators = 50 and score = 0.86
03/03/17 16:04:08: Try 6 with n_estimators = 50 and score = 0.885
03/03/17 16:04:09: Try 7 with n_estimators = 50 and score = 0.88
03/03/17 16:04:09: Try 8 with n_estimators = 50 and score = 0.88
03/03/17 16:04:09: Try 9 with n_estimators = 50 and score = 0.875
03/03/17 16:04:10: Try 0 with n_estimators = 100 and score = 0.885
03/03/17 16:04:10: Try 1 with n_estimators = 100 and score = 0.9
03/03/17 16:04:11: Try 2 with n_estimators = 100 and score = 0.88
03/03/17 16:04:12: Try 3 with n_estimators = 100 and score = 0.89
03/03/17 16:04:12: Try 4 with n_estimators = 100 and score = 0.9
03/03/17 16:04:13: Try 5 with n_estimators = 100 and score = 0.89
03/03/17 16:04:13: Try 6 with n_estimators = 100 and score = 0.895
03/03/17 16:04:14: Try 7 with n_estimators = 100 and score = 0.89
03/03/17 16:04:14: Try 8 with n_estimators = 100 and score = 0.88
03/03/17 16:04:15: Try 9 with n_estimators = 100 and score = 0.88
03/03/17 16:04:16: Try 0 with n_estimators = 200 and score = 0.9
03/03/17 16:04:17: Try 1 with n_estimators = 200 and score = 0.91
03/03/17 16:04:18: Try 2 with n_estimators = 200 and score = 0.89
03/03/17 16:04:19: Try 3 with n_estimators = 200 and score = 0.88
03/03/17 16:04:20: Try 4 with n_estimators = 200 and score = 0.895
03/03/17 16:04:21: Try 5 with n_estimators = 200 and score = 0.885
03/03/17 16:04:22: Try 6 with n_estimators = 200 and score = 0.905
03/03/17 16:04:23: Try 7 with n_estimators = 200 and score = 0.9
03/03/17 16:04:24: Try 8 with n_estimators = 200 and score = 0.9
03/03/17 16:04:26: Try 9 with n_estimators = 200 and score = 0.88
03/03/17 16:04:28: Try 0 with n_estimators = 500 and score = 0.895
03/03/17 16:04:31: Try 1 with n_estimators = 500 and score = 0.88
03/03/17 16:04:34: Try 2 with n_estimators = 500 and score = 0.91
03/03/17 16:04:37: Try 3 with n_estimators = 500 and score = 0.91
03/03/17 16:04:40: Try 4 with n_estimators = 500 and score = 0.905
03/03/17 16:04:43: Try 5 with n_estimators = 500 and score = 0.925
03/03/17 16:04:47: Try 6 with n_estimators = 500 and score = 0.89
03/03/17 16:04:50: Try 7 with n_estimators = 500 and score = 0.91
03/03/17 16:04:54: Try 8 with n_estimators = 500 and score = 0.895
03/03/17 16:04:57: Try 9 with n_estimators = 500 and score = 0.9
03/03/17 16:04:57: RandomForestClassification Done!
# K-nearestn_neighbors_array=range(2,20)
weight_array= ['uniform','distance']
score_array=np.zeros(len(n_neighbors_array))
forweightinweight_array:
i=0forninn_neighbors_array:
nbrs=KNeighborsClassifier(n_neighbors=n, weights=weight)
score_array[i] =eval_model_classifier(nbrs, transform, target, 0.8)
print("{}: for n_neighbors = {} and weight ={} produces score = {}".format(now(), n, weight, score_array[i]))
i+=1print("Done")
03/03/17 16:24:59: for n_neighbors = 2 and weight =uniform produces score = 0.9508333333333333
03/03/17 16:25:03: for n_neighbors = 3 and weight =uniform produces score = 0.96
03/03/17 16:25:08: for n_neighbors = 4 and weight =uniform produces score = 0.9607142857142857
03/03/17 16:25:13: for n_neighbors = 5 and weight =uniform produces score = 0.9603571428571429
03/03/17 16:25:19: for n_neighbors = 6 and weight =uniform produces score = 0.9592857142857143
03/03/17 16:25:25: for n_neighbors = 7 and weight =uniform produces score = 0.9591666666666666
03/03/17 16:25:31: for n_neighbors = 8 and weight =uniform produces score = 0.9582142857142857
03/03/17 16:25:37: for n_neighbors = 9 and weight =uniform produces score = 0.958452380952381
03/03/17 16:25:44: for n_neighbors = 10 and weight =uniform produces score = 0.9576190476190476
03/03/17 16:25:50: for n_neighbors = 11 and weight =uniform produces score = 0.9570238095238095
03/03/17 16:25:57: for n_neighbors = 12 and weight =uniform produces score = 0.9578571428571429
03/03/17 16:26:04: for n_neighbors = 13 and weight =uniform produces score = 0.9553571428571429
03/03/17 16:26:11: for n_neighbors = 14 and weight =uniform produces score = 0.955952380952381
03/03/17 16:26:19: for n_neighbors = 15 and weight =uniform produces score = 0.9530952380952381
03/03/17 16:26:26: for n_neighbors = 16 and weight =uniform produces score = 0.9544047619047619
03/03/17 16:26:34: for n_neighbors = 17 and weight =uniform produces score = 0.9545238095238096
03/03/17 16:26:42: for n_neighbors = 18 and weight =uniform produces score = 0.9536904761904762
03/03/17 16:26:51: for n_neighbors = 19 and weight =uniform produces score = 0.9532142857142857
03/03/17 16:26:56: for n_neighbors = 2 and weight =distance produces score = 0.9557142857142857
03/03/17 16:27:03: for n_neighbors = 3 and weight =distance produces score = 0.9611904761904762
03/03/17 16:27:08: for n_neighbors = 4 and weight =distance produces score = 0.9615476190476191
03/03/17 16:27:14: for n_neighbors = 5 and weight =distance produces score = 0.9614285714285714
03/03/17 16:27:19: for n_neighbors = 6 and weight =distance produces score = 0.9610714285714286
03/03/17 16:27:24: for n_neighbors = 7 and weight =distance produces score = 0.9602380952380952
03/03/17 16:27:31: for n_neighbors = 8 and weight =distance produces score = 0.9608333333333333
03/03/17 16:27:38: for n_neighbors = 9 and weight =distance produces score = 0.9598809523809524
03/03/17 16:27:44: for n_neighbors = 10 and weight =distance produces score = 0.9604761904761905
03/03/17 16:27:50: for n_neighbors = 11 and weight =distance produces score = 0.9582142857142857
03/03/17 16:27:57: for n_neighbors = 12 and weight =distance produces score = 0.9596428571428571
03/03/17 16:28:06: for n_neighbors = 13 and weight =distance produces score = 0.9572619047619048
03/03/17 16:28:13: for n_neighbors = 14 and weight =distance produces score = 0.9579761904761904
03/03/17 16:28:20: for n_neighbors = 15 and weight =distance produces score = 0.9547619047619048
03/03/17 16:28:28: for n_neighbors = 16 and weight =distance produces score = 0.955595238095238
03/03/17 16:28:35: for n_neighbors = 17 and weight =distance produces score = 0.955595238095238
03/03/17 16:28:44: for n_neighbors = 18 and weight =distance produces score = 0.955
03/03/17 16:28:53: for n_neighbors = 19 and weight =distance produces score = 0.955
Done
# Train all model using testing datapca=PCA(n_components=32)
pca.fit(train, target)
transform=pca.transform(train)
KNNmodel=KNeighborsClassifier(n_neighbors=4, weights='distance').fit(transform, target)
print(KNNmodel)
PaySim simulates mobile money transactions based on a sample of real transactions extracted from one month of financial logs from a mobile money service implemented in an African country.
The intention of this project is to scrape the commodity or other asset data from investing.com, which utilizes Python to get date, price (open, close), volume, and movement (change in %).