# Facial Recognition with Principal Component Analysis

Using PCA to identify famous faces (eigenfaces)

Here we use principal component analysis (PCA) to reduce the number of features in a dataset of faces. The principal components (PCs) are then fed into a support vector machine (SVM) classifier, which classifies the faces based on the learned features.

The dataset used in this example is a preprocessed excerpt of the "Labeled Faces in the Wild" (LFW) dataset.
## Import the libraries

Here we import the libraries that we need later.
```python
from time import time
import logging
import numpy as np
import pylab as pl
import matplotlib.pyplot as plt
# !pip install -U scikit-learn
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.datasets import fetch_lfw_people
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.decomposition import PCA
from sklearn.svm import SVC

# Display progress logs on stdout
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s')
```
## Get the data

Here we pull in the data and store it in NumPy arrays.
```python
# Download the data
lfw_people = fetch_lfw_people(min_faces_per_person=70, resize=0.4)

# Find out shape information about the images to help with plotting them
n_samples, h, w = lfw_people.images.shape

np.random.seed(42)

# For machine learning we use the data directly (as relative pixel
# position info is ignored by this model)
X = lfw_people.data
n_features = X.shape[1]

# The label to predict is the ID of the person
y = lfw_people.target
target_names = lfw_people.target_names
n_classes = target_names.shape[0]

print("Total dataset size:")
print("n_samples: %d" % n_samples)
print("n_features: %d" % n_features)
print("n_classes: %d" % n_classes)
print("Classes: %s" % target_names)
```
```
2019-07-19 05:21:21,839 Downloading LFW metadata: https://ndownloader.figshare.com/files/5976012
2019-07-19 05:21:28,215 Downloading LFW metadata: https://ndownloader.figshare.com/files/5976009
2019-07-19 05:21:29,542 Downloading LFW metadata: https://ndownloader.figshare.com/files/5976006
2019-07-19 05:21:31,138 Downloading LFW data (~200MB): https://ndownloader.figshare.com/files/5976015
Total dataset size:
n_samples: 1288
n_features: 1850
n_classes: 7
Classes: ['Ariel Sharon' 'Colin Powell' 'Donald Rumsfeld' 'George W Bush'
 'Gerhard Schroeder' 'Hugo Chavez' 'Tony Blair']
```
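Before modeling, it can help to see how unbalanced the classes are; the heavy skew toward George W Bush is one reason `class_weight='balanced'` is used for the SVM later on. A small sketch, not part of the original notebook:

```python
# Sketch (an addition): count samples per class.
# The class IDs in y run from 0 to n_classes-1, so np.bincount works directly.
for name, count in zip(target_names, np.bincount(y)):
    print("%-18s %d" % (name, count))
```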
```python
# Let's look at the data to see what the faces look like
pl.figure()
for i in range(0, 3):
    pl.subplot(1, 3, i + 1)
    pl.imshow(X[i].reshape((h, w)), cmap=pl.cm.bone)
    pl.title(target_names[lfw_people.target[i]])
    pl.xticks(())
    pl.yticks(())
```
## Split Data (Test|Train)

Here we split the data into training and test sets.
```python
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)
y_test.dtype
```

```
dtype('int64')
```
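The labels are integer class IDs, which is why `y_test.dtype` is `int64`; `target_names` maps them back to people. A quick illustration (an addition, not from the original):

```python
# target_names is a NumPy array, so it accepts an array of class IDs at once
print(y_test[:5])                # integer labels, i.e. values in [0, n_classes)
print(target_names[y_test[:5]])  # the corresponding names
```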
## Do PCA and Dimensionality Reduction on the Data

Now we compute the PCs using PCA.
```python
# Compute a PCA (eigenfaces) on the face dataset (treated as an unlabeled
# dataset): unsupervised feature extraction / dimensionality reduction
n_components = 250

print("Extracting the top %d eigenfaces from %d faces" % (n_components, X_train.shape[0]))

# Initiate a time counter (kinda like tic-toc)
t0 = time()

# Here we take the training data and compute the PCs
pca = PCA(n_components=n_components, whiten=True).fit(X_train)

# Print the time it took to compute
print("Done in %0.3fs" % (time() - t0))

# Reshape the PCs to the image format
eigenfaces = pca.components_.reshape((n_components, h, w))

pl.figure()
for i in range(0, 3):
    pl.subplot(1, 3, i + 1)
    pl.imshow(eigenfaces[i], cmap=pl.cm.bone)
    pl.title("Eigenface PC-%d" % (i + 1))
    pl.xticks(())
    pl.yticks(())

# cbar = pl.colorbar()
# cbar.solids.set_edgecolor("face")
# pl.draw()
# print(pca.explained_variance_)
# print(pca.explained_variance_ratio_)
```
```
Extracting the top 250 eigenfaces from 966 faces
Done in 0.399s
```
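One way to see what the 250-component compression keeps and throws away is to round-trip a face through the PCA basis with `inverse_transform`. This is a minimal sketch added for illustration; it is not part of the original notebook:

```python
# Project one training face down to 250 PCs, then map it back to pixel space.
# With whiten=True, sklearn's inverse_transform reverses the whitening as well.
x0 = X_train[0]
x0_pca = pca.transform(x0.reshape(1, -1))               # 1850 pixels -> 250 PCs
x0_rec = pca.inverse_transform(x0_pca).reshape((h, w))  # 250 PCs -> 1850 pixels

pl.figure()
pl.subplot(1, 2, 1)
pl.imshow(x0.reshape((h, w)), cmap=pl.cm.bone)
pl.title("Original")
pl.subplot(1, 2, 2)
pl.imshow(x0_rec, cmap=pl.cm.bone)
pl.title("Reconstruction (250 PCs)")
```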
## Look at the top PCs

Here we plot the explained variance of the top PCs identified.
```python
top_pcs = 21

plt.figure(figsize=(9, 3))

plt.subplot(121)
plt.bar(np.arange(top_pcs), pca.explained_variance_[0:top_pcs])
plt.xlabel('PCs')
plt.ylabel('Var')

plt.subplot(122)
plt.bar(np.arange(top_pcs), pca.explained_variance_ratio_[0:top_pcs])
plt.xlabel('PCs')
plt.ylabel('Ratio')

np.shape(pca.explained_variance_)
```
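A common way to pick `n_components` is from the cumulative explained-variance curve rather than the per-PC bars above. A short sketch (an addition, assuming the `pca` fitted above):

```python
# Cumulative fraction of variance captured by the first k PCs (k = 1..250).
cumvar = np.cumsum(pca.explained_variance_ratio_)
k95 = np.searchsorted(cumvar, 0.95) + 1  # first k reaching 95% (if any of the 250 kept PCs do)
print("PCs needed for 95%% of the variance: %d (of %d kept)" % (k95, len(cumvar)))
```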
## Projecting the PCs

Now that we have the PCs (the vectors that account for the maximum variance), we can project the data down onto them. In this case they are the eigenfaces from above.
print("Projecting the input data on the eignefaces orthonormal basis")
=time()#tic
t0
= pca.transform(X_train) #take the training data and project it to eigenfaces
X_train_pca = pca.transform(X_test)#take the test data and project it to eigenfaces
X_test_pca print("Done in %0.3fs" % (time()- t0)) #toc
```
Projecting the input data on the eigenfaces orthonormal basis
Done in 0.035s
```
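For intuition, `pca.transform` here is just centering, a dot product with the component matrix, and (because `whiten=True`) a rescaling of each PC to unit variance. A hand-rolled check, added as a sketch and relying on sklearn's documented PCA behavior:

```python
# Manual projection: center, project onto the eigenfaces, then whiten.
X_manual = (X_train - pca.mean_) @ pca.components_.T / np.sqrt(pca.explained_variance_)
print(np.allclose(X_manual, X_train_pca, atol=1e-4))  # expected: True
```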
## Train an SVM Classification Model

Now that PCA has extracted the most important features, we can use them to train a classifier. The SVM classifier can then be used to predict whose face is being presented.
```python
# Training
print("Fitting the classifier to the training set")
t0 = time()

# These are the parameters we want to optimize. They are passed to GridSearchCV,
# which fits the classifier (clf) with the optimal parameters.
param_grid = {
    'C': [1e3, 5e3, 1e4, 5e4, 1e5],
    'gamma': [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.1],
}

# For sklearn version 0.16 or prior, the class_weight parameter value is 'auto'
clf = GridSearchCV(SVC(kernel='rbf', class_weight='balanced'), param_grid)
clf = clf.fit(X_train_pca, y_train)

print("done in %0.3fs" % (time() - t0))
print("Best estimator found by grid search:")
print(clf.best_estimator_)
```
```
Fitting the classifier to the training set
done in 39.016s
Best estimator found by grid search:
SVC(C=1000.0, cache_size=200, class_weight='balanced', coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma=0.001, kernel='rbf',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False)
/usr/local/lib/python3.6/dist-packages/sklearn/model_selection/_split.py:1978: FutureWarning: The default value of cv will change from 3 to 5 in version 0.22. Specify it explicitly to silence this warning.
  warnings.warn(CV_WARNING, FutureWarning)
```
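If you want just the winning hyper-parameters and the cross-validated accuracy that selected them, `GridSearchCV` exposes them directly (a small addition, not in the original output):

```python
# The grid-search winner and its mean cross-validated score
print(clf.best_params_)  # per the run above, C=1000.0 and gamma=0.001 won
print("Best CV accuracy: %0.3f" % clf.best_score_)
```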
```python
# Testing the classifier
print("Predicting the people names on the testing set")
t0 = time()

y_pred = clf.predict(X_test_pca)

print("done in %0.3fs" % (time() - t0))
print(classification_report(y_test, y_pred, target_names=target_names))
print(confusion_matrix(y_test, y_pred, labels=range(n_classes)))
```
```
Predicting the people names on the testing set
done in 0.125s
                   precision    recall  f1-score   support

     Ariel Sharon       0.64      0.69      0.67        13
     Colin Powell       0.75      0.90      0.82        60
  Donald Rumsfeld       0.82      0.67      0.73        27
    George W Bush       0.91      0.92      0.91       146
Gerhard Schroeder       0.87      0.80      0.83        25
      Hugo Chavez       0.80      0.53      0.64        15
       Tony Blair       0.82      0.78      0.80        36

         accuracy                           0.84       322
        macro avg       0.80      0.76      0.77       322
     weighted avg       0.84      0.84      0.84       322

[[  9   1   1   2   0   0   0]
 [  1  54   2   2   0   1   0]
 [  2   3  18   3   0   0   1]
 [  1   6   1 134   1   1   2]
 [  0   2   0   1  20   0   2]
 [  0   4   0   2   0   8   1]
 [  1   2   0   3   2   0  28]]
```
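As a sanity check, the overall accuracy in the report can be recovered from the confusion matrix: correct predictions sit on the diagonal. A quick sketch (an addition, not from the original):

```python
# Accuracy = correct predictions (diagonal) over all test samples
cm = confusion_matrix(y_test, y_pred, labels=range(n_classes))
print("Accuracy: %0.2f" % (np.trace(cm) / np.sum(cm)))  # ~0.84, matching the report
```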
## Visualizing Results

Now that we've trained and tested the SVM to classify faces, let's visualize the results.
```python
# Create a helper function to look at the pictures
def plot_gallery(images, titles, h, w, n_row=3, n_col=4):
    """Helper function to plot a gallery of portraits"""
    pl.figure(figsize=(1.8 * n_col, 2.4 * n_row))
    pl.subplots_adjust(bottom=0, left=.01, right=.99, top=.90, hspace=.35)
    for i in range(n_row * n_col):
        pl.subplot(n_row, n_col, i + 1)
        pl.imshow(images[i].reshape((h, w)), cmap=pl.cm.gray)
        pl.title(titles[i], size=12)
        pl.xticks(())
        pl.yticks(())

# Plot the result of the prediction on a portion of the test set
def title(y_pred, y_test, target_names, i):
    pred_name = target_names[y_pred[i]].rsplit(' ', 1)[-1]
    true_name = target_names[y_test[i]].rsplit(' ', 1)[-1]
    return 'predicted: %s\ntrue: %s' % (pred_name, true_name)

prediction_titles = [title(y_pred, y_test, target_names, i)
                     for i in range(y_pred.shape[0])]

# Now plot the predictions
plot_gallery(X_test, prediction_titles, h, w)

# Plot the eigenfaces
eigenface_titles = ["Eigenface %d" % i for i in range(eigenfaces.shape[0])]
plot_gallery(eigenfaces, eigenface_titles, h, w)

plt.show()
```
## Project Archive Note

This project is archived. Please note that library and framework versions may be outdated.

Last updated: April 2025