In [588]:
%matplotlib inline 
from matplotlib import pyplot as plt
import matplotlib.image as img
import os
import numpy as np
trainData = []
for filename in os.listdir('Train1/'):
    if filename.endswith(".jpg"):
        image = img.imread('Train1/' + filename) ## Load an image as numpy array
        trainData.append(image)

plt.imshow(image, interpolation='nearest') #display the image 
plt.gray()  #grayscale conversion
plt.show()
print(len(trainData))
3772

Flatten images:

In [589]:
flatTrain = []
for img_t in trainData:
    flatTrain.append(img_t.flatten())

Mean subtract:

In [590]:
psi = np.zeros(len(flatTrain[0]))
l = len(flatTrain)
for row in range(0,len(flatTrain[0])):
    r = 0
    for mat in flatTrain:
        r += mat[row]
    av_r = r/l
    psi[row] += av_r
    
#psi is mean face    

#Subtract Each Faces from the Mean Face, Phi Array
A = []
for img_n2 in flatTrain:
    A.append(np.subtract(img_n2, psi))

Find Covariance Matrix and get the eigenvalues and normailized eigenvectors. **np.eig() returns normalized eigenvectors

In [591]:
C = np.cov(A)

val,vec = np.linalg.eig(C)

Take top eigenvectors to account for pvt_var% of the varience:

In [611]:
pct_var = 90
total_var = 0

#find cumalitive varience
for i in range(0, len(vec)):
    
    total_var = total_var + val[i]
    
res = {val[i]: vec[i] for i in range(len(val))}

val = sorted(val,reverse=True)

var_captured = 0

iter = 0

#find how many eigenvalues account for pct_var of varience:
while((var_captured/total_var *100 <= pct_var) and iter < len(val)):
    
    var_captured = var_captured + val[iter]
    
    iter = iter + 1
    
print("K = " + str(iter)) 
K = 264

The number printed above is the number of Eigenvectors that account for pct_var% of the varience. Project into the eigenspace and show what the first eigenface looks like:

In [601]:
print("Original Face:")
imgO = flatTrain[0].reshape(150,130)
plt.imshow(imgO, interpolation='nearest')
plt.show()

reduced_evecs_1 = []
for x in range(0, iter):
    reduced_evecs_1.append(res[val[x]])
proj_data_all = np.dot(np.transpose(A),np.transpose(reduced_evecs_1))
print("first eigenface:")
proj_data_all=np.transpose(proj_data_all)
img264 = proj_data_all[0].reshape(150,130)
plt.imshow(img264, interpolation='nearest')
plt.show()
Original Face:
first eigenface:

Varience captured by each eigenvector:

In [594]:
plt.plot(val/total_var)
plt.xlabel("Eigenvalue")
plt.ylabel("% Varience Captured")
plt.show()

Show various eigenfaces:

In [595]:
print("Varience in 1st Eigenface: " +  str(val[0]/total_var))
reduced_to_one_evecs = []
reduced_to_one_evecs.append(res[val[0]])
proj_data1 = np.dot(np.transpose(A),np.transpose(reduced_to_one_evecs))
proj_data1 = np.transpose(proj_data1)
img1z = proj_data1[0].reshape(150,130)
plt.imshow(img1z, interpolation='nearest')
plt.show()

print("Varience in 2nd Eigenface: " +  str(val[1]/total_var))
reduced_to_one_evecs = []
reduced_to_one_evecs.append(res[val[1]])
proj_data1 = np.dot(np.transpose(A),np.transpose(reduced_to_one_evecs))
proj_data1 = np.transpose(proj_data1)
img2z = proj_data1[0].reshape(150,130)
plt.imshow(img2z, interpolation='nearest')
plt.show()

print("Varience in 3rd Eigenface: " +  str(val[2]/total_var))
reduced_to_one_evecs = []
reduced_to_one_evecs.append(res[val[2]])
proj_data1 = np.dot(np.transpose(A),np.transpose(reduced_to_one_evecs))
proj_data1 = np.transpose(proj_data1)
img3z = proj_data1[0].reshape(150,130)
plt.imshow(img3z, interpolation='nearest')
plt.show()

print("Varience in the last taken Eigenface (number " + str(iter) +"): " +  str(val[iter-1]/total_var))
reduced_to_one_evecs = []
reduced_to_one_evecs.append(res[val[iter-1]])
proj_data1 = np.dot(np.transpose(A),np.transpose(reduced_to_one_evecs))
proj_data1 = np.transpose(proj_data1)
imglz = proj_data1[0].reshape(150,130)
plt.imshow(imglz, interpolation='nearest')
plt.show()
Varience in 1st Eigenface: 0.13757664077798368
Varience in 2nd Eigenface: 0.11675245891684263
Varience in 3rd Eigenface: 0.05684056335544166
Varience in the last taken Eigenface (number 264): 0.00024107702646764513

Find weights for all images:

In [596]:
reduced_evecs = []
for x in range(0, iter):
    reduced_evecs.append(res[val[x]])
proj_data = np.dot(np.transpose(A),np.transpose(reduced_evecs))
proj_data=np.transpose(proj_data)
w = np.array([np.dot(proj_data_all,i) for i in A])
print(np.shape(proj_data))
print(np.shape(proj_data_all))
(264, 19500)
(264, 19500)

The First K eigenfaces added together:

In [604]:
linComb = np.dot(w[0], proj_data_all)
linComb = linComb.reshape(150,130)
plt.imshow(linComb, interpolation='nearest')
plt.show()    

Now attempt to match faces. Read in test data:

In [605]:
#get new faces from test data
testData = []
for filename in os.listdir('Test1/'):
    if filename.endswith(".jpg"):
        image = img.imread('Test1/' + filename) ## Load an image as numpy array
        testData.append(image)

Now find the most similar image in the train set:

In [614]:
testing = testData[33]
testing_img = np.subtract(testing.flatten(), psi)
#get weight vector
wv = np.array([np.dot(i,testing_img) for i in proj_data_all])
np.shape(wv)
diffs = []
for weight in w:
    tmp = wv
    diffs.append(np.subtract(tmp, weight))
n_diff = []
for diff in diffs:
        n_diff.append(np.linalg.norm(diff))
        
#threshold
THRESHOLD = 70000000
imgMatch = []
if(min(n_diff) > THRESHOLD):
    imgMatch = None
else:
    imgMatch = flatTrain[np.argmin(n_diff)].reshape(150,130)
if(imgMatch is not None):
    print("Index of matched image:" + str(np.argmin(n_diff)))
    print("Original image:")
    plt.imshow(testing, interpolation='nearest')
    plt.show()
    print("Matched image:")
    plt.imshow(imgMatch, interpolation='nearest')
    plt.show()
else:
    print("NO MATCH FOR IMAGE:")
    plt.imshow(testing, interpolation='nearest')
    plt.show()
    print("Closest Match: ")
    imgMatch = flatTrain[np.argmin(n_diff)].reshape(150,130)
    plt.imshow(imgMatch, interpolation='nearest')
    plt.show()
    
print("distance: " + str(min(n_diff)))
Index of matched image:643
Original image:
Matched image:
distance: 67091789.48343685
In [ ]:

In [ ]: