
Capstone Age_Perception, Face_Perception (dlib model)


Age_Perception Source Code (Image processing)

The model files are uploaded to Naver Drive. The script below detects faces in still images with dlib's frontal face detector and classifies each face into one of eight age buckets and a gender using two Caffe models.

Attachment: res_img.zip (1.99 MB)

import cv2, glob, dlib

age_list = ['(0, 4)','(5, 9)','(10, 19)','(20, 24)','(25, 29)','(30, 39)','(40, 49)','(50, 80)']
gender_list = ['Male', 'Female']

detector = dlib.get_frontal_face_detector()

age_net = cv2.dnn.readNetFromCaffe('models/deploy_age.prototxt', 'models/age_net.caffemodel')
gender_net = cv2.dnn.readNetFromCaffe('models/deploy_gender.prototxt', 'models/gender_net.caffemodel')

img_list = glob.glob('img/*.jpg')

for img_path in img_list:
	img = cv2.imread(img_path)

	faces = detector(img)
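	# the HOG-based dlib detector returns one rectangle per face found in the image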

	for face in faces:
		# clamp the detection box to the image bounds; dlib can return rectangles
		# that extend past the frame, which would give an empty crop
		x1, y1 = max(face.left(), 0), max(face.top(), 0)
		x2, y2 = min(face.right(), img.shape[1]), min(face.bottom(), img.shape[0])

		face_img = img[y1:y2, x1:x2].copy()

		blob = cv2.dnn.blobFromImage(face_img, scalefactor=1, size=(227, 227), mean=(78.4263377603, 87.7689143744, 114.895847746), swapRB=False, crop=False)
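		# the mean tuple above holds the per-channel means the age/gender Caffe models
		# were trained with; blobFromImage subtracts them from the face crop before inference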

		# predict gender
		gender_net.setInput(blob)
		gender_preds = gender_net.forward()
		gender = gender_list[gender_preds[0].argmax()]

		# predict age
		age_net.setInput(blob)
		age_preds = age_net.forward()
		age = age_list[age_preds[0].argmax()]

		# visualize
		cv2.rectangle(img, (x1, y1), (x2, y2), (255,255,255), 2)
		overlay_text = '%s %s' % (gender, age)
		cv2.putText(img, overlay_text, org=(x1, y1), fontFace=cv2.FONT_HERSHEY_SIMPLEX, fontScale=1, color=(0,0,0), thickness=10)
		cv2.putText(img, overlay_text, org=(x1, y1), fontFace=cv2.FONT_HERSHEY_SIMPLEX, fontScale=1, color=(255,255,255), thickness=2)

	cv2.imshow('img', img)
	cv2.imwrite('result/%s' % img_path.split('/')[-1], img)

	key = cv2.waitKey(0) & 0xFF
	if key == ord('q'):
		break
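
The script expects a models/ folder with the four Caffe files, an img/ folder of .jpg inputs, and an existing result/ folder for the annotated copies; each result is shown in a window, and pressing q stops the loop early.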

 

Face_Perception Source Code (Image processing)

The model files are uploaded to Naver Drive. descs_make.py enrolls one reference photo per person as a 128-D dlib face descriptor and saves the dictionary to img/descs.npy; Video_Perception.py then matches faces from the webcam against those descriptors.

Attachment: img.zip (0.78 MB)

descs_make.py

import dlib, cv2
import numpy as np
import time

start = time.time()
detector = dlib.get_frontal_face_detector()
sp = dlib.shape_predictor('models/shape_predictor_68_face_landmarks.dat')
facerec = dlib.face_recognition_model_v1('models/dlib_face_recognition_resnet_model_v1.dat')
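# sp predicts 68 facial landmarks per face; facerec maps the aligned face to a 128-D descriptor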

def find_faces(img):
	dets = detector(img, 1)
	if len(dets) == 0:
		return np.empty(0), np.empty(0), np.empty(0)

	rects, shapes = [], []
	shapes_np = np.zeros((len(dets), 68, 2), dtype=int)
	for k, d in enumerate(dets):
		rect = ((d.left(), d.top()), (d.right(), d.bottom()))
		rects.append(rect)

		shape = sp(img, d)

		# convert dlib shape to numpy array
		for i in range(0, 68):
			shapes_np[k][i] = (shape.part(i).x, shape.part(i).y)

		shapes.append(shape)
	return rects, shapes, shapes_np

def encode_faces(img, shapes):
	face_descriptors = []
	for shape in shapes:
		face_descriptor = facerec.compute_face_descriptor(img, shape)
		face_descriptors.append(np.array(face_descriptor))

	return np.array(face_descriptors)

img_paths = {}
descs = {}

# img/name.txt lists one person per line; each name maps to a reference photo img/<name>.jpg
with open('img/name.txt', 'r') as f:
	for line in f:
		name = line.strip()
		img_paths[name] = 'img/' + name + '.jpg'
		descs[name] = None

for name, img_path in img_paths.items():
	img_bgr = cv2.imread(img_path)
	img_rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)

	_, img_shapes, _ = find_faces(img_rgb)
	descs[name] = encode_faces(img_rgb, img_shapes)[0]
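	# assumes each reference photo contains at least one detectable face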

np.save('img/descs.npy', descs)
print(descs)

end = time.time()
print(f"{end - start:.5f} sec")
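
For reference, the saved dictionary can be loaded back and two enrolled descriptors compared by Euclidean distance, which is exactly what Video_Perception.py below does against the live webcam descriptor. A minimal sketch (the keys 'personA' and 'personB' are placeholders for names from img/name.txt):

import numpy as np

# load the {name: 128-D descriptor} dictionary written by descs_make.py
descs = np.load('img/descs.npy', allow_pickle=True)[()]

# Euclidean distance between two enrolled faces; smaller means more similar
# ('personA' and 'personB' are placeholder names from img/name.txt)
dist = np.linalg.norm(descs['personA'] - descs['personB'])
print('same person' if dist < 0.5 else 'different person', dist)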

 

Video_Perception.py

import dlib, cv2
import numpy as np

detector = dlib.get_frontal_face_detector()
sp = dlib.shape_predictor('models/shape_predictor_68_face_landmarks.dat')
facerec = dlib.face_recognition_model_v1('models/dlib_face_recognition_resnet_model_v1.dat')

descs = np.load('img/descs.npy', allow_pickle=True)[()]
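# np.save stored the dict inside a 0-d object array; the [()] above unwraps it back into a dict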

def encode_face(img):
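	# note: this helper is not called by the main loop below, which computes descriptors inline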
	dets = detector(img, 1)

	if len(dets) == 0:
		return np.empty(0)

	for k, d in enumerate(dets):
		shape = sp(img, d)
		face_descriptor = facerec.compute_face_descriptor(img, shape)

		return np.array(face_descriptor)

cap = cv2.VideoCapture(0)

if not cap.isOpened():
	exit()

_, img_bgr = cap.read() # (800, 1920, 3)
padding_size = 0
resized_width = 640
video_size = (resized_width, int(img_bgr.shape[0] * resized_width // img_bgr.shape[1]))
#output_size = (resized_width, int(img_bgr.shape[0] * resized_width // img_bgr.shape[1] + padding_size * 2))

#fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v')
#writer = cv2.VideoWriter('%s_output.mp4' % (video_path.split('.')[0]), fourcc, cap.get(cv2.CAP_PROP_FPS), output_size)

while True:
	ret, img_bgr = cap.read()
	if not ret:
		break

	img_bgr = cv2.resize(img_bgr, video_size)
	img_rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)

	dets = detector(img_rgb, 1)

	for k, d in enumerate(dets):
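		# compute a 128-D descriptor for this face and compare it with every enrolled one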
		shape = sp(img_rgb, d)
		face_descriptor = facerec.compute_face_descriptor(img_rgb, shape)

		last_found = {'name': 'unknown', 'dist': 0.5, 'color': (0,0,255)}
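		# 0.5 is the strictest distance accepted as a match; dlib's reference threshold is about 0.6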

		for name, saved_desc in descs.items():
			# Euclidean distance between the live descriptor and the enrolled one
			dist = np.linalg.norm(np.array(face_descriptor) - saved_desc)
			if dist < last_found['dist']:
				last_found = {'name': name, 'dist': dist, 'color': (255,255,255)}

		print(last_found['name'], ' : ', last_found['dist'])
		cv2.rectangle(img_bgr, pt1=(d.left(), d.top()), pt2=(d.right(), d.bottom()), color=last_found['color'], thickness=2)
		cv2.putText(img_bgr, last_found['name'], org=(d.left(), d.top()), fontFace=cv2.FONT_HERSHEY_SIMPLEX, fontScale=1, color=last_found['color'], thickness=2)

#	writer.write(img_bgr)

	cv2.imshow('img', img_bgr)
	if cv2.waitKey(1) == ord('q'):
		break

cap.release()
#writer.release()
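
To run the pipeline, enroll faces first with descs_make.py and then start Video_Perception.py; press q to quit. Note that the commented-out VideoWriter lines reference video_path, which is not defined in this webcam version, so a fixed output filename would be needed if recording is re-enabled.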