As written here, last week I presented the research paper Identification of Traditional Motifs using Convolutional Neural Networks at SIITME.
Abstract: This paper presents a design for identifying and classifying Romanian traditional motifs from 4 different categories (clothing, ceramics, carpets, and painted eggs) by training a CNN (Convolutional Neural Network) model based on the Residual Network (ResNet-50) architecture. We also implemented a system that can detect through a webcam whether the object in front of it contains a learned motif. Experimental results show that our neural network achieves an overall accuracy of 99.4% and a short webcam processing time.
You can read the paper here: https://ieeexplore.ieee.org/document/8599199
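The exact training setup is described in the paper itself; purely as an illustration (the optimizer, number of epochs, and data directory layout below are my assumptions, not the paper's configuration), a 5-class classifier built from ResNet-50 in Keras could be assembled like this:

# Hedged sketch only: ResNet-50 backbone with a 5-class softmax head
# (4 motif categories + 1 ImageNet "no motif" class, matching webcam_demo.py below).
# Optimizer, epoch count, and the train/ directory layout are illustrative assumptions.
from keras.applications.resnet50 import ResNet50, preprocess_input
from keras.layers import Dense, GlobalAveragePooling2D
from keras.models import Model
from keras.preprocessing.image import ImageDataGenerator

base = ResNet50(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
x = GlobalAveragePooling2D()(base.output)
outputs = Dense(5, activation='softmax')(x)
model = Model(base.input, outputs)
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Assumed layout: one subfolder per class under train/
datagen = ImageDataGenerator(preprocessing_function=preprocess_input)
train_gen = datagen.flow_from_directory('train/', target_size=(224, 224), batch_size=32)
# The demo below loads checkpoints/epoch_53.hdf5, which suggests roughly 53 training epochs
model.fit_generator(train_gen, steps_per_epoch=len(train_gen), epochs=53)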
My scientific poster design:
Interest in my paper came especially from women wearing Romanian traditional costume (for example, the IA blouse), as can also be seen in the picture below:
Here you will find the entire code (including the trained model) behind the research paper.
heatmaps.py
### Code created by Sorin Liviu Jurj for his paper called "Identification of Traditional Motifs using Convolutional Neural Networks". More information here: https://www.jurj.de/identification-of-traditional-motifs-using-convolutional-neural-networks/
###
import tensorflow as tf
import keras
import numpy as np
import keras.backend as K
import cv2
def target_category_loss(x, category_index, nb_classes):
    # Mask the predictions so that only the target class contributes to the loss
    return tf.multiply(x, K.one_hot([category_index], nb_classes))


def target_category_loss_output_shape(input_shape):
    return input_shape


def normalize(x):
    # Utility function to normalize a tensor by its L2 norm
    return x / (K.sqrt(K.mean(K.square(x))) + 1e-5)


def grad_cam(input_model, image, category_index, layer_name):
    nb_classes = 5  # 4 motif categories + 1 negative (ImageNet) class
    target_layer = lambda x: target_category_loss(x, category_index, nb_classes)
    x = input_model.layers[-1].output
    x = keras.layers.Lambda(
        target_layer, output_shape=target_category_loss_output_shape)(x)
    model = keras.models.Model(input_model.layers[0].input, x)
    loss = K.sum(model.layers[-1].output)
    conv_output = [l for l in model.layers if l.name == layer_name][0].output
    grads = normalize(K.gradients(loss, conv_output)[0])
    gradient_function = K.function([model.layers[0].input],
                                   [conv_output, grads])

    output, grads_val = gradient_function([image])
    output, grads_val = output[0, :], grads_val[0, :, :, :]

    # Weight each feature map by the mean of its gradients (Grad-CAM)
    weights = np.mean(grads_val, axis=(0, 1))
    cam = np.ones(output.shape[0:2], dtype=np.float32)
    for i, w in enumerate(weights):
        cam += w * output[:, :, i]

    cam = cv2.resize(cam, (224, 224))
    cam = np.maximum(cam, 0)
    heatmap = cam / np.max(cam)

    # Return to BGR [0..255] from the preprocessed image
    image = image[0, :]
    image -= np.min(image)
    image = np.minimum(image, 255)

    # Overlay the colored heatmap on the original image
    cam = cv2.applyColorMap(np.uint8(255 * heatmap), cv2.COLORMAP_RAINBOW)
    cam = np.float32(cam) + np.float32(image)
    cam = 255 * cam / np.max(cam)
    return np.uint8(cam), heatmap, image


def get_heatmap(model, image, prediction):
    image = image[None, ...]
    # Layer used to compute the activations. Should be the last conv layer before the FC layers
    bottleneck_layer = 'activation_49'
    cam, heatmap, image = grad_cam(model, image, prediction,
                                   bottleneck_layer)
    return cam
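One caveat: the hard-coded 'activation_49' name depends on how the installed Keras version happens to number ResNet-50's layers. If the names differ, the last convolutional feature layer can be located programmatically; a minimal sketch (find_bottleneck_layer is my own hypothetical helper, not part of the repository):

# Hypothetical helper: locate the last layer with a 4D output
# (batch, height, width, channels), i.e. the last conv feature map,
# instead of hard-coding the 'activation_49' name.
def find_bottleneck_layer(model):
    for layer in reversed(model.layers):
        if len(layer.output_shape) == 4:
            return layer.name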
inference.py
### Code created by Sorin Liviu Jurj for his paper called "Identification of Traditional Motifs using Convolutional Neural Networks". More information here: https://www.jurj.de/identification-of-traditional-motifs-using-convolutional-neural-networks/
###
from keras.applications.resnet50 import preprocess_input
import cv2
import numpy as np
import time


# Resize an image by specifying the size of its smaller side
def resize_to(img, size=256):
    if img is None:
        return
    (h, w) = img.shape[:2]
    # Find the smaller side
    if h < w:
        ratio = size / h
    else:
        ratio = size / w
    # cv2.resize expects (width, height)
    outsize = (int(w * ratio), int(h * ratio))
    return cv2.resize(img, outsize)


def crop_center(img, cropx=224, cropy=224):
    y, x = img.shape[:2]
    startx = x // 2 - (cropx // 2)
    starty = y // 2 - (cropy // 2)
    return img[starty:starty + cropy, startx:startx + cropx, ...]


def preprocess_image(image):
    # Resize the smaller side to 256 px, then center-crop to 224 x 224
    resized = resize_to(image)
    cropped = crop_center(resized)
    return cropped


def run_inference(model, image):
    # Preprocess the image and add a batch dimension
    processed_image = preprocess_image(image)
    batch = preprocess_input(processed_image[None, ...].astype('float'))
    start = time.time()
    predictions = model.predict_on_batch(batch)
    end = time.time()
    print(end - start)
    return np.argmax(predictions, axis=1)[0], predictions
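To sanity-check the pipeline without a webcam, the two modules above can be combined on a single image from disk. A minimal sketch; the checkpoint and test-image paths are the ones that appear in webcam_demo.py below, so treat them as placeholders for your own files:

# Minimal offline sketch: classify one image and write out its Grad-CAM overlay.
# Both paths are taken from webcam_demo.py; adjust them to your setup.
import cv2
import keras
import inference
import heatmaps

model = keras.models.load_model('checkpoints/epoch_53.hdf5')
img = cv2.imread('test_images/2.jpg')
prediction, scores = inference.run_inference(model, img)
print(prediction, scores[0][prediction])
cam = heatmaps.get_heatmap(model, inference.preprocess_image(img), prediction)
cv2.imwrite('heatmap.jpg', cam)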
webcam_demo.py
### Code created by Sorin Liviu Jurj for his paper called "Identification of Traditional Motifs using Convolutional Neural Networks". More information here: https://www.jurj.de/identification-of-traditional-motifs-using-convolutional-neural-networks/
###
# USAGE
# python webcam_demo.py
# import the necessary packages
from __future__ import print_function
import imutils
from imutils.video import WebcamVideoStream
import cv2
import keras
import heatmaps
import inference
import time
# Display text on an image/frame
def display_text(img, text, x=10, y=20):
    # Draw red text at the given position and show the frame
    font = cv2.FONT_HERSHEY_SIMPLEX
    bottomLeftCornerOfText = (x, y)
    fontScale = 0.5
    fontColor = (0, 0, 255)  # red in BGR
    lineType = 1
    cv2.putText(img, text,
                bottomLeftCornerOfText,
                font,
                fontScale,
                fontColor,
                lineType)
    cv2.imshow("Frame", img)
    cv2.waitKey(1)
# Is the pressed key Space?
def is_space(key):
    return key == ord(' ')
# Display webcam. Trigger classification if Space is pressed.
def capture_frames():
    is_capturing = True
    while is_capturing:
        try:
            # grab the frame from the threaded video stream and resize it
            # to have a maximum width of 800 pixels
            frame = vs.read()
            frame = imutils.resize(frame, width=800)
            clean_frame = frame.copy()
            display_text(clean_frame, "Press Space Bar in order to start the detection of Romanian traditional motifs")
            # cv2.waitKey() returns a 32-bit integer, but the key code is an
            # 8-bit ASCII value, so mask off all but the lowest 8 bits:
            key = cv2.waitKey(25) & 0xFF
            if is_space(key):
                print('Detecting on frame')
                display_text(clean_frame, 'Detecting the class containing Romanian traditional motifs. Please wait...', 10, 35)
                start = time.time()
                prediction, predictions = inference.run_inference(model, clean_frame)
                end = time.time()
                print(end - start)
                # Class index 4 is the ImageNet negative class (no motif)
                prediction_to_class = {
                    0: 'Carpets',
                    1: 'Ceramics',
                    2: 'Clothes',
                    3: 'Painted_Eggs',
                    4: 'imagenet_resized_256'
                }
                if prediction != 4:
                    text = f"{prediction_to_class[prediction]} containing Romanian traditional motifs detected. Confidence: ({predictions[0][prediction]:.3f})"
                else:
                    text = f"No Romanian traditional motifs detected. Confidence: ({predictions[0][prediction]:.3f})"
                display_text(clean_frame, text, 10, 50)
                if prediction != 4:
                    heatmap = heatmaps.get_heatmap(model,
                                                   inference.preprocess_image(frame),
                                                   prediction)
                    resized_heatmap = cv2.resize(heatmap, frame.shape[:2][::-1])
                    display_text(resized_heatmap, "Press Space Bar to start again.", 10, 65)
                # Pause until Space is pressed again
                paused = True
                while paused:
                    key = cv2.waitKey(1) & 0xFF
                    if is_space(key):
                        paused = False
        except KeyboardInterrupt:
            is_capturing = False
# Set up the model
print('Loading model')
model = keras.models.load_model('checkpoints/epoch_53.hdf5')
print('Model loaded')

# Create a threaded video stream and allow the camera sensor to warm up
vs = WebcamVideoStream(src=0).start()
capture_frames()

# Do a bit of cleanup
cv2.destroyAllWindows()
vs.stop()