Commit dcf0e120 authored by Zhiying Li
Update Code/Ball_Detection/PyTorch_with_ESPCAM/live-full-detection-detecto_with_ESP_CAM.py, imageTread.py files
Deleted Code/ESP32_Cam/CameraWebServer/.gitkeep, Code/ESP32_Cam/CameraWebServer.ino, Code/ESP32_Cam/app_httpd.cpp, Code/ESP32_Cam/camera_index.h, Code/ESP32_Cam/camera_pins.h files
## Codebase - Live Training-based Ball Detection (COMPLETE)
### Import Modules and Drive
import os
import cv2
import glob
import torch
import numpy as np
import torch.nn as nn
import torch.quantization
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from urllib.request import urlopen, Request
from base64 import b64decode
from datetime import datetime
from detecto import core, utils
from torchvision import transforms
from IPython.display import Image, display, Javascript
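# __import__ is used on the next line because the module path contains hyphens,
# which a plain 'import' statement cannot parse; 'fromlist' makes it return the
# submodule itself rather than the top-level package.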
distanceDetect = __import__('distance-detection-torch.distance-detection-torch', fromlist = ['distanceDetect']).distanceDetect
print("torch.cuda.is_available() = ", torch.cuda.is_available())
# Change the IP address below to match the IP shown in the Serial Monitor of the Arduino code.
# url='http://192.168.1.107/cam-lo.jpg'
url='http://192.168.1.107/cam-hi.jpg'
# url='http://192.168.1.107/cam-mid.jpg'
#### Modify Detecto Core to include possibility of other base models
def modifyCore():
    cModLineNums = [221, 254]
    cAddLineNums = [254, 256, 257, 258, 259, 260, 261]
    # REPLACEABLE LINE FOR DIFFERENT LOCAL COMPUTER DEVICES
    # NOTE: the hard-coded line numbers above and the path below match the detecto
    # version installed in this environment; adjust both for other setups.
    coreFile = '/opt/anaconda3/envs/FORAY/lib/python3.8/site-packages/detecto/core.py'
    cModLineVals = ["    def __init__(self, classes=None, device=None, pretrained=True, modelname=\'fasterrcnn_resnet50_fpn\'):\n",
                    "            self._model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=pretrained)\n"]
    cAddLineVals = ["        if modelname == \'fasterrcnn_resnet50_fpn\':  # User-Modified\n",
                    "        elif modelname == \'fasterrcnn_mobilenet_v3_large_fpn\':\n",
                    "            self._model = torchvision.models.detection.fasterrcnn_mobilenet_v3_large_fpn(pretrained=pretrained)\n",
                    "        elif modelname == \'fasterrcnn_mobilenet_v3_large_320_fpn\':\n",
                    "            self._model = torchvision.models.detection.fasterrcnn_mobilenet_v3_large_320_fpn(pretrained=pretrained)\n",
                    "        else:\n",
                    "            raise ValueError(\'Unknown Pretrained Model\')\n"]
    with open(coreFile, "r") as coreRead:
        coreLines = coreRead.readlines()
    # The '# User-Modified' marker on the inserted if-line makes this edit idempotent.
    if coreLines[254][-14:-1] != 'User-Modified':
        for count, cModLineNum in enumerate(cModLineNums):
            coreLines[cModLineNum] = cModLineVals[count]
        for count, cAddLineNum in enumerate(cAddLineNums):
            coreLines.insert(cAddLineNum, cAddLineVals[count])
        with open(coreFile, "w") as coreWrite:
            coreWrite.writelines(coreLines)
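# After modifyCore() has patched the installed detecto, core.Model accepts the
# extra 'modelname' keyword. A minimal sketch of the patched call (hypothetical
# values; kept commented out so the script flow below is unchanged):
# from detecto import core
# model = core.Model(['ball'], device='cpu', modelname='fasterrcnn_mobilenet_v3_large_fpn')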
### Train the Model
def modelName(modelType = 1):
    if modelType == 1:
        return 'fasterrcnn_resnet50_fpn'
    elif modelType == 2:
        return 'fasterrcnn_mobilenet_v3_large_fpn'
    elif modelType == 3:
        return 'fasterrcnn_mobilenet_v3_large_320_fpn'
    else:
        raise ValueError('Unknown Pretrained Model')
def modelTitle(modelLoc, device, modelType):
    time = datetime.now().strftime("%Y%m%d_%H%M%S")
    if device == 'cpu':
        dev = 'cpu'
    elif device[:4] == 'cuda':
        dev = 'cuda'
    else:
        dev = device  # fall back to the raw device string
    return modelLoc + 'model_weights-' + str(modelType) + '-' + time + '-' + dev + '.pth'
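# Example (hypothetical timestamp): modelTitle('./model_weights/', 'cpu', 2)
# returns './model_weights/model_weights-2-20210818_002355-cpu.pth', the same
# naming scheme as the modelFile constant below.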
def returnModel(modelAction, device, trainLoc, labelSet, modelLoc, modelFile):
if modelAction == 'new':
dataset = core.Dataset(trainLoc)
model = core.Model(labelSet, device = device, modelname = modelName(modelType))
losses = model.fit(dataset, epochs = 10, verbose = True)
model.save(modelTitle(modelLoc, device, modelType))
elif modelAction == 'load':
        model = core.Model(labelSet, device = device, modelname = modelname)
#model = core.Model(classes = labelSet, device = device, pretrained = True, model_name = modelName(modelType))
model._model.load_state_dict(torch.load(modelLoc + modelFile, map_location = torch.device(device)))
# model = torch.quantization.quantize_dynamic(model.get_internal_model(), dtype=torch.qint8)
return model
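# NOTE (assumption): with modelAction='new', detecto's core.Dataset expects the
# training images in trainLoc alongside XML annotations (e.g. from labelImg);
# 'load' restores weights previously saved by model.save() above.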
### Object Detection
def objectDetection(loc):
image = utils.read_image(loc)
predictions = model.predict(image)
tLabels, tBoxes, tScores = predictions
labels, boxes, scores = [[] for i in range(3)]
imHeight, imWidth, _ = image.shape
for count, box in enumerate(tBoxes):
x0, y0 = float(box[0]), float(box[1])
x1, y1 = float(box[2]), float(box[3])
w = x1 - x0
h = y1 - y0
center = (x0 + (w/2), y0 + (h/2))
dCrop = [int(y0*cropFactor + y1*(1-cropFactor)), int(y1*cropFactor + y0*(1-cropFactor)),
int(x0*cropFactor + x1*(1-cropFactor)), int(x1*cropFactor + x0*(1-cropFactor))]
# relativeDist, coordinates = depth_estimate((imHeight, imWidth), (x0, y0, w, h))
# relativeDist = float(relativeDist)
# coordinates = tuple([float(coord) for coord in coordinates])
distance = distanceDetect('load-testImg', distWtsFile, [[x0, x1, y0, y1], [imWidth, imHeight]])
avgClRGB = cv2.mean(image[dCrop[0]:dCrop[1], dCrop[2]:dCrop[3]])
avgClHue = rgbToHue(avgClRGB)
print('Detection Score : ', round(float(tScores[count])*100, 2), '%')
print('Average Color (RGB) : ', avgClRGB)
print('Average Color (Hue) : ', avgClHue)
print('Bounding Box Center : ', center)
print('Distance to Ball : ', distance)
# print('Relative Distance : ', relativeDist)
# print('Coordinates : ', coordinates)
print()
if tScores[count] > minScore and avgClHue > colorLow and avgClHue < colorHigh:
scores.append(float(tScores[count]))
labels.append(tLabels[count]+' '+str(round(float(tScores[count])*100, 2))+'%')
            boxes.append([x0, y0, x1, y1])
displayBoxedImage(image, boxes, labels)
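# NOTE: objectDetection reads the module-level globals model, minScore,
# colorLow, colorHigh, cropFactor and distWtsFile declared in the constants
# section below.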
#### Live Detection
def detectLive(model):
# cv2.namedWindow('Ball Detection')
# try:
# cap = cv2.VideoCapture(0)#, cv2.CAP_DSHOW)
# except:
# print('No webcam available.')
# return
ret = True
while ret:
# ret, frame = cap.read()
#print(ret)
#print(frame)
header = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.159 Safari/537.36."}
req = Request(url, headers=header)
img_resp = urlopen(req, timeout=60)
        imgnp = np.array(bytearray(img_resp.read()), dtype=np.uint8)
        frame = cv2.imdecode(imgnp, -1)
labels, boxes, scores = model.predict(frame)
# labels, boxes, scores = [np.array([]) for i in range(3)]
for i in range(boxes.shape[0]):
box = boxes[i]
x0, y0 = float(box[0]), float(box[1])
x1, y1 = float(box[2]), float(box[3])
w = x1 - x0
h = y1 - y0
dCrop = [int(y0*cropFactor + y1*(1-cropFactor)), int(y1*cropFactor + y0*(1-cropFactor)),
int(x0*cropFactor + x1*(1-cropFactor)), int(x1*cropFactor + x0*(1-cropFactor))]
            avgClRGB = cv2.mean(frame[dCrop[0]:dCrop[1], dCrop[2]:dCrop[3]])[2::-1]  # frame is BGR; reverse to RGB for rgbToHue
avgClHue = rgbToHue(avgClRGB)
if scores[i] > minScore and avgClHue > colorLow and avgClHue < colorHigh:
cv2.rectangle(frame, (int(x0), int(y0)),
(int(x1), int(y1)),
(0, 255, 0), 2)
if labels:
distance = distanceDetect('load-testImg', distWtsFile, [[x0, x1, y0, y1], frame.shape[:2]])
cv2.putText(frame, '{}: {}% at {} cm'.format(labels[i], round(scores[i].item()*100.0, 1), round(distance, 2)),
(int(box[0]), int(box[1]) - 10),
cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 3)
cv2.imshow('Ball Detection', frame)
key = cv2.waitKey(1) & 0xFF
if key == ord('q') or key == 27:
break
cv2.destroyAllWindows()
# cap.release()
### Supplementary Functions
#### Display of Image with Bounding Boxes
def displayBoxedImage(image, boxes = [], labels = None):
fig, ax = plt.subplots(1)
if isinstance(image, torch.Tensor):
image = utils.reverse_normalize(image)
image = transforms.ToPILImage()(image)
ax.imshow(image)
if labels is not None and not utils._is_iterable(labels):
labels = [labels]
for i in range(len(boxes)):
box = boxes[i]
width, height = box[2] - box[0], box[3] - box[1]
initial_pos = (box[0], box[1])
rect = patches.Rectangle(initial_pos, width, height, linewidth = 1, edgecolor = 'r', facecolor = 'none')
if labels:
ax.text(box[0] + 5, box[1] - 5, '{}'.format(labels[i]), color='red')
ax.add_patch(rect)
plt.show()
#### Calculation of Hue from RGB (adopted from [code](https://www.geeksforgeeks.org/program-change-rgb-color-model-hsv-color-model/))
def rgbToHue(RGB):
    RGB = [val / 255.0 for val in RGB[:3]]  # keep only the 3 colour channels (cv2.mean appends a 4th, zero entry)
cmax = max(RGB)
cmin = min(RGB)
diff = cmax - cmin
r, g, b = RGB[0:3]
if cmax == cmin:
return 0
if cmax == r:
return (60 * ((g - b) / diff) + 360) % 360
if cmax == g:
return (60 * ((b - r) / diff) + 120) % 360
if cmax == b:
return (60 * ((r - g) / diff) + 240) % 360
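# Quick sanity check of the hue formula (standard 0-360 hue wheel):
# rgbToHue((255, 0, 0)) == 0    (red)
# rgbToHue((0, 255, 0)) == 120  (green)
# rgbToHue((0, 0, 255)) == 240  (blue)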
#### Formula-based Depth Estimation (adopted from [mono_depth.py](https://github.com/agrawalparth10/FORAY-Perception/blob/master/mono_depth/mono_depth.py))
def depth_estimate(figDims, boundingBox):
def x_hat(realLen, ip, i0, imageLen):
return (realLen * (ip-i0)) / imageLen
def y_hat(realLen, jp, j0, imageLen):
return (realLen * (jp-j0)) / imageLen
def z_hat(dist, pixelLen, imageLen):
return dist*(pixelLen/imageLen)
def cal_coordinates(dist, realLen, ip, jp, i0, j0, imageLen, pixelLen):
return (x_hat(realLen, ip, i0, imageLen), y_hat(realLen, jp, j0, imageLen), z_hat(dist, pixelLen, imageLen))
def measure(dist, realLen, ip, jp, i0, j0, imageLen, pixelLen):
x_cor = x_hat(realLen, ip, i0, imageLen)
z_cor = z_hat(dist, pixelLen, imageLen)
dist = (x_cor ** 2 + z_cor ** 2) ** 0.5
return dist
imHeight, imWidth = figDims
center = (imWidth // 2, imHeight // 2)
x, y, w, h = boundingBox[0:4]
    relativeDist = measure(dist = 0.51, realLen = 0.228, ip = x, jp = y, i0 = center[0], j0 = center[1], imageLen = w, pixelLen = 500)
coordinates = cal_coordinates(dist = 0.51, realLen = 0.228, ip = x, jp = y, i0 = center[0], j0 = center[1], imageLen = w, pixelLen = 500)
return relativeDist, coordinates
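# Hedged usage sketch (hypothetical box): for a 480x640 frame and a box at
# x=400, y=300 with w=h=50 px,
#   depth_estimate((480, 640), (400, 300, 50, 50))
# returns (relativeDist, (x_hat, y_hat, z_hat)) using the calibration constants
# baked in above (dist=0.51, realLen=0.228, pixelLen=500).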
### Declare Variables and Constants
device = 'cpu'
imageLoc = './images/train-test-4/'
labelSet = ['ball']
modelLoc = './model_weights/'
trainLoc = imageLoc + 'train/'
minScore = 0.50
colorLow = 60
colorHigh = 180
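# hue gate: 60-180 deg spans yellow-green through cyan on the hue wheel,
# presumably bracketing the ball's colour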
modelFile = 'model_weights-2-20210818_002355-cpu.pth'
modelname = modelName(int(modelFile[14]))  # the character after 'model_weights-' encodes the model type
modelType = 2
cropFactor = 0.90
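# cropFactor = 0.90 keeps the central 80% of each box axis when sampling the
# average colour (see dCrop in objectDetection/detectLive), trimming 10% per edge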
distWtsFile = './distance-detection-torch/distance-detection-weights-3-2.0sd-20210808_212550.json'
modelAction = 'load' # 'load' to load previous model, 'new' to train new model (only possible on Colab)
#FOR THE NEXT LINE : UNCOMMENT the first time you run, COMMENT OUT after the first time
# modifyCore()
#########
model = returnModel(modelAction, device, trainLoc, labelSet, modelLoc, modelFile)
## Testing (using Live Streaming)
detectLive(model)
## Testing (using Untrained Images)
for file in sorted(glob.glob(imageLoc + 'test/*.jpg')):
objectDetection(file)
## Evaluation (using Trained Images)
for file in sorted(glob.glob(imageLoc + 'train/*.jpg')):
objectDetection(file)
#include "esp_camera.h"
#include <WiFi.h>
//
// WARNING!!! PSRAM IC required for UXGA resolution and high JPEG quality
// Ensure ESP32 Wrover Module or other board with PSRAM is selected
// Partial images will be transmitted if image exceeds buffer size
//
// Select camera model
// #define CAMERA_MODEL_WROVER_KIT // Has PSRAM
// #define CAMERA_MODEL_ESP_EYE // Has PSRAM
// #define CAMERA_MODEL_M5STACK_PSRAM // Has PSRAM
// #define CAMERA_MODEL_M5STACK_V2_PSRAM // M5Camera version B Has PSRAM
// #define CAMERA_MODEL_M5STACK_WIDE // Has PSRAM
// #define CAMERA_MODEL_M5STACK_ESP32CAM // No PSRAM
#define CAMERA_MODEL_AI_THINKER // Has PSRAM
//#define CAMERA_MODEL_TTGO_T_JOURNAL // No PSRAM
#include "camera_pins.h"
const char* ssid = "lemur";
const char* password = "lemur9473";
void startCameraServer();
void setup() {
Serial.begin(115200);
Serial.setDebugOutput(true);
Serial.println();
camera_config_t config;
config.ledc_channel = LEDC_CHANNEL_0;
config.ledc_timer = LEDC_TIMER_0;
config.pin_d0 = Y2_GPIO_NUM;
config.pin_d1 = Y3_GPIO_NUM;
config.pin_d2 = Y4_GPIO_NUM;
config.pin_d3 = Y5_GPIO_NUM;
config.pin_d4 = Y6_GPIO_NUM;
config.pin_d5 = Y7_GPIO_NUM;
config.pin_d6 = Y8_GPIO_NUM;
config.pin_d7 = Y9_GPIO_NUM;
config.pin_xclk = XCLK_GPIO_NUM;
config.pin_pclk = PCLK_GPIO_NUM;
config.pin_vsync = VSYNC_GPIO_NUM;
config.pin_href = HREF_GPIO_NUM;
config.pin_sscb_sda = SIOD_GPIO_NUM;
config.pin_sscb_scl = SIOC_GPIO_NUM;
config.pin_pwdn = PWDN_GPIO_NUM;
config.pin_reset = RESET_GPIO_NUM;
config.xclk_freq_hz = 20000000;
config.pixel_format = PIXFORMAT_JPEG;
// if PSRAM IC present, init with UXGA resolution and higher JPEG quality
// for larger pre-allocated frame buffer.
if (psramFound()) {
config.frame_size = FRAMESIZE_UXGA;
config.jpeg_quality = 10;
config.fb_count = 2;
} else {
config.frame_size = FRAMESIZE_SVGA;
config.jpeg_quality = 12;
config.fb_count = 1;
}
#if defined(CAMERA_MODEL_ESP_EYE)
pinMode(13, INPUT_PULLUP);
pinMode(14, INPUT_PULLUP);
#endif
// camera init
esp_err_t err = esp_camera_init(&config);
if (err != ESP_OK) {
Serial.printf("Camera init failed with error 0x%x", err);
return;
}
sensor_t * s = esp_camera_sensor_get();
// initial sensors are flipped vertically and colors are a bit saturated
if (s->id.PID == OV3660_PID) {
s->set_vflip(s, 1); // flip it back
s->set_brightness(s, 1); // up the brightness just a bit
s->set_saturation(s, -2); // lower the saturation
}
// drop down frame size for higher initial frame rate
s->set_framesize(s, FRAMESIZE_QVGA);
#if defined(CAMERA_MODEL_M5STACK_WIDE) || defined(CAMERA_MODEL_M5STACK_ESP32CAM)
s->set_vflip(s, 1);
s->set_hmirror(s, 1);
#endif
WiFi.begin(ssid, password);
while (WiFi.status() != WL_CONNECTED) {
delay(500);
Serial.print(".");
}
Serial.println("");
Serial.println("WiFi connected");
startCameraServer();
Serial.print("Camera Ready! Use 'http://");
Serial.print(WiFi.localIP());
Serial.println("' to connect");
}
void loop() {
// put your main code here, to run repeatedly:
delay(10000);
}
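// NOTE (assumption): the /cam-lo.jpg, /cam-mid.jpg and /cam-hi.jpg snapshot
// URLs fetched by the Python clients in this commit are presumed to be served
// by handlers registered in startCameraServer() (app_httpd.cpp, diff collapsed
// below).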
(Two diffs collapsed and not shown; per the commit message these are app_httpd.cpp and camera_index.h.)
#if defined(CAMERA_MODEL_WROVER_KIT)
#define PWDN_GPIO_NUM -1
#define RESET_GPIO_NUM -1
#define XCLK_GPIO_NUM 21
#define SIOD_GPIO_NUM 26
#define SIOC_GPIO_NUM 27
#define Y9_GPIO_NUM 35
#define Y8_GPIO_NUM 34
#define Y7_GPIO_NUM 39
#define Y6_GPIO_NUM 36
#define Y5_GPIO_NUM 19
#define Y4_GPIO_NUM 18
#define Y3_GPIO_NUM 5
#define Y2_GPIO_NUM 4
#define VSYNC_GPIO_NUM 25
#define HREF_GPIO_NUM 23
#define PCLK_GPIO_NUM 22
#elif defined(CAMERA_MODEL_ESP_EYE)
#define PWDN_GPIO_NUM -1
#define RESET_GPIO_NUM -1
#define XCLK_GPIO_NUM 4
#define SIOD_GPIO_NUM 18
#define SIOC_GPIO_NUM 23
#define Y9_GPIO_NUM 36
#define Y8_GPIO_NUM 37
#define Y7_GPIO_NUM 38
#define Y6_GPIO_NUM 39
#define Y5_GPIO_NUM 35
#define Y4_GPIO_NUM 14
#define Y3_GPIO_NUM 13
#define Y2_GPIO_NUM 34
#define VSYNC_GPIO_NUM 5
#define HREF_GPIO_NUM 27
#define PCLK_GPIO_NUM 25
#elif defined(CAMERA_MODEL_M5STACK_PSRAM)
#define PWDN_GPIO_NUM -1
#define RESET_GPIO_NUM 15
#define XCLK_GPIO_NUM 27
#define SIOD_GPIO_NUM 25
#define SIOC_GPIO_NUM 23
#define Y9_GPIO_NUM 19
#define Y8_GPIO_NUM 36
#define Y7_GPIO_NUM 18
#define Y6_GPIO_NUM 39
#define Y5_GPIO_NUM 5
#define Y4_GPIO_NUM 34
#define Y3_GPIO_NUM 35
#define Y2_GPIO_NUM 32
#define VSYNC_GPIO_NUM 22
#define HREF_GPIO_NUM 26
#define PCLK_GPIO_NUM 21
#elif defined(CAMERA_MODEL_M5STACK_V2_PSRAM)
#define PWDN_GPIO_NUM -1
#define RESET_GPIO_NUM 15
#define XCLK_GPIO_NUM 27
#define SIOD_GPIO_NUM 22
#define SIOC_GPIO_NUM 23
#define Y9_GPIO_NUM 19
#define Y8_GPIO_NUM 36
#define Y7_GPIO_NUM 18
#define Y6_GPIO_NUM 39
#define Y5_GPIO_NUM 5
#define Y4_GPIO_NUM 34
#define Y3_GPIO_NUM 35
#define Y2_GPIO_NUM 32
#define VSYNC_GPIO_NUM 25
#define HREF_GPIO_NUM 26
#define PCLK_GPIO_NUM 21
#elif defined(CAMERA_MODEL_M5STACK_WIDE)
#define PWDN_GPIO_NUM -1
#define RESET_GPIO_NUM 15
#define XCLK_GPIO_NUM 27
#define SIOD_GPIO_NUM 22
#define SIOC_GPIO_NUM 23
#define Y9_GPIO_NUM 19
#define Y8_GPIO_NUM 36
#define Y7_GPIO_NUM 18
#define Y6_GPIO_NUM 39
#define Y5_GPIO_NUM 5
#define Y4_GPIO_NUM 34
#define Y3_GPIO_NUM 35
#define Y2_GPIO_NUM 32
#define VSYNC_GPIO_NUM 25
#define HREF_GPIO_NUM 26
#define PCLK_GPIO_NUM 21
#elif defined(CAMERA_MODEL_M5STACK_ESP32CAM)
#define PWDN_GPIO_NUM -1
#define RESET_GPIO_NUM 15
#define XCLK_GPIO_NUM 27
#define SIOD_GPIO_NUM 25
#define SIOC_GPIO_NUM 23
#define Y9_GPIO_NUM 19
#define Y8_GPIO_NUM 36
#define Y7_GPIO_NUM 18
#define Y6_GPIO_NUM 39
#define Y5_GPIO_NUM 5
#define Y4_GPIO_NUM 34
#define Y3_GPIO_NUM 35
#define Y2_GPIO_NUM 17
#define VSYNC_GPIO_NUM 22
#define HREF_GPIO_NUM 26
#define PCLK_GPIO_NUM 21
#elif defined(CAMERA_MODEL_AI_THINKER)
#define PWDN_GPIO_NUM 32
#define RESET_GPIO_NUM -1
#define XCLK_GPIO_NUM 0
#define SIOD_GPIO_NUM 26
#define SIOC_GPIO_NUM 27
#define Y9_GPIO_NUM 35
#define Y8_GPIO_NUM 34
#define Y7_GPIO_NUM 39
#define Y6_GPIO_NUM 36
#define Y5_GPIO_NUM 21
#define Y4_GPIO_NUM 19
#define Y3_GPIO_NUM 18
#define Y2_GPIO_NUM 5
#define VSYNC_GPIO_NUM 25
#define HREF_GPIO_NUM 23
#define PCLK_GPIO_NUM 22
#elif defined(CAMERA_MODEL_TTGO_T_JOURNAL)
#define PWDN_GPIO_NUM 0
#define RESET_GPIO_NUM 15
#define XCLK_GPIO_NUM 27
#define SIOD_GPIO_NUM 25
#define SIOC_GPIO_NUM 23
#define Y9_GPIO_NUM 19
#define Y8_GPIO_NUM 36
#define Y7_GPIO_NUM 18
#define Y6_GPIO_NUM 39
#define Y5_GPIO_NUM 5
#define Y4_GPIO_NUM 34
#define Y3_GPIO_NUM 35
#define Y2_GPIO_NUM 17
#define VSYNC_GPIO_NUM 22
#define HREF_GPIO_NUM 26
#define PCLK_GPIO_NUM 21
#else
#error "Camera model not selected"
#endif
import cv2
from urllib.request import urlopen, Request
import numpy as np
def nothing(x):
    # no-op callback placeholder (unused in this script)
    pass
# Change the IP address below to match the IP shown in the Serial Monitor of the Arduino code.
url='http://192.168.1.107/cam-lo.jpg'
# url='http://192.168.1.107/cam-hi.jpg'
# url='http://192.168.1.107/cam-mid.jpg'
cv2.namedWindow("live transmission", cv2.WINDOW_AUTOSIZE)
while True:
header = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.159 Safari/537.36."}
req = Request(url, headers=header)
img_resp = urlopen(req, timeout=60)
    imgnp = np.array(bytearray(img_resp.read()), dtype=np.uint8)
    frame = cv2.imdecode(imgnp, -1)
cv2.imshow("live transmission", frame)
key=cv2.waitKey(5)
if key==ord('q'):
break
cv2.destroyAllWindows()