OpenCV图像处理类

人工智能 2025-04-17 09:58:57

import numpy as np
import cv2 as cv
from matplotlib import pyplot as plt

class OpenCVImageProcessing:
    """Collection of small OpenCV demonstrations.

    Covers colour-space helpers, geometric transforms, thresholding,
    filtering, morphology, gradients, edge detection and pyramid blending.
    Most methods read an image from disk, apply one operation and display
    the result in an OpenCV window or a matplotlib figure.  The class keeps
    no state.
    """

    def __init__(self):
        """Create a processor; there is nothing to initialise."""

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _read_image(image_path, flags=cv.IMREAD_COLOR):
        """Load an image from disk, raising instead of returning ``None``.

        ``cv.imread`` reports failure by returning ``None``, which otherwise
        surfaces later as an unrelated error far from the cause.

        Args:
            image_path (str): Path of the image file.
            flags (int): ``cv.imread`` flag, e.g. ``cv.IMREAD_GRAYSCALE``.

        Returns:
            np.ndarray: The decoded image.

        Raises:
            OSError: If the file is missing or cannot be decoded.
        """
        img = cv.imread(image_path, flags)
        if img is None:
            raise OSError(f"Cannot read image: {image_path!r}")
        return img

    @staticmethod
    def _to_rgb(img):
        """Return a 3-channel BGR array reordered to RGB for matplotlib.

        OpenCV stores colour images as BGR while matplotlib expects RGB.
        Anything that is not a 3-channel ndarray is returned unchanged.
        """
        if isinstance(img, np.ndarray) and img.ndim == 3 and img.shape[2] == 3:
            # Reversing the channel axis works for every dtype.
            return img[:, :, ::-1]
        return img

    def _plot(self, position, img, title, hide_ticks=True, **imshow_kwargs):
        """Draw ``img`` with ``title`` into one subplot of the current figure.

        Args:
            position (tuple): ``(rows, cols, index)`` passed to ``plt.subplot``.
            img (np.ndarray): Image to draw; BGR input is shown as RGB.
            title (str): Subplot title.
            hide_ticks (bool): Remove the axis ticks when True.
            **imshow_kwargs: Extra keyword arguments for ``plt.imshow``.
        """
        plt.subplot(*position)
        plt.imshow(self._to_rgb(img), **imshow_kwargs)
        plt.title(title)
        if hide_ticks:
            plt.xticks([])
            plt.yticks([])

    @staticmethod
    def _otsu_threshold_from_hist(hist):
        """Find the Otsu threshold of a histogram by exhaustive search.

        For every candidate ``t`` the bins are split into ``[0, t]`` and
        ``[t + 1, end]`` and the weighted within-class variance is computed;
        the ``t`` with the smallest variance wins.  This is the same
        convention as ``cv.threshold`` (values ``> t`` are foreground).

        Args:
            hist (array-like): Bin counts, any shape; it is flattened.

        Returns:
            int: The threshold, or ``-1`` when no split has two non-empty
            classes (for example an empty or single-valued histogram).
        """
        counts = np.asarray(hist, dtype=np.float64).ravel()
        total = counts.sum()
        if total <= 0:
            return -1

        prob = counts / total  # true probabilities: they sum to 1
        cumulative = prob.cumsum()
        levels = np.arange(prob.size)

        best_threshold = -1
        best_variance = np.inf
        for t in range(prob.size - 1):
            q_low = cumulative[t]
            q_high = cumulative[-1] - cumulative[t]
            # A split with an (almost) empty class has no defined mean.
            if q_low < 1e-6 or q_high < 1e-6:
                continue
            p_low, p_high = prob[:t + 1], prob[t + 1:]
            b_low, b_high = levels[:t + 1], levels[t + 1:]
            mean_low = np.sum(p_low * b_low) / q_low
            mean_high = np.sum(p_high * b_high) / q_high
            var_low = np.sum(((b_low - mean_low) ** 2) * p_low) / q_low
            var_high = np.sum(((b_high - mean_high) ** 2) * p_high) / q_high
            within_class = var_low * q_low + var_high * q_high
            if within_class < best_variance:
                best_variance = within_class
                best_threshold = t
        return best_threshold

    @staticmethod
    def _laplacian_pyramid(image, levels):
        """Build a Laplacian pyramid with ``levels`` layers, coarsest first.

        The first layer is the coarsest Gaussian level itself; each later
        layer is a Gaussian level minus the upsampled next-coarser level.

        Args:
            image (np.ndarray): Source image.
            levels (int): Number of layers (at least 1).

        Returns:
            list: ``levels`` arrays ordered from coarsest to finest.
        """
        gaussian = [image]
        for _ in range(levels - 1):
            gaussian.append(cv.pyrDown(gaussian[-1]))

        laplacian = [gaussian[-1]]
        for i in range(levels - 1, 0, -1):
            finer = gaussian[i - 1]
            height, width = finer.shape[:2]
            # pyrDown rounds odd sizes up, so pyrUp must be told the exact
            # target size or the subtraction below fails on a size mismatch.
            expanded = cv.pyrUp(gaussian[i], dstsize=(width, height))
            # NOTE: cv.subtract saturates at 0 for uint8 input, so negative
            # detail is clipped (same as the original tutorial code).
            laplacian.append(cv.subtract(finer, expanded))
        return laplacian

    # ------------------------------------------------------------------
    # Display and colour-space utilities
    # ------------------------------------------------------------------
    def show_and_destroy_image(self, img, window_name='draw', use_plt=False):
        """Display an image, wait for the user, then close the window.

        Args:
            img (np.ndarray): Image to display (BGR order for colour).
            window_name (str): Window name or plot title, default 'draw'.
            use_plt (bool): Use matplotlib instead of an OpenCV window.
        """
        if use_plt:
            # Colour images are converted so matplotlib shows true colours;
            # the grey colormap only affects single-channel images.
            plt.imshow(self._to_rgb(img), cmap='gray')
            plt.title(window_name)
            plt.xticks([])
            plt.yticks([])
            plt.show()
            return
        cv.imshow(window_name, img)
        cv.waitKey(0)
        cv.destroyAllWindows()

    def list_color_space_flags(self):
        """Print and return all OpenCV conversion flags starting with 'COLOR_'.

        Returns:
            list: The flag names as strings.

        Example:
            opencv = OpenCVImageProcessing()
            opencv.list_color_space_flags()
        """
        flags = [name for name in dir(cv) if name.startswith('COLOR_')]
        print(flags)
        return flags

    def object_tracking(self, color_lower=np.array([110, 50, 50]), color_upper=np.array([130, 255, 255])):
        """Track objects of one colour from the default camera until Esc.

        Shows three windows: the raw frame, the binary mask and the masked
        frame.  The default range selects blue objects.

        Args:
            color_lower (np.ndarray): Lower HSV bound ``[H, S, V]``.
            color_upper (np.ndarray): Upper HSV bound ``[H, S, V]``.

        Raises:
            RuntimeError: If the default camera cannot be opened.

        Example:
            opencv = OpenCVImageProcessing()
            opencv.object_tracking()
        """
        lower = np.asarray(color_lower)
        upper = np.asarray(color_upper)

        cap = cv.VideoCapture(0)
        if not cap.isOpened():
            cap.release()
            raise RuntimeError("Cannot open the default camera (index 0).")
        try:
            while True:
                grabbed, frame = cap.read()
                if not grabbed:
                    # Camera unplugged or stream ended: stop cleanly.
                    break
                hsv = cv.cvtColor(frame, cv.COLOR_BGR2HSV)
                mask = cv.inRange(hsv, lower, upper)
                res = cv.bitwise_and(frame, frame, mask=mask)
                cv.imshow('frame', frame)
                cv.imshow('mask', mask)
                cv.imshow('res', res)
                if cv.waitKey(5) & 0xFF == 27:  # Esc key
                    break
        finally:
            # Always free the device and windows, even after an error.
            cap.release()
            cv.destroyAllWindows()

    def find_hsv_value(self, bgr_color=np.uint8([[[0, 255, 0]]])):
        """Print and return the HSV value of a BGR colour.

        Args:
            bgr_color (np.ndarray): Colour shaped ``[[[B, G, R]]]``,
                default green.

        Returns:
            np.ndarray: The colour converted to HSV, same shape as input.

        Example:
            opencv = OpenCVImageProcessing()
            opencv.find_hsv_value()
        """
        hsv_color = cv.cvtColor(bgr_color, cv.COLOR_BGR2HSV)
        print(hsv_color)
        return hsv_color

    # ------------------------------------------------------------------
    # Geometric transforms
    # ------------------------------------------------------------------
    def image_scaling(self, image_path, fx=2, fy=2, interpolation=cv.INTER_CUBIC):
        """Resize an image by scale factors and display it.

        Args:
            image_path (str): Path of the image file.
            fx (float): Horizontal scale factor, default 2.
            fy (float): Vertical scale factor, default 2.
            interpolation (int): Interpolation flag, default cv.INTER_CUBIC.

        Raises:
            OSError: If the image cannot be read.

        Example:
            opencv = OpenCVImageProcessing()
            opencv.image_scaling('66.png', 1.5, 1.5)
        """
        img = self._read_image(image_path)
        scaled = cv.resize(img, None, fx=fx, fy=fy, interpolation=interpolation)
        self.show_and_destroy_image(scaled, 'Scaled Image')

    def image_translation(self, image_path, tx=100, ty=50):
        """Shift a greyscale image and display it.

        Args:
            image_path (str): Path of the image file.
            tx (int): Horizontal shift in pixels, default 100.
            ty (int): Vertical shift in pixels, default 50.

        Raises:
            OSError: If the image cannot be read.

        Example:
            opencv = OpenCVImageProcessing()
            opencv.image_translation('66.png', 200, 100)
        """
        img = self._read_image(image_path, cv.IMREAD_GRAYSCALE)
        rows, cols = img.shape[:2]
        matrix = np.float32([[1, 0, tx], [0, 1, ty]])
        # warpAffine takes the output size as (width, height).
        shifted = cv.warpAffine(img, matrix, (cols, rows))
        self.show_and_destroy_image(shifted, 'Translated Image')

    def image_rotation(self, image_path, angle=90, scale=1):
        """Rotate a greyscale image about its centre and display it.

        Args:
            image_path (str): Path of the image file.
            angle (float): Rotation angle in degrees, default 90.
            scale (float): Isotropic scale factor, default 1.

        Raises:
            OSError: If the image cannot be read.

        Example:
            opencv = OpenCVImageProcessing()
            opencv.image_rotation('66.png', 45)
        """
        img = self._read_image(image_path, cv.IMREAD_GRAYSCALE)
        rows, cols = img.shape[:2]
        centre = ((cols - 1) / 2.0, (rows - 1) / 2.0)
        matrix = cv.getRotationMatrix2D(centre, angle, scale)
        rotated = cv.warpAffine(img, matrix, (cols, rows))
        self.show_and_destroy_image(rotated, 'Rotated Image')

    def image_affine_transform(self, image_path, pts1=np.float32([[50, 50], [200, 50], [50, 200]]),
                               pts2=np.float32([[10, 100], [200, 50], [100, 250]])):
        """Apply an affine transform defined by three point pairs.

        Input and output are shown side by side with axis ticks visible so
        the point coordinates can be read off the plot.

        Args:
            image_path (str): Path of the image file.
            pts1 (np.ndarray): Three source points ``[[x, y], ...]``.
            pts2 (np.ndarray): Three destination points ``[[x, y], ...]``.

        Raises:
            OSError: If the image cannot be read.

        Example:
            opencv = OpenCVImageProcessing()
            opencv.image_affine_transform('66.png')
        """
        img = self._read_image(image_path)
        rows, cols = img.shape[:2]
        matrix = cv.getAffineTransform(np.float32(pts1), np.float32(pts2))
        warped = cv.warpAffine(img, matrix, (cols, rows))
        self._plot((1, 2, 1), img, 'Input', hide_ticks=False)
        self._plot((1, 2, 2), warped, 'Output', hide_ticks=False)
        plt.show()

    def image_perspective_transform(self, image_path, pts1=np.float32([[56, 65], [368, 52], [28, 387], [389, 390]]),
                                    pts2=np.float32([[0, 0], [300, 0], [0, 300], [300, 300]])):
        """Apply a perspective transform defined by four point pairs.

        The output canvas is sized to contain the destination points, which
        gives 300x300 for the default ``pts2``.

        Args:
            image_path (str): Path of the image file.
            pts1 (np.ndarray): Four source points ``[[x, y], ...]``.
            pts2 (np.ndarray): Four destination points ``[[x, y], ...]``.

        Raises:
            OSError: If the image cannot be read.

        Example:
            opencv = OpenCVImageProcessing()
            opencv.image_perspective_transform('66.png')
        """
        img = self._read_image(image_path)
        src_pts = np.float32(pts1)
        dst_pts = np.float32(pts2)
        matrix = cv.getPerspectiveTransform(src_pts, dst_pts)
        # Follow the destination quad instead of a hard-coded 300x300.
        width = max(1, int(np.ceil(dst_pts[:, 0].max())))
        height = max(1, int(np.ceil(dst_pts[:, 1].max())))
        warped = cv.warpPerspective(img, matrix, (width, height))
        self._plot((1, 2, 1), img, 'Input', hide_ticks=False)
        self._plot((1, 2, 2), warped, 'Output', hide_ticks=False)
        plt.show()

    # ------------------------------------------------------------------
    # Thresholding
    # ------------------------------------------------------------------
    def simple_thresholding(self, image_path, threshold=127, max_value=255):
        """Show the five basic threshold modes next to the original image.

        Args:
            image_path (str): Path of the image file.
            threshold (int): Threshold value, default 127.
            max_value (int): Value used by the binary modes, default 255.

        Raises:
            OSError: If the image cannot be read.

        Example:
            opencv = OpenCVImageProcessing()
            opencv.simple_thresholding('66.png', 100)
        """
        img = self._read_image(image_path, cv.IMREAD_GRAYSCALE)
        modes = [
            ('BINARY', cv.THRESH_BINARY),
            ('BINARY_INV', cv.THRESH_BINARY_INV),
            ('TRUNC', cv.THRESH_TRUNC),
            ('TOZERO', cv.THRESH_TOZERO),
            ('TOZERO_INV', cv.THRESH_TOZERO_INV),
        ]
        panels = [('Original Image', img)]
        for title, mode in modes:
            _, result = cv.threshold(img, threshold, max_value, mode)
            panels.append((title, result))

        for index, (title, panel) in enumerate(panels, start=1):
            # A fixed 0..255 range stops matplotlib from stretching each
            # panel separately, which would misrepresent the TRUNC result.
            self._plot((2, 3, index), panel, title, cmap='gray', vmin=0, vmax=255)
        plt.show()

    def otsu_thresholding(self, image_path):
        """Compare global, Otsu, and blur-then-Otsu thresholding.

        Each row of the figure shows a source image, its histogram and the
        thresholded result.

        Args:
            image_path (str): Path of the image file.

        Raises:
            OSError: If the image cannot be read.

        Example:
            opencv = OpenCVImageProcessing()
            opencv.otsu_thresholding('66.png')
        """
        img = self._read_image(image_path, cv.IMREAD_GRAYSCALE)
        otsu_mode = cv.THRESH_BINARY + cv.THRESH_OTSU
        _, th_global = cv.threshold(img, 127, 255, cv.THRESH_BINARY)
        _, th_otsu = cv.threshold(img, 0, 255, otsu_mode)
        blur = cv.GaussianBlur(img, (5, 5), 0)
        _, th_blur_otsu = cv.threshold(blur, 0, 255, otsu_mode)

        figure_rows = [
            (img, 'Original Noisy Image', th_global, 'Global Thresholding (v=127)'),
            (img, 'Original Noisy Image', th_otsu, "Otsu's Thresholding"),
            (blur, 'Gaussian filtered Image', th_blur_otsu, "Otsu's Thresholding"),
        ]
        for row, (source, source_title, result, result_title) in enumerate(figure_rows):
            first = row * 3 + 1
            self._plot((3, 3, first), source, source_title, cmap='gray')
            plt.subplot(3, 3, first + 1)
            plt.hist(source.ravel(), 256)
            plt.title('Histogram')
            plt.xticks([])
            plt.yticks([])
            self._plot((3, 3, first + 2), result, result_title, cmap='gray')
        plt.show()

    def otsu_thresholding_implementation(self, image_path):
        """Compute Otsu's threshold by hand and compare it with OpenCV's.

        The image is blurred with a 5x5 Gaussian, then the threshold is
        found from its 256-bin histogram.  Both values are printed as
        ``"<manual> <opencv>"`` and returned.

        Args:
            image_path (str): Path of the image file.

        Returns:
            tuple: ``(manual_threshold, opencv_threshold)``; the manual value
            is ``-1`` if the histogram has no valid split.

        Raises:
            OSError: If the image cannot be read.

        Example:
            opencv = OpenCVImageProcessing()
            opencv.otsu_thresholding_implementation('66.png')
        """
        img = self._read_image(image_path, cv.IMREAD_GRAYSCALE)
        blur = cv.GaussianBlur(img, (5, 5), 0)
        hist = cv.calcHist([blur], [0], None, [256], [0, 256])
        thresh = self._otsu_threshold_from_hist(hist)
        ret, _ = cv.threshold(blur, 0, 255, cv.THRESH_BINARY + cv.THRESH_OTSU)
        print(f"{thresh} {ret}")
        return thresh, ret

    # ------------------------------------------------------------------
    # Filtering and morphology
    # ------------------------------------------------------------------
    def image_filtering_2d(self, image_path, kernel_size=5):
        """Smooth an image with a normalised box kernel via 2D convolution.

        Args:
            image_path (str): Path of the image file.
            kernel_size (int): Side length of the square kernel, default 5.

        Raises:
            ValueError: If ``kernel_size`` is smaller than 1.
            OSError: If the image cannot be read.

        Example:
            opencv = OpenCVImageProcessing()
            opencv.image_filtering_2d('66.png', 3)
        """
        if kernel_size < 1:
            raise ValueError("kernel_size must be at least 1")
        img = self._read_image(image_path)
        kernel = np.ones((kernel_size, kernel_size), np.float32) / (kernel_size * kernel_size)
        averaged = cv.filter2D(img, -1, kernel)  # -1 keeps the source depth
        self._plot((1, 2, 1), img, 'Original')
        self._plot((1, 2, 2), averaged, 'Averaging')
        plt.show()

    def image_blurring(self, image_path, blur_type='bilateral', kernel_size=5, sigma=75):
        """Blur an image with one of four filters and show before/after.

        Args:
            image_path (str): Path of the image file.
            blur_type (str): One of 'average', 'gaussian', 'median',
                'bilateral'; default 'bilateral'.  Any other value prints
                "Invalid blur type." and returns without reading the image.
            kernel_size (int): Kernel size for the average, Gaussian and
                median filters, default 5.  The bilateral filter ignores it
                and always uses a pixel neighbourhood diameter of 9.
            sigma (int): Colour and space sigma of the bilateral filter,
                default 75.

        Raises:
            OSError: If the image cannot be read.

        Example:
            opencv = OpenCVImageProcessing()
            opencv.image_blurring('66.png', 'gaussian', 3)
        """
        filters = {
            'average': lambda src: cv.blur(src, (kernel_size, kernel_size)),
            'gaussian': lambda src: cv.GaussianBlur(src, (kernel_size, kernel_size), 0),
            'median': lambda src: cv.medianBlur(src, kernel_size),
            'bilateral': lambda src: cv.bilateralFilter(src, 9, sigma, sigma),
        }
        apply_filter = filters.get(blur_type)
        if apply_filter is None:
            print("Invalid blur type.")
            return

        img = self._read_image(image_path)
        blurred = apply_filter(img)
        self._plot((1, 2, 1), img, 'Original')
        self._plot((1, 2, 2), blurred, 'Blurred')
        plt.show()

    def morphological_transformations(self, image_path, kernel_size=5, iterations=1):
        """Run the standard morphological operations on a greyscale image.

        All seven results are computed and returned; the black-hat result is
        also displayed in an OpenCV window.

        Args:
            image_path (str): Path of the image file.
            kernel_size (int): Side length of the square kernel, default 5.
            iterations (int): Number of passes for erosion and dilation,
                default 1.  The ``morphologyEx`` operations run once.

        Returns:
            dict: Result images keyed by 'erosion', 'dilation', 'opening',
            'closing', 'gradient', 'tophat' and 'blackhat'.

        Raises:
            ValueError: If ``kernel_size`` is smaller than 1.
            OSError: If the image cannot be read.

        Example:
            opencv = OpenCVImageProcessing()
            opencv.morphological_transformations('66.png', 3, 2)
        """
        if kernel_size < 1:
            raise ValueError("kernel_size must be at least 1")
        img = self._read_image(image_path, cv.IMREAD_GRAYSCALE)
        kernel = np.ones((kernel_size, kernel_size), np.uint8)
        results = {
            'erosion': cv.erode(img, kernel, iterations=iterations),
            'dilation': cv.dilate(img, kernel, iterations=iterations),
            'opening': cv.morphologyEx(img, cv.MORPH_OPEN, kernel),
            'closing': cv.morphologyEx(img, cv.MORPH_CLOSE, kernel),
            'gradient': cv.morphologyEx(img, cv.MORPH_GRADIENT, kernel),
            'tophat': cv.morphologyEx(img, cv.MORPH_TOPHAT, kernel),
            'blackhat': cv.morphologyEx(img, cv.MORPH_BLACKHAT, kernel),
        }
        self.show_and_destroy_image(results['blackhat'], 'Blackhat')
        return results

    # ------------------------------------------------------------------
    # Gradients and edges
    # ------------------------------------------------------------------
    def image_gradients(self, image_path):
        """Show the Laplacian and the Sobel X / Sobel Y gradients.

        Gradients are computed in 64-bit float so negative slopes are kept.

        Args:
            image_path (str): Path of the image file.

        Raises:
            OSError: If the image cannot be read.

        Example:
            opencv = OpenCVImageProcessing()
            opencv.image_gradients('66.png')
        """
        img = self._read_image(image_path, cv.IMREAD_GRAYSCALE)
        laplacian = cv.Laplacian(img, cv.CV_64F)
        sobel_x = cv.Sobel(img, cv.CV_64F, 1, 0, ksize=5)
        sobel_y = cv.Sobel(img, cv.CV_64F, 0, 1, ksize=5)
        panels = [
            ('Original', img),
            ('Laplacian', laplacian),
            ('Sobel X', sobel_x),
            ('Sobel Y', sobel_y),
        ]
        for index, (title, panel) in enumerate(panels, start=1):
            self._plot((2, 2, index), panel, title, cmap='gray')
        plt.show()

    def sobel_dtype_comparison(self, image_path):
        """Compare Sobel output computed in 8-bit versus 64-bit float.

        With ``CV_8U`` output every negative slope is truncated to zero, so
        one direction of edges disappears.  Computing in ``CV_64F``, taking
        the absolute value and then converting keeps both directions.

        Args:
            image_path (str): Path of the image file.

        Raises:
            OSError: If the image cannot be read.

        Example:
            opencv = OpenCVImageProcessing()
            opencv.sobel_dtype_comparison('66.png')
        """
        img = self._read_image(image_path, cv.IMREAD_GRAYSCALE)
        sobel_8u_direct = cv.Sobel(img, cv.CV_8U, 1, 0, ksize=5)
        sobel_64f = cv.Sobel(img, cv.CV_64F, 1, 0, ksize=5)
        # Clip before casting: a bare uint8 cast overflows for strong edges,
        # whose 5x5 Sobel response is far above 255.
        sobel_8u_from_abs = np.clip(np.absolute(sobel_64f), 0, 255).astype(np.uint8)
        panels = [
            ('Original', img),
            ('Sobel CV_8U', sobel_8u_direct),
            ('Sobel abs(CV_64F)', sobel_8u_from_abs),
        ]
        for index, (title, panel) in enumerate(panels, start=1):
            self._plot((1, 3, index), panel, title, cmap='gray')
        plt.show()

    def canny_edge_detection(self, image_path, threshold1=100, threshold2=200):
        """Run Canny edge detection and show the original next to the edges.

        Args:
            image_path (str): Path of the image file.
            threshold1 (int): First hysteresis threshold, default 100.
            threshold2 (int): Second hysteresis threshold, default 200.

        Raises:
            OSError: If the image cannot be read.

        Example:
            opencv = OpenCVImageProcessing()
            opencv.canny_edge_detection('66.png', 50, 150)
        """
        img = self._read_image(image_path, cv.IMREAD_GRAYSCALE)
        edges = cv.Canny(img, threshold1, threshold2)
        self._plot((1, 2, 1), img, 'Original Image', cmap='gray')
        self._plot((1, 2, 2), edges, 'Edge Image', cmap='gray')
        plt.show()

    # ------------------------------------------------------------------
    # Pyramid blending
    # ------------------------------------------------------------------
    def image_pyramid_blending(self, image_path1, image_path2, levels=6):
        """Blend the left half of one image with the right half of another.

        Each Laplacian pyramid level is joined at the vertical midline and
        the levels are then collapsed, which softens the seam.  A plain
        side-by-side join is produced as well for comparison.  The results
        are written to 'Pyramid_blending2.jpg' and 'Direct_blending.jpg' in
        the current working directory.

        Args:
            image_path1 (str): Path of the image supplying the left half.
            image_path2 (str): Path of the image supplying the right half.
            levels (int): Number of pyramid levels, default 6.  Values below
                1 are treated as 1, which is a direct join.

        Raises:
            OSError: If either image cannot be read.
            ValueError: If the two images differ in shape.

        Example:
            opencv = OpenCVImageProcessing()
            opencv.image_pyramid_blending('1.jpg', '0.jpg')
        """
        image_a = self._read_image(image_path1)
        image_b = self._read_image(image_path2)
        if image_a.shape != image_b.shape:
            raise ValueError(
                f"Images must have the same shape, got {image_a.shape} and {image_b.shape}"
            )

        depth = max(int(levels), 1)
        pyramid_a = self._laplacian_pyramid(image_a, depth)
        pyramid_b = self._laplacian_pyramid(image_b, depth)

        # Join left half of A with right half of B on every level.
        joined_layers = []
        for layer_a, layer_b in zip(pyramid_a, pyramid_b):
            half = layer_a.shape[1] // 2
            joined_layers.append(np.hstack((layer_a[:, :half], layer_b[:, half:])))

        # Collapse the pyramid from coarsest to finest.
        blended = joined_layers[0]
        for layer in joined_layers[1:]:
            height, width = layer.shape[:2]
            blended = cv.pyrUp(blended, dstsize=(width, height))
            blended = cv.add(blended, layer)

        half = image_a.shape[1] // 2
        direct = np.hstack((image_a[:, :half], image_b[:, half:]))
        cv.imwrite('Pyramid_blending2.jpg', blended)
        cv.imwrite('Direct_blending.jpg', direct)