图像处理实战指南：从基础操作到特征提取的完整流程解析

张开发

• 2026/6/5 2:23:36 • 15 分钟阅读

分享文章

1. 图像处理基础操作入门

第一次接触图像处理时，我被那些专业术语吓得不轻。但后来发现，很多看似复杂的概念其实就像给照片做美颜一样简单。咱们就从最基础的图像操作开始，用Python和OpenCV来玩转这些功能。

先说说图像翻转，这可能是最简单的操作了。记得有次我需要把一批证件照统一成面朝右侧，手动调整太费时间，用代码几行就搞定了：

```python
import cv2
from matplotlib import pyplot as plt

img = cv2.imread('photo.jpg')
img_flip = cv2.flip(img, 1)  # 1表示水平翻转

plt.subplot(1,2,1), plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
plt.title('原图')
plt.subplot(1,2,2), plt.imshow(cv2.cvtColor(img_flip, cv2.COLOR_BGR2RGB))
plt.title('水平翻转')
plt.show()
```

图像锐化是另一个常用操作。有次处理老照片扫描件，文字边缘模糊不清，锐化后效果立竿见影。常用的拉普拉斯算子就像给图像做了个提神醒脑：

```python
kernel = np.array([[0, -1, 0], [-1, 5, -1], [0, -1, 0]])
sharpened = cv2.filter2D(img, -1, kernel)
```

图像平滑去噪也很有意思。不同类型的噪声需要不同的处理方法：

- 高斯噪声用高斯滤波效果最好
- 椒盐噪声用中值滤波最合适
- 均匀噪声用均值滤波就能搞定

实测下来，中值滤波对手机拍的夜景照片去噪效果特别好：

```python
# 不同滤波效果对比
img = cv2.imread('noisy_photo.jpg')
gaussian = cv2.GaussianBlur(img, (5,5), 0)
median = cv2.medianBlur(img, 5)

plt.figure(figsize=(15,5))
plt.subplot(131), plt.imshow(img), plt.title('原图')
plt.subplot(132), plt.imshow(gaussian), plt.title('高斯滤波')
plt.subplot(133), plt.imshow(median), plt.title('中值滤波')
plt.show()
```

2. 直方图处理与图像增强技巧

直方图是理解图像特性的重要工具。有次我处理一批光照不均的工业检测图像，直方图均衡化简直成了救命稻草。不过要注意，直接对整图做均衡化有时会过度增强噪声，这时候可以试试CLAHE（限制对比度自适应直方图均衡化）：

```python
# 普通直方图均衡化
img = cv2.imread('low_contrast.jpg', 0)
equ = cv2.equalizeHist(img)

# CLAHE
clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))
cl1 = clahe.apply(img)

plt.figure(figsize=(15,5))
plt.subplot(131), plt.imshow(img, cmap='gray'), plt.title('原图')
plt.subplot(132), plt.imshow(equ, cmap='gray'), plt.title('普通均衡化')
plt.subplot(133), plt.imshow(cl1, cmap='gray'), plt.title('CLAHE')
plt.show()
```

伽马校正也是调节图像亮度的好方法。处理过暗的监控画面时，设置gamma=0.5效果很惊艳：

```python
def adjust_gamma(image, gamma=1.0):
    invGamma = 1.0 / gamma
    table = np.array([((i / 255.0) ** invGamma) * 255
                      for i in np.arange(0, 256)]).astype('uint8')
    return cv2.LUT(image, table)

gamma = 0.5
adjusted = adjust_gamma(img, gamma=gamma)
```

3. 
图像阈值分割实战

阈值分割是图像处理的重要环节。大津法(OTSU)是我最常用的自动阈值选择方法，它能自动找到最佳分割阈值：

```python
img = cv2.imread('document.jpg',0)
ret, thresh = cv2.threshold(img,0,255,cv2.THRESH_BINARY+cv2.THRESH_OTSU)
print(f'OTSU算法计算的最佳阈值: {ret}')

plt.figure(figsize=(10,5))
plt.subplot(121), plt.imshow(img, cmap='gray'), plt.title('原图')
plt.subplot(122), plt.imshow(thresh, cmap='gray'), plt.title('OTSU分割')
plt.show()
```

对于光照不均的图像，全局阈值可能效果不好。这时候自适应阈值就派上用场了。处理过一批车间拍摄的零件图像，自适应阈值的效果明显优于全局阈值：

```python
thresh_mean = cv2.adaptiveThreshold(img, 255, cv2.ADAPTIVE_THRESH_MEAN_C,
                                    cv2.THRESH_BINARY, 11, 2)
thresh_gauss = cv2.adaptiveThreshold(img, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                     cv2.THRESH_BINARY, 11, 2)

plt.figure(figsize=(15,5))
plt.subplot(131), plt.imshow(img, cmap='gray'), plt.title('原图')
plt.subplot(132), plt.imshow(thresh_mean, cmap='gray'), plt.title('均值自适应')
plt.subplot(133), plt.imshow(thresh_gauss, cmap='gray'), plt.title('高斯自适应')
plt.show()
```

边缘检测是另一种重要的分割方法。Canny边缘检测器效果最好，但参数调节需要经验。Sobel算子简单快速，适合实时应用：

```python
edges_sobelx = cv2.Sobel(img, cv2.CV_64F, 1, 0, ksize=5)
edges_sobely = cv2.Sobel(img, cv2.CV_64F, 0, 1, ksize=5)
edges_sobel = np.sqrt(edges_sobelx**2 + edges_sobely**2)

edges_canny = cv2.Canny(img, 100, 200)

plt.figure(figsize=(15,5))
plt.subplot(131), plt.imshow(edges_sobel, cmap='gray'), plt.title('Sobel边缘')
plt.subplot(132), plt.imshow(edges_canny, cmap='gray'), plt.title('Canny边缘')
plt.show()
```

4. 
高级特征提取技术

特征提取是计算机视觉的核心。轮廓特征是最基础也最实用的，计算轮廓的Hu矩可以得到具有平移、旋转和尺度不变性的特征：

```python
img = cv2.imread('shape.png', 0)
ret, thresh = cv2.threshold(img, 127, 255, 0)
contours, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

cnt = contours[0]
M = cv2.moments(cnt)
huMoments = cv2.HuMoments(M)

print('Hu矩特征:')
for i in range(7):
    print(f'η{i+1}: {huMoments[i][0]}')
```

纹理特征对材质识别特别有用。灰度共生矩阵(GLCM)是经典的纹理分析方法，可以提取对比度、相关性、能量等特征：

```python
from skimage.feature import greycomatrix, greycoprops

# 计算灰度共生矩阵
glcm = greycomatrix(img, distances=[5], angles=[0], levels=256,
                    symmetric=True, normed=True)

# 提取纹理特征
contrast = greycoprops(glcm, 'contrast')
dissimilarity = greycoprops(glcm, 'dissimilarity')
homogeneity = greycoprops(glcm, 'homogeneity')
energy = greycoprops(glcm, 'energy')
correlation = greycoprops(glcm, 'correlation')

print(f'对比度: {contrast[0][0]}')
print(f'差异性: {dissimilarity[0][0]}')
print(f'同质性: {homogeneity[0][0]}')
print(f'能量: {energy[0][0]}')
print(f'相关性: {correlation[0][0]}')
```

PCA图像压缩是个很酷的技术。有次需要存储大量医学图像，用PCA压缩后节省了60%空间，而关键信息几乎没损失：

```python
from sklearn.decomposition import PCA

# 将图像展开为向量
h, w = img.shape
img_vector = img.reshape(1, h*w)

# PCA降维
pca = PCA(n_components=100)  # 保留前100个主成分
img_pca = pca.fit_transform(img_vector)

# 重建图像
img_reconstructed = pca.inverse_transform(img_pca).reshape(h, w)

plt.figure(figsize=(10,5))
plt.subplot(121), plt.imshow(img, cmap='gray'), plt.title(f'原图\n大小:{img.nbytes/1024:.1f}KB')
plt.subplot(122), plt.imshow(img_reconstructed, cmap='gray'), plt.title(f'PCA压缩\n大小:{img_pca.nbytes/1024:.1f}KB')
plt.show()
```

5. 
实战项目：完整图像处理流程

让我们通过一个车牌识别的实际案例，把前面学的技术串起来。这个项目需要处理不同光照条件下的车牌图像。

预处理阶段：

```python
# 读取图像并转为灰度
img = cv2.imread('car_plate.jpg')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# 直方图均衡化增强对比度
clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))
enhanced = clahe.apply(gray)

# 高斯模糊去噪
blurred = cv2.GaussianBlur(enhanced, (5,5), 0)
```

车牌定位：

```python
# 边缘检测
edges = cv2.Canny(blurred, 50, 150)

# 查找轮廓
contours, _ = cv2.findContours(edges.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

# 筛选可能是车牌的轮廓
plate_contours = []
for cnt in contours:
    x,y,w,h = cv2.boundingRect(cnt)
    aspect_ratio = w / float(h)
    if 2.5 < aspect_ratio < 4.5 and w > 100 and h > 30:
        plate_contours.append(cnt)
```

字符分割：

```python
# 对定位到的车牌区域进行二值化
plate = img[y:y+h, x:x+w]
plate_gray = cv2.cvtColor(plate, cv2.COLOR_BGR2GRAY)
_, plate_thresh = cv2.threshold(plate_gray, 0, 255, cv2.THRESH_BINARY_INV+cv2.THRESH_OTSU)

# 投影法分割字符
horizontal_proj = np.sum(plate_thresh, axis=1)
vertical_proj = np.sum(plate_thresh, axis=0)

# 根据投影直方图的波峰波谷确定字符边界
# ... (具体实现代码较长，省略部分细节)
```

字符识别：

```python
# 提取每个字符的HOG特征
def extract_hog(image):
    # 计算HOG特征描述符
    winSize = (32,32)
    blockSize = (16,16)
    blockStride = (8,8)
    cellSize = (8,8)
    nbins = 9
    hog = cv2.HOGDescriptor(winSize, blockSize, blockStride, cellSize, nbins)
    features = hog.compute(image)
    return features

# 加载预训练的SVM分类器
svm = cv2.ml.SVM_load('char_svm_model.xml')

# 对每个字符进行分类
for char_img in segmented_chars:
    features = extract_hog(char_img)
    result = svm.predict(features.reshape(1,-1))[1]
    print(f'预测字符: {chr(int(result[0]))}')
```

这个完整流程展示了如何将基础图像处理技术与高级特征提取方法结合，解决实际问题。每个步骤都可能需要根据具体场景调整参数，这也是图像处理既具挑战性又充满乐趣的地方。