使用OpenCV进行文本提取和分割

我之前从未使用过OpenCV,但我正在尝试编写神经网络系统来识别文本,我需要一些文本提取/分割工具。

如何使用Java版OpenCV对包含文本的图像进行预处理和分割?
我不需要识别文本,我只需要将每个字母放在单独的图像中。
像这样的东西:
在此处输入图像描述

试试这段代码,它不需要OpenCV(但需要Neuroph库中的 org.neuroph.imgrec.ImageUtilities):

import java.awt.image.BufferedImage; import java.util.ArrayList; import java.util.List; import org.neuroph.imgrec.ImageUtilities; public class CharExtractor { private int cropTopY = 0;//up locked coordinate private int cropBottomY = 0;//down locked coordinate private int cropLeftX = 0;//left locked coordinate private int cropRightX = 0;//right locked coordinate private BufferedImage imageWithChars = null; private boolean endOfImage;//end of picture private boolean endOfRow;//end of current reading row /** * Creates new char extractor with soecified text image * @param imageWithChars - image with text */ public CharExtractor(BufferedImage imageWithChars) { this.imageWithChars = imageWithChars; } public void setImageWithChars(BufferedImage imageWithChars) { this.imageWithChars = imageWithChars; } /** * This method scans image pixels until it finds the first black pixel (TODO: use foreground color which is black by default). * When it finds black pixel, it sets cropTopY and returns true. if it reaches end of image and does not find black pixels, * it sets endOfImage flag and returns false. * @return - returns true when black pixel is found and cropTopY value is changed, and false if cropTopY value is not changed */ private boolean findCropTopY() { for (int y = cropBottomY; y < imageWithChars.getHeight(); y++) { // why cropYDown? - for multiple lines of text using cropBottomY from previous line above; for first line its zero for (int x = cropLeftX; x < imageWithChars.getWidth(); x++) { // scan starting from the previous left crop position - or it shoud be right??? 
if (imageWithChars.getRGB(x, y) == -16777216) { // if its black rixel (also consider condition close to black or not white or different from background) this.cropTopY = y; // save the current y coordiante return true; // and return true } } } endOfImage = true; //sets this flag if no black pixels are found return false; // and return false } /** * This method scans image pixels until it finds first row with white pixels. (TODO: background color which is white by default). * When it finds line whith all white pixels, it sets cropBottomY and returns true * @return - returns true when cropBottomY value is set, false otherwise */ private boolean findCropBottomY() { for (int y = cropTopY + 1; y < imageWithChars.getHeight(); y++) { // scan image from top to bottom int whitePixCounter = 0; //counter of white pixels in a row for (int x = cropLeftX; x < imageWithChars.getWidth(); x++) { // scan all pixels to right starting from left crop position if (imageWithChars.getRGB(x, y) == -1) { // if its white pixel whitePixCounter++; // increase counter } } if (whitePixCounter == imageWithChars.getWidth()-1) { // if we have reached end of line counting white pixels (x pos) cropBottomY = y;// that means that we've found white line, so set current y coordinate minus 1 return true; // as cropBottomY and finnish with true } if (y == imageWithChars.getHeight() - 1) { // if we have reached end of image cropBottomY = y; // set crop bottom endOfImage = true; // set corresponding endOfImage flag return true; // and return true } } return false; // this should never happen, however its possible if image has non white bg } private boolean findCropLeftX() { int whitePixCounter = 0; // white pixel counter between the letters for (int x = cropRightX; x < imageWithChars.getWidth(); x++) { // start from previous righ crop position (previous letter), and scan following pixels to the right for (int y = cropTopY; y <= cropBottomY; y++) { // vertical pixel scan at current x coordinate if 
(imageWithChars.getRGB(x, y) == -16777216) { // when we find black pixel cropLeftX = x; // set cropLeftX return true; // and return true } } // BUG?: this condition looks strange.... we might not need whitePixCounter at all, it might be used for 'I' letter whitePixCounter++; // if its not black pixel assume that its white pixel if (whitePixCounter == 3) { // why 3 pixels? its hard coded for some case and does not work in general...!!! whitePixCounter = 0; // why does it sets to zero, this has no purporse at all... } } endOfRow = true; // if we have reached end of row and we have not found black pixels, set the endOfRow flag return false; // and return false } /** * This method scans image pixels to the right until it finds next row where all pixel are white, y1 and y2. * @return - return true when x2 value is changed and false when x2 value is not changed */ private boolean findCropRightX() { for (int x = cropLeftX + 1; x < imageWithChars.getWidth(); x++) { // start from current cropLeftX position and scan pixels to the right int whitePixCounter = 0; for (int y = cropTopY; y <= cropBottomY; y++) { // vertical pixel scan at current x coordinate if (imageWithChars.getRGB(x, y) == -1) { // if we have white pixel at current (x, y) whitePixCounter++; // increase whitePixCounter } } // this is for space! int heightPixels = cropBottomY - cropTopY; // calculate crop height if (whitePixCounter == heightPixels+1) { // if white pixel count is equal to crop height+1 then this is white vertical line, means end of current char/ (+1 is for case when there is only 1 pixel; a 'W' bug fix) cropRightX = x; // so set cropRightX return true; // and return true } // why we need this when we allready have condiiton in the for loop? - for the last letter in the row. 
if (x == imageWithChars.getWidth() - 1) { // if we have reached end of row with x position cropRightX = x; // set cropRightX endOfRow = true; // set endOfRow flag return true; // and return true } } } public List extractCharImagesToRecognize() { List trimedImages = new ArrayList(); int i = 0; while (endOfImage == false) { endOfRow = false; boolean foundTop = findCropTopY(); boolean foundBottom = false; if (foundTop == true) { foundBottom = findCropBottomY(); if (foundBottom == true) { while (endOfRow == false) { boolean foundLeft = false; boolean foundRight = false; foundLeft = findCropLeftX(); if (foundLeft == true) { foundRight = findCropRightX(); if (foundRight == true) { BufferedImage image = ImageUtilities.trimImage(ImageUtilities.cropImage(imageWithChars, cropLeftX, cropTopY, cropRightX, cropBottomY)); trimedImages.add(image); i++; } } } cropLeftX = 0; cropRightX = 0; } } } cropTopY = 0; cropBottomY = 0; endOfImage = false; return trimedImages; } public static void main(String[] args) throws Exception { File f=new File("./written.png"); BufferedImage img=ImageIO.read(f); CharExtractor ch=new CharExtractor(img); List list=ch.extractCharImagesToRecognize(); for(int i=0;i 

你要解决的是一般性的场景文本定位(scene text localization)问题,这非常困难。可以参考这篇文章寻找灵感 – http://www.maseltov.eu/wp-content/uploads/2014/02/CTU-03_Real-Time-Scene-Text-Localization-and-Recognition.pdf

你能做的是:

  • 编写一个从图像中提取MSER对象的程序
  • 从每个MSER确定的每个补丁中提取特征(文章中概述了哪些特征)
  • 训练你的分类器(在你的情况下,我猜一个神经网络?),以便它能够区分字符和非字符区域
  • 编写一个程序,使用您的分类器提取MSER并使用经过训练的NN对它们进行分类。

MSER算法已经在OpenCV中实现,这是一个优势。OpenCV中也有神经网络分类器,但由于我只用过SVM,所以无法对此做过多评论。我想说的是,我们自己也曾解决过这个问题,用OpenCV完全可以做到。只是不要指望一切都是现成的——这涉及大量工作,尤其是在选择和提取blob特征方面。

我不熟悉神经网络,但如果你只是想在图像中找到字母,并且不受缩放和旋转的影响,我可以推荐这个项目:http://www.codeproject.com/Articles/196168/Contour-Analysis-for-Image-Recognition-in-C 。它是用C#编写的,但你可以将其移植到Java,或者至少从中获得有关该主题的大量见解。