User:Simon/self directed research/OCR preprocessing: Difference between revisions
No edit summary |
No edit summary |
||
Line 8: | Line 8: | ||
import cv2 | import cv2 | ||
import os | import os | ||
# construct the argument parse and parse the arguments | # construct the argument parse and parse the arguments | ||
ap = argparse.ArgumentParser() | ap = argparse.ArgumentParser() | ||
Line 16: | Line 15: | ||
help="type of preprocessing to be done") | help="type of preprocessing to be done") | ||
args = vars(ap.parse_args()) | args = vars(ap.parse_args()) | ||
# load the example image and convert it to grayscale | # load the example image and convert it to grayscale | ||
image = cv2.imread(args["image"]) | image = cv2.imread(args["image"]) | ||
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) | gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) | ||
# check to see if we should apply thresholding to preprocess the | # check to see if we should apply thresholding to preprocess the | ||
# image | # image | ||
Line 26: | Line 23: | ||
gray = cv2.threshold(gray, 0, 255, | gray = cv2.threshold(gray, 0, 255, | ||
cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1] | cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1] | ||
# make a check to see if median blurring should be done to remove | # make a check to see if median blurring should be done to remove | ||
# noise | # noise | ||
elif args["preprocess"] == "blur": | elif args["preprocess"] == "blur": | ||
gray = cv2.medianBlur(gray, 3) | gray = cv2.medianBlur(gray, 3) | ||
# write the grayscale image to disk as a temporary file so we can | # write the grayscale image to disk as a temporary file so we can | ||
# apply OCR to it | # apply OCR to it | ||
filename = "{}.png".format(os.getpid()) | filename = "{}.png".format(os.getpid()) | ||
cv2.imwrite(filename, gray) | cv2.imwrite(filename, gray) | ||
# load the image as a PIL/Pillow image, apply OCR, and then delete | # load the image as a PIL/Pillow image, apply OCR, and then delete | ||
# the temporary file | # the temporary file | ||
Line 42: | Line 36: | ||
os.remove(filename) | os.remove(filename) | ||
print(text) | print(text) | ||
# show the output images | # show the output images | ||
cv2.imshow("Image", image) | cv2.imshow("Image", image) | ||
cv2.imshow("Output", gray) | cv2.imshow("Output", gray) | ||
cv2.waitKey(0)</code> | cv2.waitKey(0)</code> |
Revision as of 15:21, 5 September 2019
Pre-processing for OCR:
# Pre-process an image with OpenCV and run Tesseract OCR on the result.
#
# Usage: python <script> --image PATH [--preprocess thresh|blur]
# Any other --preprocess value leaves the grayscale image unmodified.

# import the necessary packages
# NOTE: the PIL import must stay active -- Image.open() is used below to
# hand the temporary file to pytesseract
from PIL import Image
import pytesseract
import argparse
import cv2
import os

# construct the argument parser and parse the arguments
ap = argparse.ArgumentParser()
ap.add_argument("-i", "--image", required=True,
    help="path to input image to be OCR'd")
ap.add_argument("-p", "--preprocess", type=str, default="thresh",
    help="type of preprocessing to be done")
args = vars(ap.parse_args())

# load the example image and convert it to grayscale
image = cv2.imread(args["image"])

# cv2.imread signals failure by returning None instead of raising, so
# stop here with a readable message rather than crashing in cvtColor
if image is None:
    ap.error("could not read image: {}".format(args["image"]))

gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

# check to see if we should apply thresholding to preprocess the
# image
if args["preprocess"] == "thresh":
    # cv2.threshold returns (threshold_used, image); keep only the image
    gray = cv2.threshold(gray, 0, 255,
        cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]

# make a check to see if median blurring should be done to remove
# noise
elif args["preprocess"] == "blur":
    gray = cv2.medianBlur(gray, 3)

# write the grayscale image to disk as a temporary file so we can
# apply OCR to it; the process id keeps the name unique per run
filename = "{}.png".format(os.getpid())

try:
    cv2.imwrite(filename, gray)

    # load the image as a PIL/Pillow image and apply OCR; the context
    # manager closes the file handle before the file is deleted
    with Image.open(filename) as temp_image:
        text = pytesseract.image_to_string(temp_image)
finally:
    # always delete the temporary file, even when writing or OCR fails
    if os.path.exists(filename):
        os.remove(filename)

print(text)

# show the output images
cv2.imshow("Image", image)
cv2.imshow("Output", gray)
cv2.waitKey(0)