Many excellent mobile document-scanning apps support not only image capture but also edge detection and perspective transformation. If you are interested in these computer vision techniques, you can use OpenCV to build a free document scanner app yourself. In this post, I will show how to use OpenCV-Python to create a web document scanner step by step.
Setting Up Environment
Download Python 3.5.
Install Flask:
pip3 install flask
Install OpenCV 3.3.0 for Python:
pip3 install opencv-python
Download the latest NumPy 1.11.2. Unzip the package and build it:
python3 setup.py build install
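To compile the NumPy source code on Windows 10 for Python 3.5, install the Microsoft Visual C++ 14.0 build tools (Visual Studio 2015 or the standalone Visual C++ Build Tools). The Microsoft Visual C++ Compiler for Python 2.7 only works for Python 2.7.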
Web Document Scanner
Article and Code References
- https://github.com/vipul-sharma20/document-scanner
- http://www.pyimagesearch.com/2014/08/25/4-point-opencv-getperspective-transform-example/
- http://docs.opencv.org/3.1.0/dd/d49/tutorial_py_contour_features.html
- https://en.wikipedia.org/wiki/Ramer%E2%80%93Douglas%E2%80%93Peucker_algorithm
- http://www.pyimagesearch.com/2015/04/06/zero-parameter-automatic-canny-edge-detection-with-python-and-opencv/
Steps of Building the App
Create document.py to do edge detection and perspective transformation:
import cv2
import rect
import numpy as np


class Scanner(object):
    """Find a four-sided outline in an image and optionally flatten it."""

    # http://www.pyimagesearch.com/2014/08/25/4-point-opencv-getperspective-transform-example/
    def four_point_transform(self, image, rect):
        """Warp the quadrilateral ``rect`` of ``image`` to a top-down view.

        Args:
            image: Source image handed to ``cv2.warpPerspective``.
            rect: Four (x, y) corner points ordered top-left, top-right,
                bottom-right, bottom-left. Inside this method the name
                shadows the module-level ``rect`` import.

        Returns:
            The warped image, ``maxWidth`` wide and ``maxHeight`` high.
        """
        # unpack the (already ordered) corner points individually
        (tl, tr, br, bl) = rect

        # the width of the new image is the larger of the bottom edge
        # (bottom-right to bottom-left) and the top edge (top-right to
        # top-left) lengths
        widthA = np.hypot(br[0] - bl[0], br[1] - bl[1])
        widthB = np.hypot(tr[0] - tl[0], tr[1] - tl[1])
        maxWidth = max(int(widthA), int(widthB))

        # the height of the new image is the larger of the right edge
        # (top-right to bottom-right) and the left edge (top-left to
        # bottom-left) lengths
        heightA = np.hypot(tr[0] - br[0], tr[1] - br[1])
        heightB = np.hypot(tl[0] - bl[0], tl[1] - bl[1])
        maxHeight = max(int(heightA), int(heightB))

        # destination points for the "birds eye view" (i.e. top-down view),
        # in the same top-left, top-right, bottom-right, bottom-left order
        dst = np.array([
            [0, 0],
            [maxWidth - 1, 0],
            [maxWidth - 1, maxHeight - 1],
            [0, maxHeight - 1]], dtype="float32")

        # cv2.getPerspectiveTransform needs float32 points on both sides
        src = np.asarray(rect, dtype="float32")

        # compute the perspective transform matrix and then apply it
        M = cv2.getPerspectiveTransform(src, dst)
        warped = cv2.warpPerspective(image, M, (maxWidth, maxHeight))

        # return the warped image
        return warped

    # https://github.com/vipul-sharma20/document-scanner
    def detect_edge(self, image, enabled_transform=False):
        """Outline the largest four-cornered contour found in ``image``.

        Args:
            image: BGR image. It is modified in place: the detected
                outline is drawn on it in green.
            enabled_transform: When true, also perspective-correct the
                outlined region taken from an unmodified copy of the input.

        Returns:
            A tuple ``(image, dst)``. ``dst`` is the flattened region, or
            ``None`` when ``enabled_transform`` is false or no contour
            simplifies to four points.
        """
        dst = None
        orig = image.copy()

        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        blurred = cv2.GaussianBlur(gray, (5, 5), 0)
        edged = cv2.Canny(blurred, 0, 20)

        # findContours returns (image, contours, hierarchy) on OpenCV 3.x
        # but (contours, hierarchy) on 2.x and 4.x; the contour list is
        # always the second-to-last element.
        contours = cv2.findContours(
            edged, cv2.RETR_LIST, cv2.CHAIN_APPROX_NONE)[-2]
        contours = sorted(contours, key=cv2.contourArea, reverse=True)

        # walk the contours from largest to smallest area and stop at the
        # first one whose polygon approximation has exactly four vertices
        for cnt in contours:
            epsilon = 0.051 * cv2.arcLength(cnt, True)
            approx = cv2.approxPolyDP(cnt, epsilon, True)

            if len(approx) == 4:
                cv2.drawContours(image, [approx], -1, (0, 255, 0), 2)

                if enabled_transform:
                    corners = rect.rectify(approx)
                    dst = self.four_point_transform(orig, corners)
                break

        return image, dst
Create camera.py to capture frames from a camera:
import cv2
from document import Scanner


class VideoCamera(object):
    """Camera wrapper that produces JPEG-encoded preview and scan frames."""

    def __init__(self, device_index=2):
        """Open the capture device.

        Args:
            device_index: Index passed to ``cv2.VideoCapture``. Defaults
                to 2; use 0 when only one camera is connected.
        """
        # Open a camera
        self.cap = cv2.VideoCapture(device_index)

        # Initialize video recording environment
        self.is_record = False
        self.out = None
        self.transformed_frame = None

        self.scanner = Scanner()
        self.cached_frame = None

    def __del__(self):
        # __init__ may have failed before ``cap`` was assigned
        cap = getattr(self, 'cap', None)
        if cap is not None:
            cap.release()

    def get_video_frame(self):
        """Grab a frame, draw the detected outline and JPEG-encode it.

        Returns:
            The JPEG bytes, or ``None`` when the camera read or the
            encoding fails. The last successful result is also kept for
            ``get_cached_frame``.
        """
        ret, frame = self.cap.read()
        if not ret:
            return None

        frame, _ = self.scanner.detect_edge(frame)
        encoded, jpeg = cv2.imencode('.jpg', frame)
        if not encoded:
            return None

        data = jpeg.tobytes()
        # Cache the encoded bytes (not the raw array) so the cached frame
        # can be written to an HTTP response exactly like a live one.
        self.cached_frame = data
        return data

    def capture_frame(self):
        """Grab a frame and store its perspective-corrected JPEG.

        The result is available through ``get_image_frame``. When the
        camera read fails, no four-sided outline is detected, or encoding
        fails, the previously stored capture is left untouched.
        """
        ret, frame = self.cap.read()
        if not ret:
            return None

        _, warped = self.scanner.detect_edge(frame, True)
        if warped is None:
            # No quadrilateral in this frame; imencode would raise on None.
            return None

        encoded, jpeg = cv2.imencode('.jpg', warped)
        if encoded:
            self.transformed_frame = jpeg.tobytes()
        return None

    def get_cached_frame(self):
        """Return the last preview frame as JPEG bytes, or ``None``."""
        return self.cached_frame

    def get_image_frame(self):
        """Return the last captured scan as JPEG bytes, or ``None``."""
        return self.transformed_frame
Note: if you have only one device connected, the parameter in cv2.VideoCapture() should be 0.
Create server.py to stream camera frames to your web client:
import time

from flask import Flask, render_template, Response, jsonify, request

from camera import VideoCamera

app = Flask(__name__)

# Created lazily on first use so importing this module does not open a camera.
video_camera = None


def _get_camera():
    """Return the shared VideoCamera, creating it on first use."""
    global video_camera
    if video_camera is None:
        video_camera = VideoCamera()
    return video_camera


def _jpeg_part(data):
    """Wrap JPEG bytes as one part of the multipart 'frame' stream."""
    return (b'--frame\r\n'
            b'Content-Type: image/jpeg\r\n\r\n' + data + b'\r\n\r\n')


@app.route('/')
def index():
    """Serve the scanner page."""
    return render_template('index.html')


@app.route('/capture_status', methods=['POST'])
def capture_status():
    """Capture a scan when the posted JSON has ``status`` equal to "true"."""
    camera = _get_camera()

    # silent=True: a missing or malformed JSON body yields None instead of
    # an error response; anything that is not an object is ignored.
    payload = request.get_json(silent=True)
    status = payload.get('status') if isinstance(payload, dict) else None

    if status == "true":
        camera.capture_frame()

    return jsonify(result="done")


def video_frame():
    """Yield an endless multipart stream of preview frames."""
    camera = _get_camera()

    while True:
        frame = camera.get_video_frame()
        if frame is None:
            # Fall back to the most recent frame the camera object kept.
            frame = camera.get_cached_frame()

        if isinstance(frame, (bytes, bytearray)):
            yield _jpeg_part(frame)
        else:
            # Nothing that can be written to the response (no frame yet, or
            # the cached value is not encoded bytes). Pause briefly instead
            # of spinning or raising in the middle of the stream.
            time.sleep(0.01)


def image_frame():
    """Yield the most recent captured scan, if there is one."""
    frame = _get_camera().get_image_frame()

    if frame is not None:
        yield _jpeg_part(frame)


@app.route('/video_viewer')
def video_viewer():
    """Stream the live preview."""
    return Response(video_frame(),
                    mimetype='multipart/x-mixed-replace; boundary=frame')


@app.route('/image_viewer')
def image_viewer():
    """Stream the last captured scan."""
    return Response(image_frame(),
                    mimetype='multipart/x-mixed-replace; boundary=frame')


if __name__ == '__main__':
    app.run(host='0.0.0.0', threaded=True)
Run the app:
python server.py
Source Code
https://github.com/yushulx/web-document-scanner
The post How to Build Web Document Scanner Using OpenCV-Python appeared first on Code Pool.