# ...
#
# Copyright (c) 2019 Philippe Mathieu-Daudé <f4bug@amsat.org>
#
# This work is licensed under the terms of the GNU GPL, version 2 or
# later. See the COPYING file in the top-level directory.

import logging
import shlex
from subprocess import run


def tesseract_ocr(image_path, tesseract_args=''):
    """Run the ``tesseract`` program on an image and return its text lines.

    :param image_path: path of the image file handed to ``tesseract``.
    :param tesseract_args: extra ``tesseract`` command-line options given
                           as a single shell-style string (for example
                           ``'--psm 7'``).  Empty by default.
    :returns: list of the non-blank output lines, each stripped of
              surrounding whitespace, or ``None`` when ``tesseract``
              exits with a non-zero status.

    Exceptions raised by ``subprocess.run`` itself (for instance when the
    ``tesseract`` executable cannot be found) are not caught here.
    """
    console_logger = logging.getLogger('console')
    console_logger.debug(image_path)

    # The text is read back from the process' standard output, hence the
    # 'stdout' output base.  Caller-supplied options are appended after it;
    # shlex keeps quoted option values together as one argument.
    command = ['tesseract', image_path, 'stdout']
    if tesseract_args:
        command += shlex.split(tesseract_args)

    proc = run(command, capture_output=True, encoding='utf8')
    if proc.returncode:
        # Keep the "None on failure" contract, but leave a trace of why.
        console_logger.debug('tesseract exited with status %d: %s',
                             proc.returncode, (proc.stderr or '').strip())
        return None

    lines = []
    # Split on '\n' only: other separators (e.g. form feeds) are treated as
    # plain whitespace and removed by strip().
    for line in proc.stdout.split('\n'):
        sline = line.strip()
        if sline:
            console_logger.debug(sline)
            lines.append(sline)
    return lines