Tesseract¶
In [1]:
Copied!
from omnidocs.tasks.ocr_extraction.extractors.tesseract_ocr import TesseractOCRExtractor
from omnidocs.tasks.ocr_extraction.extractors.tesseract_ocr import TesseractOCRExtractor
c:\Users\laxma\OneDrive\Desktop\CogLab\11-07-2025\Omnidocs\new\Lib\site-packages\transformers\utils\hub.py:111: FutureWarning: Using `TRANSFORMERS_CACHE` is deprecated and will be removed in v5 of Transformers. Use `HF_HOME` instead. warnings.warn(
In [2]:
Copied!
# Sample invoice image under the repository's test assets; the path is
# relative to this notebook's location.
image_path = "../../../../tests/ocr_extraction/assets/invoice.jpg"

# Build the extractor with no arguments and run it once on the image.
extractor = TesseractOCRExtractor()
result = extractor.extract(image_path)

# Show a short preview of the extracted text. The ellipsis is appended only
# when the text was actually cut, so a short result is not shown as truncated.
preview_chars = 200
full_text = result.full_text
ellipsis = "..." if len(full_text) > preview_chars else ""
print(f"'{full_text[:preview_chars]}{ellipsis}'")
INFO [timestamp]2025-07-31 13:05:01[/] | [logger.name]omnidocs.tasks.ocr_extraction.extractors.tesseract_ocr[/] | [function]logging.py:150[/] | [info]extract completed in 2.11s[/]
INFO [timestamp]2025-07-31 13:05:01[/] | [logger.name]omnidocs.tasks.ocr_extraction.extractors.tesseract_ocr[/] | [function]logging.py:150[/] | [info]extract completed in 2.11s[/]
[2025-07-31 13:05:01,798] [ INFO] logging.py:150 - extract completed in 2.11s
'| . ‘Account number PAT20-92 Need help? Normal business hous ore: Invoice number: 6912 i Monday —Friday 8:00 am to 5:00 pm Lasor HOURLY RATE Hours AMOUNT Jamie M. $45.00 2 $3,690 Taro W. $36.00 oe $2,...'
In [ ]:
Copied!