Paddle OCR
In [1]:
Copied!
from omnidocs.tasks.ocr_extraction.extractors.paddle import PaddleOCRExtractor
from omnidocs.tasks.ocr_extraction.extractors.paddle import PaddleOCRExtractor
c:\Users\laxma\OneDrive\Desktop\CogLab\11-07-2025\Omnidocs\new\Lib\site-packages\transformers\utils\hub.py:111: FutureWarning: Using `TRANSFORMERS_CACHE` is deprecated and will be removed in v5 of Transformers. Use `HF_HOME` instead. warnings.warn(
In [ ]:
Copied!
# Demo cell: run the Paddle OCR extractor on a sample invoice image and
# preview the beginning of the recognized text.

# Relative path; it is resolved against the kernel's current working directory.
image_path = "../../../../tests/ocr_extraction/assets/invoice.jpg"

# Construct the extractor with no arguments.
extractor = PaddleOCRExtractor()

# Run extraction exactly once. The recorded run logs a single
# "extract completed in 5.90s", so repeating the call only adds seconds of
# redundant work and a duplicate printout.
result = extractor.extract(image_path)

# Print only the first 200 characters of the text, quoted and followed by "...".
print(f"'{result.full_text[:200]}...'")
INFO [timestamp]2025-07-31 12:36:15[/] | [logger.name]omnidocs.tasks.ocr_extraction.extractors.paddle[/] | [function]logging.py:150[/] | [info]extract completed in 5.90s[/]
INFO [timestamp]2025-07-31 12:36:15[/] | [logger.name]omnidocs.tasks.ocr_extraction.extractors.paddle[/] | [function]logging.py:150[/] | [info]extract completed in 5.90s[/]
[2025-07-31 12:36:15,484] [ INFO] logging.py:150 - extract completed in 5.90s
'Invoice Account number:PAT20-32 Need help? Normal business hours are: Invoice number6312 Monday-Friday 8:00 am to 5:00 pm LABOR HOURLY RATE HOURS AMOUNT Jamle M. $45.00 82 $3,690.00 Tarric W. $36.00 6...'
In [ ]:
Copied!