OCR test
In [ ]:
Copied!
def test_ocr_extraction():
    """Smoke-test each OCR extractor backend on the sample invoice image.

    For every backend the function instantiates the extractor with its
    default arguments, runs ``extract`` on the image, prints the length of
    ``result.full_text`` and calls ``visualize`` twice: once with default
    styling and once with custom box/text styling.

    Each backend is imported lazily inside the per-backend ``try`` block, so
    a backend whose module cannot be imported (for example because an
    optional OCR dependency is not installed) is reported as an ``ERROR``
    line instead of aborting the run before any backend has been exercised.

    Returns:
        None. Progress and failures are reported on stdout only; any
        ``Exception`` raised while handling a backend is caught and printed.
    """
    from importlib import import_module
    from pathlib import Path

    # (module path, class name) pairs, resolved one at a time inside the loop.
    extractor_specs = [
        ("omnidocs.tasks.ocr_extraction.extractors.paddle", "PaddleOCRExtractor"),
        ("omnidocs.tasks.ocr_extraction.extractors.tesseract_ocr", "TesseractOCRExtractor"),
        ("omnidocs.tasks.ocr_extraction.extractors.easy_ocr", "EasyOCRExtractor"),
        ("omnidocs.tasks.ocr_extraction.extractors.surya_ocr", "SuryaOCRExtractor"),
    ]
    # Relative path: resolved against the current working directory.
    image_path = Path("Omnidocs") / "tests" / "ocr_extraction" / "assets" / "invoice.jpg"

    for module_name, class_name in extractor_specs:
        print(f"\nTesting {class_name}")
        print("-" * 40)
        try:
            # Importing here keeps one broken/missing backend from blocking
            # the remaining ones.
            extractor_cls = getattr(import_module(module_name), class_name)
            extractor = extractor_cls()
            result = extractor.extract(image_path)
            print(f"Text length: {len(result.full_text)} chars")

            # Default styling. The third argument is presumably the output
            # image path -- confirm against the extractor API.
            vis_path = f"visualized_{extractor_cls.__name__}.png"
            extractor.visualize(result, image_path, vis_path)

            # To load and visualize a result that was already saved as JSON:
            # extractor.visualize_from_json("image.jpg", "results.json", "viz.png")

            # Same visualization with custom styling.
            extractor.visualize(
                result,
                image_path,
                f"styled_{extractor_cls.__name__}.png",
                box_color='green',
                box_width=3,
                show_text=True,
                text_color='red',
            )
            print("SUCCESS")
        except Exception as e:
            # Deliberately broad: this is a best-effort smoke run and the
            # failure of one backend must not stop the others.
            print(f"ERROR: {e}")
def test_ocr_extraction():
    """Smoke-test each OCR extractor backend on the sample invoice image.

    For every backend the function instantiates the extractor with its
    default arguments, runs ``extract`` on the image, prints the length of
    ``result.full_text`` and calls ``visualize`` twice: once with default
    styling and once with custom box/text styling.

    Each backend is imported lazily inside the per-backend ``try`` block, so
    a backend whose module cannot be imported (for example because an
    optional OCR dependency is not installed) is reported as an ``ERROR``
    line instead of aborting the run before any backend has been exercised.

    Returns:
        None. Progress and failures are reported on stdout only; any
        ``Exception`` raised while handling a backend is caught and printed.
    """
    from importlib import import_module
    from pathlib import Path

    # (module path, class name) pairs, resolved one at a time inside the loop.
    extractor_specs = [
        ("omnidocs.tasks.ocr_extraction.extractors.paddle", "PaddleOCRExtractor"),
        ("omnidocs.tasks.ocr_extraction.extractors.tesseract_ocr", "TesseractOCRExtractor"),
        ("omnidocs.tasks.ocr_extraction.extractors.easy_ocr", "EasyOCRExtractor"),
        ("omnidocs.tasks.ocr_extraction.extractors.surya_ocr", "SuryaOCRExtractor"),
    ]
    # Relative path: resolved against the current working directory.
    image_path = Path("Omnidocs") / "tests" / "ocr_extraction" / "assets" / "invoice.jpg"

    for module_name, class_name in extractor_specs:
        print(f"\nTesting {class_name}")
        print("-" * 40)
        try:
            # Importing here keeps one broken/missing backend from blocking
            # the remaining ones.
            extractor_cls = getattr(import_module(module_name), class_name)
            extractor = extractor_cls()
            result = extractor.extract(image_path)
            print(f"Text length: {len(result.full_text)} chars")

            # Default styling. The third argument is presumably the output
            # image path -- confirm against the extractor API.
            vis_path = f"visualized_{extractor_cls.__name__}.png"
            extractor.visualize(result, image_path, vis_path)

            # To load and visualize a result that was already saved as JSON:
            # extractor.visualize_from_json("image.jpg", "results.json", "viz.png")

            # Same visualization with custom styling.
            extractor.visualize(
                result,
                image_path,
                f"styled_{extractor_cls.__name__}.png",
                box_color='green',
                box_width=3,
                show_text=True,
                text_color='red',
            )
            print("SUCCESS")
        except Exception as e:
            # Deliberately broad: this is a best-effort smoke run and the
            # failure of one backend must not stop the others.
            print(f"ERROR: {e}")
In [ ]:
Copied!
# Run the OCR smoke test once. A second identical call would only repeat the
# same extraction work and rewrite the same output images.
test_ocr_extraction()