EasyOCR¶

EasyOCR extractor.

EasyOCR is a PyTorch-based OCR engine with excellent multi-language support. - GPU accelerated (optional) - Supports 80+ languages - Good for scene text and printed documents

Python Package

pip install easyocr

Model Download Location

By default, EasyOCR downloads models to ~/.EasyOCR/ Can be overridden with model_storage_directory parameter

EasyOCRConfig ¶

Bases: BaseModel

Configuration for EasyOCR extractor.

This is a single-backend model (PyTorch - CPU/GPU).

Example

config = EasyOCRConfig(languages=["en", "ch_sim"], gpu=True)
ocr = EasyOCR(config=config)

EasyOCR ¶

EasyOCR(config: EasyOCRConfig)

Bases: BaseOCRExtractor

EasyOCR text extractor.

Single-backend model (PyTorch - CPU/GPU).

Example

from omnidocs.tasks.ocr_extraction import EasyOCR, EasyOCRConfig

ocr = EasyOCR(config=EasyOCRConfig(languages=["en"], gpu=True))
result = ocr.extract(image)

for block in result.text_blocks:
        print(f"'{block.text}' @ {block.bbox.to_list()}")

Initialize EasyOCR extractor.

PARAMETER	DESCRIPTION
`config`	Configuration object TYPE: `EasyOCRConfig`

RAISES	DESCRIPTION
`ImportError`	If easyocr is not installed

Source code in omnidocs/tasks/ocr_extraction/easyocr.py

def __init__(self, config: EasyOCRConfig):
    """
    Initialize EasyOCR extractor.

    Args:
        config: Configuration object

    Raises:
        ImportError: If easyocr is not installed
    """
    self.config = config
    self._reader = None
    self._load_model()

extract ¶

extract(
    image: Union[Image, ndarray, str, Path],
    detail: int = 1,
    paragraph: bool = False,
    min_size: int = 10,
    text_threshold: float = 0.7,
    low_text: float = 0.4,
    link_threshold: float = 0.4,
    canvas_size: int = 2560,
    mag_ratio: float = 1.0,
) -> OCROutput

Run OCR on an image.

PARAMETER	DESCRIPTION
`image`	Input image (PIL Image, numpy array, or path) TYPE: `Union[Image, ndarray, str, Path]`
`detail`	0 = simple output, 1 = detailed with boxes TYPE: `int` DEFAULT: `1`
`paragraph`	Combine results into paragraphs TYPE: `bool` DEFAULT: `False`
`min_size`	Minimum text box size TYPE: `int` DEFAULT: `10`
`text_threshold`	Text confidence threshold TYPE: `float` DEFAULT: `0.7`
`low_text`	Low text bound TYPE: `float` DEFAULT: `0.4`
`link_threshold`	Link threshold for text joining TYPE: `float` DEFAULT: `0.4`
`canvas_size`	Max image dimension for processing TYPE: `int` DEFAULT: `2560`
`mag_ratio`	Magnification ratio TYPE: `float` DEFAULT: `1.0`

RETURNS	DESCRIPTION
`OCROutput`	OCROutput with detected text blocks

RAISES	DESCRIPTION
`ValueError`	If detail is not 0 or 1
`RuntimeError`	If EasyOCR is not initialized

Source code in omnidocs/tasks/ocr_extraction/easyocr.py

def extract(
    self,
    image: Union[Image.Image, np.ndarray, str, Path],
    detail: int = 1,
    paragraph: bool = False,
    min_size: int = 10,
    text_threshold: float = 0.7,
    low_text: float = 0.4,
    link_threshold: float = 0.4,
    canvas_size: int = 2560,
    mag_ratio: float = 1.0,
) -> OCROutput:
    """
    Run OCR on an image.

    Args:
        image: Input image (PIL Image, numpy array, or path)
        detail: 0 = simple output, 1 = detailed with boxes
        paragraph: Combine results into paragraphs
        min_size: Minimum text box size
        text_threshold: Text confidence threshold
        low_text: Low text bound
        link_threshold: Link threshold for text joining
        canvas_size: Max image dimension for processing
        mag_ratio: Magnification ratio

    Returns:
        OCROutput with detected text blocks

    Raises:
        ValueError: If detail is not 0 or 1
        RuntimeError: If EasyOCR is not initialized
    """
    if self._reader is None:
        raise RuntimeError("EasyOCR not initialized. Call _load_model() first.")

    # Validate detail parameter
    if detail not in (0, 1):
        raise ValueError(f"detail must be 0 or 1, got {detail}")

    # Prepare image
    pil_image = self._prepare_image(image)
    image_width, image_height = pil_image.size

    # Convert to numpy array for EasyOCR
    image_array = np.array(pil_image)

    # Run EasyOCR
    results = self._reader.readtext(
        image_array,
        detail=detail,
        paragraph=paragraph,
        min_size=min_size,
        text_threshold=text_threshold,
        low_text=low_text,
        link_threshold=link_threshold,
        canvas_size=canvas_size,
        mag_ratio=mag_ratio,
    )

    # Parse results
    text_blocks = []
    full_text_parts = []

    for result in results:
        if detail == 0:
            # Simple output: just text
            text = result
            confidence = 1.0
            bbox = BoundingBox(x1=0, y1=0, x2=0, y2=0)
            polygon = None
        else:
            # Detailed output: [polygon, text, confidence]
            polygon_points, text, confidence = result

            # EasyOCR returns 4 corner points: [[x1,y1], [x2,y1], [x2,y2], [x1,y2]]
            # Convert to list of lists for storage
            polygon = [list(p) for p in polygon_points]

            # Convert to axis-aligned bounding box
            bbox = BoundingBox.from_polygon(polygon)

        if not text.strip():
            continue

        text_blocks.append(
            TextBlock(
                text=text,
                bbox=bbox,
                confidence=float(confidence),
                granularity=(OCRGranularity.LINE if paragraph else OCRGranularity.WORD),
                polygon=polygon,
                language="+".join(self.config.languages),
            )
        )

        full_text_parts.append(text)

    # Sort by position
    text_blocks.sort(key=lambda b: (b.bbox.y1, b.bbox.x1))

    return OCROutput(
        text_blocks=text_blocks,
        full_text=" ".join(full_text_parts),
        image_width=image_width,
        image_height=image_height,
        model_name=self.MODEL_NAME,
        languages_detected=self.config.languages,
    )

extract_batch ¶

extract_batch(
    images: List[Union[Image, ndarray, str, Path]], **kwargs
) -> List[OCROutput]

Run OCR on multiple images.

PARAMETER	DESCRIPTION
`images`	List of input images TYPE: `List[Union[Image, ndarray, str, Path]]`
`**kwargs`	Arguments passed to extract() DEFAULT: `{}`

RETURNS	DESCRIPTION
`List[OCROutput]`	List of OCROutput objects

Source code in omnidocs/tasks/ocr_extraction/easyocr.py

def extract_batch(
    self,
    images: List[Union[Image.Image, np.ndarray, str, Path]],
    **kwargs,
) -> List[OCROutput]:
    """
    Run OCR on multiple images.

    Args:
        images: List of input images
        **kwargs: Arguments passed to extract()

    Returns:
        List of OCROutput objects
    """
    results = []
    for img in images:
        results.append(self.extract(img, **kwargs))
    return results