Skip to content

Predictor

Rule-based reading order predictor.

Uses spatial analysis and R-tree indexing to determine the logical reading sequence of document elements. Self-contained implementation without external dependencies on docling-ibm-models.

Based on the algorithm from docling-ibm-models, adapted for omnidocs.

RuleBasedReadingOrderPredictor

RuleBasedReadingOrderPredictor()

Bases: BaseReadingOrderPredictor

Rule-based reading order predictor using spatial analysis.

Uses R-tree spatial indexing and rule-based algorithms to determine the logical reading sequence of document elements. This is a CPU-only implementation that doesn't require GPU resources.

Features:

- Multi-column layout detection
- Header/footer separation
- Caption-to-figure/table association
- Footnote linking
- Element merge suggestions

Example
from omnidocs.tasks.reading_order import RuleBasedReadingOrderPredictor
from omnidocs.tasks.layout_extraction import DocLayoutYOLO, DocLayoutYOLOConfig
from omnidocs.tasks.ocr_extraction import EasyOCR, EasyOCRConfig

# Initialize components: a layout detector, an OCR engine and the predictor
layout_extractor = DocLayoutYOLO(config=DocLayoutYOLOConfig())
ocr = EasyOCR(config=EasyOCRConfig())
predictor = RuleBasedReadingOrderPredictor()

# Process document
# NOTE: `image` is not defined in this snippet; the caller must supply the
# page image before running these lines.
layout = layout_extractor.extract(image)
ocr_result = ocr.extract(image)
# The OCR result is passed as the second argument of predict()
reading_order = predictor.predict(layout, ocr_result)

# Get text in reading order
text = reading_order.get_full_text()

Initialize the reading order predictor.

Source code in omnidocs/tasks/reading_order/rule_based/predictor.py
def __init__(self):
    """Set the predictor's default dilation settings."""
    # Page-width-normalized threshold: horizontal dilation is applied only
    # when the value compared against it is smaller than this.
    self._horizontal_dilation_threshold_norm = 0.15
    self.dilated_page_element = True

predict

predict(
    layout: LayoutOutput,
    ocr: Optional[OCROutput] = None,
    page_no: int = 0,
) -> ReadingOrderOutput

Predict reading order for a single page.

PARAMETER DESCRIPTION
layout

Layout detection results with bounding boxes

TYPE: LayoutOutput

ocr

Optional OCR results for text content

TYPE: Optional[OCROutput] DEFAULT: None

page_no

Page number (for multi-page documents)

TYPE: int DEFAULT: 0

RETURNS DESCRIPTION
ReadingOrderOutput

ReadingOrderOutput with ordered elements and associations

Source code in omnidocs/tasks/reading_order/rule_based/predictor.py
def predict(
    self,
    layout: "LayoutOutput",
    ocr: Optional["OCROutput"] = None,
    page_no: int = 0,
) -> ReadingOrderOutput:
    """
    Determine the reading order of the layout elements on one page.

    Layout boxes are turned into internal page elements (bottom-left
    origin), ordered, and then converted back into top-left-origin
    bounding boxes for the returned output. Caption, footnote and merge
    associations are computed on the ordered elements.

    Args:
        layout: Layout detection results with bounding boxes
        ocr: Optional OCR results for text content
        page_no: Page number (for multi-page documents)

    Returns:
        ReadingOrderOutput with ordered elements and associations
    """
    width = layout.image_width
    height = layout.image_height

    # Text keyed by layout box index; stays empty when no OCR is supplied
    texts: Dict[int, str] = self._build_text_map(layout, ocr) if ocr else {}

    # Internal elements use a bottom-left origin, so the vertical axis is
    # flipped: the box's y2 yields `bottom` and its y1 yields `top`.
    elements: List[_PageElement] = [
        _PageElement(
            cid=box_id,
            text=texts.get(box_id, ""),
            page_no=page_no,
            page_width=width,
            page_height=height,
            label=LABEL_TO_ELEMENT_TYPE.get(
                box.label.value.lower(), ElementType.OTHER
            ),
            left=box.bbox.x1,
            bottom=height - box.bbox.y2,
            right=box.bbox.x2,
            top=height - box.bbox.y1,
        )
        for box_id, box in enumerate(layout.bboxes)
    ]

    # Order the elements, then derive the associations from that ordering
    in_order = self._predict_reading_order(elements)
    captions = self._find_to_captions(in_order)
    footnotes = self._find_to_footnotes(in_order)
    merges = self._predict_merges(in_order)

    # Flip the vertical axis back to a top-left origin for the public output.
    # Confidence is taken from the source layout box when `cid` indexes one,
    # and defaults to 1.0 otherwise.
    ordered: List[OrderedElement] = [
        OrderedElement(
            index=position,
            element_type=element.label,
            bbox=BoundingBox(
                x1=element.left,
                y1=height - element.top,
                x2=element.right,
                y2=height - element.bottom,
            ),
            text=element.text,
            confidence=(
                layout.bboxes[element.cid].confidence
                if element.cid < len(layout.bboxes)
                else 1.0
            ),
            page_no=page_no,
            original_id=element.cid,
        )
        for position, element in enumerate(in_order)
    ]

    return ReadingOrderOutput(
        ordered_elements=ordered,
        caption_map=captions,
        footnote_map=footnotes,
        merge_map=merges,
        image_width=width,
        image_height=height,
        model_name="RuleBasedReadingOrderPredictor",
    )