Skip to content

Overview

Table Extraction Module.

Provides extractors for detecting and extracting table structure from document images. Outputs structured table data with cells, spans, and multiple export formats (HTML, Markdown, Pandas DataFrame).

Available Extractors
  • TableFormerExtractor: Transformer-based table structure extractor
Example
from omnidocs.tasks.table_extraction import TableFormerExtractor, TableFormerConfig

# Initialize extractor
extractor = TableFormerExtractor(
    config=TableFormerConfig(mode="fast", device="cuda")
)

# Extract table structure
result = extractor.extract(table_image)

# Get HTML output
html = result.to_html()

# Get DataFrame
df = result.to_dataframe()

# Get Markdown
md = result.to_markdown()

# Access cells
for cell in result.cells:
    print(f"[{cell.row},{cell.col}] {cell.text}")

BaseTableExtractor

Bases: ABC

Abstract base class for table structure extractors.

Table extractors analyze table images to detect cell structure, identify headers, and extract text content.

Example
class MyTableExtractor(BaseTableExtractor):
    def __init__(self, config: MyConfig):
        self.config = config
        self._load_model()

    def _load_model(self):
        # Load model weights
        pass

    def extract(self, image):
        # Run extraction
        return TableOutput(...)

extract abstractmethod

extract(
    image: Union[Image, ndarray, str, Path],
    ocr_output: Optional[OCROutput] = None,
) -> TableOutput

Extract table structure from an image.

PARAMETER DESCRIPTION
image

Table image (should be cropped to table region)

TYPE: Union[Image, ndarray, str, Path]

ocr_output

Optional OCR results for cell text matching. If not provided, model will attempt to extract text.

TYPE: Optional[OCROutput] DEFAULT: None

RETURNS DESCRIPTION
TableOutput

TableOutput with cells, structure, and export methods

Example
# Without OCR (model extracts text)
result = extractor.extract(table_image)

# With OCR (better text quality)
ocr = some_ocr.extract(table_image)
result = extractor.extract(table_image, ocr_output=ocr)
Source code in omnidocs/tasks/table_extraction/base.py
@abstractmethod
def extract(
    self,
    image: Union[Image.Image, np.ndarray, str, Path],
    ocr_output: Optional["OCROutput"] = None,
) -> TableOutput:
    """
    Extract table structure from a single image.

    Args:
        image: Table image, ideally cropped to the table region.
        ocr_output: Optional OCR results used to fill in cell text.
            When omitted, the model attempts to extract text itself.

    Returns:
        TableOutput with cells, structure, and export methods.

    Example:
        ```python
        # Without OCR (model extracts text)
        result = extractor.extract(table_image)

        # With OCR (better text quality)
        ocr = some_ocr.extract(table_image)
        result = extractor.extract(table_image, ocr_output=ocr)
        ```
    """

batch_extract

batch_extract(
    images: List[Union[Image, ndarray, str, Path]],
    ocr_outputs: Optional[List[OCROutput]] = None,
    progress_callback: Optional[
        Callable[[int, int], None]
    ] = None,
) -> List[TableOutput]

Extract tables from multiple images.

Default implementation loops over extract(). Subclasses can override for optimized batching.

PARAMETER DESCRIPTION
images

List of table images

TYPE: List[Union[Image, ndarray, str, Path]]

ocr_outputs

Optional list of OCR results (same length as images)

TYPE: Optional[List[OCROutput]] DEFAULT: None

progress_callback

Optional function(current, total) for progress

TYPE: Optional[Callable[[int, int], None]] DEFAULT: None

RETURNS DESCRIPTION
List[TableOutput]

List of TableOutput in same order as input

Examples:

results = extractor.batch_extract(table_images)
Source code in omnidocs/tasks/table_extraction/base.py
def batch_extract(
    self,
    images: List[Union[Image.Image, np.ndarray, str, Path]],
    ocr_outputs: Optional[List["OCROutput"]] = None,
    progress_callback: Optional[Callable[[int, int], None]] = None,
) -> List[TableOutput]:
    """
    Extract tables from multiple images.

    Default implementation loops over extract(). Subclasses can override
    for optimized batching.

    Args:
        images: List of table images
        ocr_outputs: Optional list of OCR results (same length as images)
        progress_callback: Optional function(current, total) for progress

    Returns:
        List of TableOutput in same order as input

    Raises:
        ValueError: If a non-empty ocr_outputs has a different length
            than images.

    Examples:
        ```python
        results = extractor.batch_extract(table_images)
        ```
    """
    # Fail fast with a clear message instead of an opaque IndexError
    # midway through the batch. An empty list keeps its legacy meaning
    # of "no OCR results" (same as None).
    if ocr_outputs and len(ocr_outputs) != len(images):
        raise ValueError(
            f"ocr_outputs length ({len(ocr_outputs)}) must match images length ({len(images)})"
        )

    results = []
    total = len(images)

    for i, image in enumerate(images):
        if progress_callback:
            # Report 1-based progress before processing each image.
            progress_callback(i + 1, total)

        ocr = ocr_outputs[i] if ocr_outputs else None
        result = self.extract(image, ocr_output=ocr)
        results.append(result)

    return results

extract_document

extract_document(
    document: Document,
    table_bboxes: Optional[List[List[float]]] = None,
    progress_callback: Optional[
        Callable[[int, int], None]
    ] = None,
) -> List[TableOutput]

Extract tables from all pages of a document.

PARAMETER DESCRIPTION
document

Document instance

TYPE: Document

table_bboxes

Optional list of table bounding boxes per page. Each element should be a list of [x1, y1, x2, y2] coords.

TYPE: Optional[List[List[float]]] DEFAULT: None

progress_callback

Optional function(current, total) for progress

TYPE: Optional[Callable[[int, int], None]] DEFAULT: None

RETURNS DESCRIPTION
List[TableOutput]

List of TableOutput, one per detected table

Examples:

doc = Document.from_pdf("paper.pdf")
results = extractor.extract_document(doc)
Source code in omnidocs/tasks/table_extraction/base.py
def extract_document(
    self,
    document: "Document",
    table_bboxes: Optional[List[List[float]]] = None,
    progress_callback: Optional[Callable[[int, int], None]] = None,
) -> List[TableOutput]:
    """
    Extract tables from every page of a document.

    Args:
        document: Document instance
        table_bboxes: Optional list of table bounding boxes. Each element
            is a [x1, y1, x2, y2] box. NOTE(review): the same box list is
            applied to every page — confirm this matches caller intent.
        progress_callback: Optional function(current, total) for progress

    Returns:
        List of TableOutput, one per detected table

    Examples:
        ```python
        doc = Document.from_pdf("paper.pdf")
        results = extractor.extract_document(doc)
        ```
    """
    outputs: List[TableOutput] = []
    total = document.page_count

    for page_num, page in enumerate(document.iter_pages(), start=1):
        if progress_callback:
            progress_callback(page_num, total)

        if table_bboxes is None:
            # No regions supplied: run extraction on the full page image.
            outputs.append(self.extract(page))
            continue

        # Crop each supplied region from the page and extract it.
        for x1, y1, x2, y2 in table_bboxes:
            region = page.crop((x1, y1, x2, y2))
            outputs.append(self.extract(region))

    return outputs

BoundingBox

Bases: BaseModel

Bounding box in pixel coordinates.

width property

width: float

Width of the bounding box.

height property

height: float

Height of the bounding box.

area property

area: float

Area of the bounding box.

center property

center: Tuple[float, float]

Center point of the bounding box.

to_list

to_list() -> List[float]

Convert to [x1, y1, x2, y2] list.

Source code in omnidocs/tasks/table_extraction/models.py
def to_list(self) -> List[float]:
    """Return the box as a [x1, y1, x2, y2] list."""
    return [self.x1, self.y1, self.x2, self.y2]

to_xyxy

to_xyxy() -> Tuple[float, float, float, float]

Convert to (x1, y1, x2, y2) tuple.

Source code in omnidocs/tasks/table_extraction/models.py
def to_xyxy(self) -> Tuple[float, float, float, float]:
    """Return the box as an (x1, y1, x2, y2) tuple."""
    return (self.x1, self.y1, self.x2, self.y2)

from_list classmethod

from_list(coords: List[float]) -> BoundingBox

Create from [x1, y1, x2, y2] list.

Source code in omnidocs/tasks/table_extraction/models.py
@classmethod
def from_list(cls, coords: List[float]) -> "BoundingBox":
    """Build a BoundingBox from a [x1, y1, x2, y2] list."""
    if len(coords) != 4:
        raise ValueError(f"Expected 4 coordinates, got {len(coords)}")
    x1, y1, x2, y2 = coords
    return cls(x1=x1, y1=y1, x2=x2, y2=y2)

from_ltrb classmethod

from_ltrb(
    left: float, top: float, right: float, bottom: float
) -> BoundingBox

Create from left, top, right, bottom coordinates.

Source code in omnidocs/tasks/table_extraction/models.py
@classmethod
def from_ltrb(cls, left: float, top: float, right: float, bottom: float) -> "BoundingBox":
    """Build a BoundingBox from left/top/right/bottom edge coordinates."""
    return cls(x1=left, y1=top, x2=right, y2=bottom)

to_normalized

to_normalized(
    image_width: int, image_height: int
) -> BoundingBox

Convert to normalized coordinates (0-1024 range).

PARAMETER DESCRIPTION
image_width

Original image width in pixels

TYPE: int

image_height

Original image height in pixels

TYPE: int

RETURNS DESCRIPTION
BoundingBox

New BoundingBox with coordinates in 0-1024 range

Source code in omnidocs/tasks/table_extraction/models.py
def to_normalized(self, image_width: int, image_height: int) -> "BoundingBox":
    """
    Convert to normalized coordinates (0-1024 range).

    Args:
        image_width: Original image width in pixels
        image_height: Original image height in pixels

    Returns:
        New BoundingBox with coordinates in 0-1024 range
    """
    # Keep the original division-then-multiplication order so float
    # results are bit-for-bit identical to the previous implementation.
    nx1 = self.x1 / image_width * NORMALIZED_SIZE
    ny1 = self.y1 / image_height * NORMALIZED_SIZE
    nx2 = self.x2 / image_width * NORMALIZED_SIZE
    ny2 = self.y2 / image_height * NORMALIZED_SIZE
    return BoundingBox(x1=nx1, y1=ny1, x2=nx2, y2=ny2)

CellType

Bases: str, Enum

Type of table cell.

TableCell

Bases: BaseModel

Single table cell with position, span, and content.

The cell position uses 0-indexed row/column indices. Spans indicate how many rows/columns the cell occupies.

end_row property

end_row: int

Ending row index (exclusive).

end_col property

end_col: int

Ending column index (exclusive).

is_header property

is_header: bool

Check if cell is any type of header.

to_dict

to_dict() -> Dict

Convert to dictionary representation.

Source code in omnidocs/tasks/table_extraction/models.py
def to_dict(self) -> Dict:
    """Serialize the cell to a plain dictionary."""
    bbox_list = self.bbox.to_list() if self.bbox else None
    return {
        "row": self.row,
        "col": self.col,
        "row_span": self.row_span,
        "col_span": self.col_span,
        "text": self.text,
        "cell_type": self.cell_type.value,
        "bbox": bbox_list,
        "confidence": self.confidence,
    }

TableOutput

Bases: BaseModel

Complete table extraction result.

Provides multiple export formats and utility methods for working with extracted table data.

Example
result = extractor.extract(table_image)

# Basic info
print(f"Table: {result.num_rows}x{result.num_cols}")

# Export to HTML
html = result.to_html()

# Export to Pandas
df = result.to_dataframe()

# Export to Markdown
md = result.to_markdown()

# Access specific cell
cell = result.get_cell(row=0, col=0)

cell_count property

cell_count: int

Number of cells in the table.

has_headers property

has_headers: bool

Check if table has header cells.

get_cell

get_cell(row: int, col: int) -> Optional[TableCell]

Get cell at specific position.

Handles merged cells by returning the cell that covers the position.

Source code in omnidocs/tasks/table_extraction/models.py
def get_cell(self, row: int, col: int) -> Optional[TableCell]:
    """
    Return the cell covering position (row, col), or None.

    A merged cell is returned for every grid position inside its span.
    """
    return next(
        (
            cell
            for cell in self.cells
            if cell.row <= row < cell.end_row and cell.col <= col < cell.end_col
        ),
        None,
    )

get_row

get_row(row: int) -> List[TableCell]

Get all cells in a specific row.

Source code in omnidocs/tasks/table_extraction/models.py
def get_row(self, row: int) -> List[TableCell]:
    """Return every cell whose anchor row index equals *row*."""
    return [cell for cell in self.cells if cell.row == row]

get_column

get_column(col: int) -> List[TableCell]

Get all cells in a specific column.

Source code in omnidocs/tasks/table_extraction/models.py
def get_column(self, col: int) -> List[TableCell]:
    """Return every cell whose anchor column index equals *col*."""
    return [cell for cell in self.cells if cell.col == col]

to_html

to_html(include_styles: bool = True) -> str

Convert table to HTML string.

PARAMETER DESCRIPTION
include_styles

Whether to include basic CSS styling

TYPE: bool DEFAULT: True

RETURNS DESCRIPTION
str

HTML table string

Example
html = result.to_html()
with open("table.html", "w") as f:
    f.write(html)
Source code in omnidocs/tasks/table_extraction/models.py
def to_html(self, include_styles: bool = True) -> str:
    """
    Convert table to HTML string.

    Args:
        include_styles: Whether to include basic CSS styling

    Returns:
        HTML table string

    Example:
        ```python
        html = result.to_html()
        with open("table.html", "w") as f:
            f.write(html)
        ```
    """
    cell_style = 'style="border: 1px solid #ddd; padding: 8px;"'

    # Build 2D grid accounting for spans: every grid position covered by a
    # merged cell points at that cell.
    grid: List[List[Optional[TableCell]]] = [[None for _ in range(self.num_cols)] for _ in range(self.num_rows)]

    for cell in self.cells:
        for r in range(cell.row, cell.end_row):
            for c in range(cell.col, cell.end_col):
                if r < self.num_rows and c < self.num_cols:
                    grid[r][c] = cell

    # Generate HTML
    lines = []

    if include_styles:
        lines.append('<table style="border-collapse: collapse; width: 100%;">')
    else:
        lines.append("<table>")

    processed: set = set()  # Track cells we've already output

    for row_idx in range(self.num_rows):
        lines.append("  <tr>")

        for col_idx in range(self.num_cols):
            cell = grid[row_idx][col_idx]

            if cell is None:
                # Fix: previously empty positions were always emitted as a
                # bare <td></td>, leaving unstyled (borderless) gaps inside
                # an otherwise bordered table when include_styles=True.
                if include_styles:
                    lines.append(f"    <td {cell_style}></td>")
                else:
                    lines.append("    <td></td>")
                continue

            # Skip if this cell was already output (merged cell)
            cell_id = (cell.row, cell.col)
            if cell_id in processed:
                continue
            processed.add(cell_id)

            # Determine tag based on cell type
            tag = "th" if cell.is_header else "td"

            # Build attributes
            attrs = []
            if cell.row_span > 1:
                attrs.append(f'rowspan="{cell.row_span}"')
            if cell.col_span > 1:
                attrs.append(f'colspan="{cell.col_span}"')
            if include_styles:
                attrs.append(cell_style)

            attr_str = " " + " ".join(attrs) if attrs else ""

            # Escape HTML special characters in text (& first, then < and >)
            text = (cell.text or "").replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")

            lines.append(f"    <{tag}{attr_str}>{text}</{tag}>")

        lines.append("  </tr>")

    lines.append("</table>")

    return "\n".join(lines)

to_dataframe

to_dataframe()

Convert table to Pandas DataFrame.

RETURNS DESCRIPTION

pandas.DataFrame with table data

RAISES DESCRIPTION
ImportError

If pandas is not installed

Example
df = result.to_dataframe()
print(df.head())
df.to_csv("table.csv")
Source code in omnidocs/tasks/table_extraction/models.py
def to_dataframe(self):
    """
    Convert table to Pandas DataFrame.

    If every cell in the first row is a column header, that row becomes
    the DataFrame columns; otherwise default integer columns are used.
    Merged cells keep their value only at the top-left position.

    Returns:
        pandas.DataFrame with table data

    Raises:
        ImportError: If pandas is not installed

    Example:
        ```python
        df = result.to_dataframe()
        print(df.head())
        df.to_csv("table.csv")
        ```
    """
    try:
        import pandas as pd
    except ImportError:
        raise ImportError("pandas is required for to_dataframe(). Install with: pip install pandas")

    # Build a dense 2D array; merged cells contribute only their anchor.
    data: List[List[Optional[str]]] = [
        [None] * self.num_cols for _ in range(self.num_rows)
    ]
    for cell in self.cells:
        if cell.row < self.num_rows and cell.col < self.num_cols:
            data[cell.row][cell.col] = cell.text

    # Promote the first row to column labels only when it is entirely
    # column-header cells.
    top_row = self.get_row(0)
    use_header = bool(top_row) and all(c.cell_type == CellType.COLUMN_HEADER for c in top_row)

    if use_header and self.num_rows > 1:
        return pd.DataFrame(data[1:], columns=data[0])
    return pd.DataFrame(data)

to_markdown

to_markdown() -> str

Convert table to Markdown format.

Note: Markdown tables don't support merged cells, so spans are ignored and only the top-left cell value is used.

RETURNS DESCRIPTION
str

Markdown table string

Source code in omnidocs/tasks/table_extraction/models.py
def to_markdown(self) -> str:
    """
    Convert table to Markdown format.

    Note: Markdown tables don't support merged cells, so spans
    are ignored and only the top-left cell value is used.

    Cell text containing the ``|`` character is escaped with a
    backslash so it is not parsed as a column delimiter.

    Returns:
        Markdown table string
    """
    if self.num_rows == 0 or self.num_cols == 0:
        return ""

    # Build 2D grid
    grid: List[List[str]] = [["" for _ in range(self.num_cols)] for _ in range(self.num_rows)]

    for cell in self.cells:
        if cell.row < self.num_rows and cell.col < self.num_cols:
            # Fix: an unescaped "|" inside cell text would be read as a
            # column delimiter and shift every following column.
            grid[cell.row][cell.col] = (cell.text or "").replace("|", "\\|")

    lines = []

    # Header row
    lines.append("| " + " | ".join(grid[0]) + " |")

    # Separator
    lines.append("| " + " | ".join(["---"] * self.num_cols) + " |")

    # Data rows
    for row in grid[1:]:
        lines.append("| " + " | ".join(row) + " |")

    return "\n".join(lines)

to_dict

to_dict() -> Dict

Convert to dictionary representation.

Source code in omnidocs/tasks/table_extraction/models.py
def to_dict(self) -> Dict:
    """Serialize the table, including rendered HTML, to a plain dict."""
    payload: Dict = {
        "cells": [cell.to_dict() for cell in self.cells],
        "num_rows": self.num_rows,
        "num_cols": self.num_cols,
        "image_width": self.image_width,
        "image_height": self.image_height,
        "model_name": self.model_name,
    }
    # Unstyled HTML keeps the serialized form compact.
    payload["html"] = self.to_html(include_styles=False)
    return payload

save_json

save_json(file_path: Union[str, Path]) -> None

Save to JSON file.

Source code in omnidocs/tasks/table_extraction/models.py
def save_json(self, file_path: Union[str, Path]) -> None:
    """Write the model as indented JSON to *file_path*, creating parent dirs."""
    target = Path(file_path)
    target.parent.mkdir(parents=True, exist_ok=True)
    target.write_text(self.model_dump_json(indent=2), encoding="utf-8")

load_json classmethod

load_json(file_path: Union[str, Path]) -> TableOutput

Load from JSON file.

Source code in omnidocs/tasks/table_extraction/models.py
@classmethod
def load_json(cls, file_path: Union[str, Path]) -> "TableOutput":
    """Read a JSON file and validate it into a TableOutput."""
    return cls.model_validate_json(Path(file_path).read_text(encoding="utf-8"))

TableFormerConfig

Bases: BaseModel

Configuration for TableFormer table structure extractor.

TableFormer is a transformer-based model that predicts table structure using OTSL (Optimal Table Structure Language) tags and cell bounding boxes.

ATTRIBUTE DESCRIPTION
mode

Inference mode - "fast" or "accurate"

TYPE: TableFormerMode

device

Device for inference - "cpu", "cuda", "mps", or "auto"

TYPE: Literal['cpu', 'cuda', 'mps', 'auto']

num_threads

Number of CPU threads for inference

TYPE: int

do_cell_matching

Whether to match predicted cells with OCR text cells

TYPE: bool

artifacts_path

Path to pre-downloaded model artifacts

TYPE: Optional[str]

repo_id

HuggingFace model repository

TYPE: str

revision

Model revision/tag

TYPE: str

Example
from omnidocs.tasks.table_extraction import TableFormerExtractor, TableFormerConfig

# Fast mode
extractor = TableFormerExtractor(config=TableFormerConfig(mode="fast"))

# Accurate mode with GPU
extractor = TableFormerExtractor(
    config=TableFormerConfig(
        mode="accurate",
        device="cuda",
        do_cell_matching=True,
    )
)

TableFormerExtractor

TableFormerExtractor(config: TableFormerConfig)

Bases: BaseTableExtractor

Table structure extractor using TableFormer model.

TableFormer is a transformer-based model that predicts table structure using OTSL (Optimal Table Structure Language) tags. It can detect:

- Cell boundaries (bounding boxes)
- Row and column spans
- Header cells (column and row headers)
- Section rows

Example
from omnidocs.tasks.table_extraction import TableFormerExtractor, TableFormerConfig

# Initialize extractor
extractor = TableFormerExtractor(
    config=TableFormerConfig(mode="fast", device="cuda")
)

# Extract table structure
result = extractor.extract(table_image)

# Get HTML output
html = result.to_html()

# Get DataFrame
df = result.to_dataframe()

Initialize TableFormer extractor.

PARAMETER DESCRIPTION
config

TableFormerConfig with model settings

TYPE: TableFormerConfig

Source code in omnidocs/tasks/table_extraction/tableformer/pytorch.py
def __init__(self, config: TableFormerConfig):
    """
    Create a TableFormer extractor and eagerly load its model.

    Args:
        config: TableFormerConfig with model settings
    """
    self.config = config
    self._device = _resolve_device(config.device)
    # Both attributes are populated by _load_model() below.
    self._model_config: Optional[Dict] = None
    self._predictor = None
    self._load_model()

extract

extract(
    image: Union[Image, ndarray, str, Path],
    ocr_output: Optional[OCROutput] = None,
) -> TableOutput

Extract table structure from an image.

PARAMETER DESCRIPTION
image

Table image (should be cropped to table region)

TYPE: Union[Image, ndarray, str, Path]

ocr_output

Optional OCR results for cell text matching

TYPE: Optional[OCROutput] DEFAULT: None

RETURNS DESCRIPTION
TableOutput

TableOutput with cells, structure, and export methods

Example
result = extractor.extract(table_image)
print(f"Table: {result.num_rows}x{result.num_cols}")
html = result.to_html()
Source code in omnidocs/tasks/table_extraction/tableformer/pytorch.py
def extract(
    self,
    image: Union[Image.Image, np.ndarray, str, Path],
    ocr_output: Optional["OCROutput"] = None,
) -> TableOutput:
    """
    Run TableFormer on a single table image.

    Args:
        image: Table image (should be cropped to table region)
        ocr_output: Optional OCR results for cell text matching

    Returns:
        TableOutput with cells, structure, and export methods

    Example:
        ```python
        result = extractor.extract(table_image)
        print(f"Table: {result.num_rows}x{result.num_cols}")
        html = result.to_html()
        ```
    """
    # Normalize the input into a PIL image.
    img = self._prepare_image(image)
    width, height = img.size

    # TFPredictor consumes OpenCV-style arrays, so cv2 is a hard requirement.
    try:
        import cv2
    except ImportError:
        raise ImportError(
            "opencv-python is required for TableFormerExtractor. Install with: pip install opencv-python-headless"
        )

    bgr = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)

    # Assemble the iOCR page payload expected by the predictor.
    iocr_page = {
        "width": width,
        "height": height,
        "image": bgr,
        "tokens": self._build_tokens_from_ocr(ocr_output) if ocr_output else [],
    }

    # The whole input image is treated as one table region.
    predictions = self._predictor.multi_table_predict(
        iocr_page=iocr_page,
        table_bboxes=[[0, 0, width, height]],
        do_matching=self.config.do_cell_matching,
        correct_overlapping_cells=self.config.correct_overlapping_cells,
        sort_row_col_indexes=self.config.sort_row_col_indexes,
    )

    return self._convert_results(predictions, width, height)

TableFormerMode

Bases: str, Enum

TableFormer inference mode.

base

Base class for table extractors.

Defines the abstract interface that all table extractors must implement.

BaseTableExtractor

Bases: ABC

Abstract base class for table structure extractors.

Table extractors analyze table images to detect cell structure, identify headers, and extract text content.

Example
class MyTableExtractor(BaseTableExtractor):
    def __init__(self, config: MyConfig):
        self.config = config
        self._load_model()

    def _load_model(self):
        # Load model weights
        pass

    def extract(self, image):
        # Run extraction
        return TableOutput(...)

extract abstractmethod

extract(
    image: Union[Image, ndarray, str, Path],
    ocr_output: Optional[OCROutput] = None,
) -> TableOutput

Extract table structure from an image.

PARAMETER DESCRIPTION
image

Table image (should be cropped to table region)

TYPE: Union[Image, ndarray, str, Path]

ocr_output

Optional OCR results for cell text matching. If not provided, model will attempt to extract text.

TYPE: Optional[OCROutput] DEFAULT: None

RETURNS DESCRIPTION
TableOutput

TableOutput with cells, structure, and export methods

Example
# Without OCR (model extracts text)
result = extractor.extract(table_image)

# With OCR (better text quality)
ocr = some_ocr.extract(table_image)
result = extractor.extract(table_image, ocr_output=ocr)
Source code in omnidocs/tasks/table_extraction/base.py
@abstractmethod
def extract(
    self,
    image: Union[Image.Image, np.ndarray, str, Path],
    ocr_output: Optional["OCROutput"] = None,
) -> TableOutput:
    """
    Extract table structure from a single image.

    Args:
        image: Table image, ideally cropped to the table region.
        ocr_output: Optional OCR results used to fill in cell text.
            When omitted, the model attempts to extract text itself.

    Returns:
        TableOutput with cells, structure, and export methods.

    Example:
        ```python
        # Without OCR (model extracts text)
        result = extractor.extract(table_image)

        # With OCR (better text quality)
        ocr = some_ocr.extract(table_image)
        result = extractor.extract(table_image, ocr_output=ocr)
        ```
    """

batch_extract

batch_extract(
    images: List[Union[Image, ndarray, str, Path]],
    ocr_outputs: Optional[List[OCROutput]] = None,
    progress_callback: Optional[
        Callable[[int, int], None]
    ] = None,
) -> List[TableOutput]

Extract tables from multiple images.

Default implementation loops over extract(). Subclasses can override for optimized batching.

PARAMETER DESCRIPTION
images

List of table images

TYPE: List[Union[Image, ndarray, str, Path]]

ocr_outputs

Optional list of OCR results (same length as images)

TYPE: Optional[List[OCROutput]] DEFAULT: None

progress_callback

Optional function(current, total) for progress

TYPE: Optional[Callable[[int, int], None]] DEFAULT: None

RETURNS DESCRIPTION
List[TableOutput]

List of TableOutput in same order as input

Examples:

results = extractor.batch_extract(table_images)
Source code in omnidocs/tasks/table_extraction/base.py
def batch_extract(
    self,
    images: List[Union[Image.Image, np.ndarray, str, Path]],
    ocr_outputs: Optional[List["OCROutput"]] = None,
    progress_callback: Optional[Callable[[int, int], None]] = None,
) -> List[TableOutput]:
    """
    Extract tables from multiple images.

    Default implementation loops over extract(). Subclasses can override
    for optimized batching.

    Args:
        images: List of table images
        ocr_outputs: Optional list of OCR results (same length as images)
        progress_callback: Optional function(current, total) for progress

    Returns:
        List of TableOutput in same order as input

    Raises:
        ValueError: If a non-empty ocr_outputs has a different length
            than images.

    Examples:
        ```python
        results = extractor.batch_extract(table_images)
        ```
    """
    # Fail fast with a clear message instead of an opaque IndexError
    # midway through the batch. An empty list keeps its legacy meaning
    # of "no OCR results" (same as None).
    if ocr_outputs and len(ocr_outputs) != len(images):
        raise ValueError(
            f"ocr_outputs length ({len(ocr_outputs)}) must match images length ({len(images)})"
        )

    results = []
    total = len(images)

    for i, image in enumerate(images):
        if progress_callback:
            # Report 1-based progress before processing each image.
            progress_callback(i + 1, total)

        ocr = ocr_outputs[i] if ocr_outputs else None
        result = self.extract(image, ocr_output=ocr)
        results.append(result)

    return results

extract_document

extract_document(
    document: Document,
    table_bboxes: Optional[List[List[float]]] = None,
    progress_callback: Optional[
        Callable[[int, int], None]
    ] = None,
) -> List[TableOutput]

Extract tables from all pages of a document.

PARAMETER DESCRIPTION
document

Document instance

TYPE: Document

table_bboxes

Optional list of table bounding boxes per page. Each element should be a list of [x1, y1, x2, y2] coords.

TYPE: Optional[List[List[float]]] DEFAULT: None

progress_callback

Optional function(current, total) for progress

TYPE: Optional[Callable[[int, int], None]] DEFAULT: None

RETURNS DESCRIPTION
List[TableOutput]

List of TableOutput, one per detected table

Examples:

doc = Document.from_pdf("paper.pdf")
results = extractor.extract_document(doc)
Source code in omnidocs/tasks/table_extraction/base.py
def extract_document(
    self,
    document: "Document",
    table_bboxes: Optional[List[List[float]]] = None,
    progress_callback: Optional[Callable[[int, int], None]] = None,
) -> List[TableOutput]:
    """
    Extract tables from every page of a document.

    Args:
        document: Document instance
        table_bboxes: Optional list of table bounding boxes. Each element
            is a [x1, y1, x2, y2] box. NOTE(review): the same box list is
            applied to every page — confirm this matches caller intent.
        progress_callback: Optional function(current, total) for progress

    Returns:
        List of TableOutput, one per detected table

    Examples:
        ```python
        doc = Document.from_pdf("paper.pdf")
        results = extractor.extract_document(doc)
        ```
    """
    outputs: List[TableOutput] = []
    total = document.page_count

    for page_num, page in enumerate(document.iter_pages(), start=1):
        if progress_callback:
            progress_callback(page_num, total)

        if table_bboxes is None:
            # No regions supplied: run extraction on the full page image.
            outputs.append(self.extract(page))
            continue

        # Crop each supplied region from the page and extract it.
        for x1, y1, x2, y2 in table_bboxes:
            region = page.crop((x1, y1, x2, y2))
            outputs.append(self.extract(region))

    return outputs

models

Pydantic models for table extraction outputs.

Provides structured table data with cells, spans, and multiple export formats including HTML, Markdown, and Pandas DataFrame conversion.

Example
result = extractor.extract(table_image)

# Get HTML
html = result.to_html()

# Get Pandas DataFrame
df = result.to_dataframe()

# Access cells
for cell in result.cells:
    print(f"[{cell.row},{cell.col}] {cell.text}")

CellType

Bases: str, Enum

Type of table cell.

BoundingBox

Bases: BaseModel

Bounding box in pixel coordinates.

width property

width: float

Width of the bounding box.

height property

height: float

Height of the bounding box.

area property

area: float

Area of the bounding box.

center property

center: Tuple[float, float]

Center point of the bounding box.

to_list

to_list() -> List[float]

Convert to [x1, y1, x2, y2] list.

Source code in omnidocs/tasks/table_extraction/models.py
def to_list(self) -> List[float]:
    """Return the box as a [x1, y1, x2, y2] list."""
    return [self.x1, self.y1, self.x2, self.y2]

to_xyxy

to_xyxy() -> Tuple[float, float, float, float]

Convert to (x1, y1, x2, y2) tuple.

Source code in omnidocs/tasks/table_extraction/models.py
def to_xyxy(self) -> Tuple[float, float, float, float]:
    """Return the box as an (x1, y1, x2, y2) tuple."""
    return (self.x1, self.y1, self.x2, self.y2)

from_list classmethod

from_list(coords: List[float]) -> BoundingBox

Create from [x1, y1, x2, y2] list.

Source code in omnidocs/tasks/table_extraction/models.py
@classmethod
def from_list(cls, coords: List[float]) -> "BoundingBox":
    """Build a bounding box from a [x1, y1, x2, y2] list."""
    if len(coords) == 4:
        x1, y1, x2, y2 = coords
        return cls(x1=x1, y1=y1, x2=x2, y2=y2)
    raise ValueError(f"Expected 4 coordinates, got {len(coords)}")

from_ltrb classmethod

from_ltrb(
    left: float, top: float, right: float, bottom: float
) -> BoundingBox

Create from left, top, right, bottom coordinates.

Source code in omnidocs/tasks/table_extraction/models.py
@classmethod
def from_ltrb(cls, left: float, top: float, right: float, bottom: float) -> "BoundingBox":
    """Build a bounding box from left/top/right/bottom edge coordinates."""
    return cls(
        x1=left,
        y1=top,
        x2=right,
        y2=bottom,
    )

to_normalized

to_normalized(
    image_width: int, image_height: int
) -> BoundingBox

Convert to normalized coordinates (0-1024 range).

PARAMETER DESCRIPTION
image_width

Original image width in pixels

TYPE: int

image_height

Original image height in pixels

TYPE: int

RETURNS DESCRIPTION
BoundingBox

New BoundingBox with coordinates in 0-1024 range

Source code in omnidocs/tasks/table_extraction/models.py
def to_normalized(self, image_width: int, image_height: int) -> "BoundingBox":
    """
    Convert to normalized coordinates (0-1024 range).

    Args:
        image_width: Original image width in pixels
        image_height: Original image height in pixels

    Returns:
        New BoundingBox with coordinates in 0-1024 range
    """
    # x-coordinates scale by width, y-coordinates by height; identical
    # arithmetic to the pixel->normalized mapping used elsewhere.
    scaled = {
        "x1": self.x1 / image_width * NORMALIZED_SIZE,
        "y1": self.y1 / image_height * NORMALIZED_SIZE,
        "x2": self.x2 / image_width * NORMALIZED_SIZE,
        "y2": self.y2 / image_height * NORMALIZED_SIZE,
    }
    return BoundingBox(**scaled)

TableCell

Bases: BaseModel

Single table cell with position, span, and content.

The cell position uses 0-indexed row/column indices. Spans indicate how many rows/columns the cell occupies.

end_row property

end_row: int

Ending row index (exclusive).

end_col property

end_col: int

Ending column index (exclusive).

is_header property

is_header: bool

Check if cell is any type of header.

to_dict

to_dict() -> Dict

Convert to dictionary representation.

Source code in omnidocs/tasks/table_extraction/models.py
def to_dict(self) -> Dict:
    """Serialize the cell to a plain dictionary."""
    payload = {
        "row": self.row,
        "col": self.col,
        "row_span": self.row_span,
        "col_span": self.col_span,
        "text": self.text,
        "cell_type": self.cell_type.value,
    }
    # bbox is optional; serialize it as a flat list when present.
    if self.bbox:
        payload["bbox"] = self.bbox.to_list()
    else:
        payload["bbox"] = None
    payload["confidence"] = self.confidence
    return payload

TableOutput

Bases: BaseModel

Complete table extraction result.

Provides multiple export formats and utility methods for working with extracted table data.

Example
result = extractor.extract(table_image)

# Basic info
print(f"Table: {result.num_rows}x{result.num_cols}")

# Export to HTML
html = result.to_html()

# Export to Pandas
df = result.to_dataframe()

# Export to Markdown
md = result.to_markdown()

# Access specific cell
cell = result.get_cell(row=0, col=0)

cell_count property

cell_count: int

Number of cells in the table.

has_headers property

has_headers: bool

Check if table has header cells.

get_cell

get_cell(row: int, col: int) -> Optional[TableCell]

Get cell at specific position.

Handles merged cells by returning the cell that covers the position.

Source code in omnidocs/tasks/table_extraction/models.py
def get_cell(self, row: int, col: int) -> Optional[TableCell]:
    """
    Get cell at specific position.

    Handles merged cells by returning the cell that covers the position.
    """
    covering = (
        c
        for c in self.cells
        if c.row <= row < c.end_row and c.col <= col < c.end_col
    )
    # First matching cell wins, exactly like the linear scan; None if no cover.
    return next(covering, None)

get_row

get_row(row: int) -> List[TableCell]

Get all cells in a specific row.

Source code in omnidocs/tasks/table_extraction/models.py
def get_row(self, row: int) -> List[TableCell]:
    """Return every cell whose starting row index equals ``row``."""
    matches = []
    for cell in self.cells:
        if cell.row == row:
            matches.append(cell)
    return matches

get_column

get_column(col: int) -> List[TableCell]

Get all cells in a specific column.

Source code in omnidocs/tasks/table_extraction/models.py
def get_column(self, col: int) -> List[TableCell]:
    """Return every cell whose starting column index equals ``col``."""
    return list(filter(lambda cell: cell.col == col, self.cells))

to_html

to_html(include_styles: bool = True) -> str

Convert table to HTML string.

PARAMETER DESCRIPTION
include_styles

Whether to include basic CSS styling

TYPE: bool DEFAULT: True

RETURNS DESCRIPTION
str

HTML table string

Example
html = result.to_html()
with open("table.html", "w") as f:
    f.write(html)
Source code in omnidocs/tasks/table_extraction/models.py
def to_html(self, include_styles: bool = True) -> str:
    """
    Convert table to HTML string.

    Args:
        include_styles: Whether to include basic CSS styling

    Returns:
        HTML table string

    Example:
        ```python
        html = result.to_html()
        with open("table.html", "w") as f:
            f.write(html)
        ```
    """
    # Build 2D grid accounting for spans
    grid: List[List[Optional[TableCell]]] = [[None for _ in range(self.num_cols)] for _ in range(self.num_rows)]

    for cell in self.cells:
        for r in range(cell.row, cell.end_row):
            for c in range(cell.col, cell.end_col):
                if r < self.num_rows and c < self.num_cols:
                    grid[r][c] = cell

    # Generate HTML
    lines = []

    if include_styles:
        lines.append('<table style="border-collapse: collapse; width: 100%;">')
    else:
        lines.append("<table>")

    # Shared per-cell style, hoisted so empty and filled cells render the same.
    cell_style = 'style="border: 1px solid #ddd; padding: 8px;"' if include_styles else ""

    processed: set[Tuple[int, int]] = set()  # Track cells we've already output

    for row_idx in range(self.num_rows):
        lines.append("  <tr>")

        for col_idx in range(self.num_cols):
            cell = grid[row_idx][col_idx]

            if cell is None:
                # FIX: empty positions used to be emitted without the border
                # style even when include_styles=True, making them visually
                # inconsistent with the filled cells around them.
                if cell_style:
                    lines.append(f"    <td {cell_style}></td>")
                else:
                    lines.append("    <td></td>")
                continue

            # Skip if this cell was already output (merged cell)
            cell_id = (cell.row, cell.col)
            if cell_id in processed:
                continue
            processed.add(cell_id)

            # Determine tag based on cell type
            tag = "th" if cell.is_header else "td"

            # Build attributes
            attrs = []
            if cell.row_span > 1:
                attrs.append(f'rowspan="{cell.row_span}"')
            if cell.col_span > 1:
                attrs.append(f'colspan="{cell.col_span}"')
            if cell_style:
                attrs.append(cell_style)

            attr_str = " " + " ".join(attrs) if attrs else ""

            # Escape HTML in text
            text = (cell.text or "").replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")

            lines.append(f"    <{tag}{attr_str}>{text}</{tag}>")

        lines.append("  </tr>")

    lines.append("</table>")

    return "\n".join(lines)

to_dataframe

to_dataframe()

Convert table to Pandas DataFrame.

RETURNS DESCRIPTION

pandas.DataFrame with table data

RAISES DESCRIPTION
ImportError

If pandas is not installed

Example
df = result.to_dataframe()
print(df.head())
df.to_csv("table.csv")
Source code in omnidocs/tasks/table_extraction/models.py
def to_dataframe(self):
    """
    Convert table to Pandas DataFrame.

    Returns:
        pandas.DataFrame with table data

    Raises:
        ImportError: If pandas is not installed

    Example:
        ```python
        df = result.to_dataframe()
        print(df.head())
        df.to_csv("table.csv")
        ```
    """
    try:
        import pandas as pd
    except ImportError:
        raise ImportError("pandas is required for to_dataframe(). Install with: pip install pandas")

    # Fill a dense 2D array; merged cells contribute only their top-left value.
    data: List[List[Optional[str]]] = [[None] * self.num_cols for _ in range(self.num_rows)]
    for cell in self.cells:
        inside = cell.row < self.num_rows and cell.col < self.num_cols
        if inside:
            data[cell.row][cell.col] = cell.text

    # Promote row 0 to column labels when every cell in it is a column header.
    first_row = self.get_row(0)
    use_header = bool(first_row) and all(c.cell_type == CellType.COLUMN_HEADER for c in first_row)

    if use_header and self.num_rows > 1:
        return pd.DataFrame(data[1:], columns=data[0])
    return pd.DataFrame(data)

to_markdown

to_markdown() -> str

Convert table to Markdown format.

Note: Markdown tables don't support merged cells, so spans are ignored and only the top-left cell value is used.

RETURNS DESCRIPTION
str

Markdown table string

Source code in omnidocs/tasks/table_extraction/models.py
def to_markdown(self) -> str:
    """
    Convert table to Markdown format.

    Note: Markdown tables don't support merged cells, so spans
    are ignored and only the top-left cell value is used.

    Cell text is sanitized so it cannot break the table layout:
    literal ``|`` characters are escaped as ``\\|`` and newlines
    are collapsed to spaces.

    Returns:
        Markdown table string
    """
    if self.num_rows == 0 or self.num_cols == 0:
        return ""

    def _sanitize(text: str) -> str:
        # Pipes delimit Markdown columns and newlines end rows; neutralize both.
        return text.replace("|", "\\|").replace("\n", " ")

    # Build 2D grid
    grid: List[List[str]] = [["" for _ in range(self.num_cols)] for _ in range(self.num_rows)]

    for cell in self.cells:
        if cell.row < self.num_rows and cell.col < self.num_cols:
            grid[cell.row][cell.col] = _sanitize(cell.text or "")

    lines = []

    # Header row
    lines.append("| " + " | ".join(grid[0]) + " |")

    # Separator
    lines.append("| " + " | ".join(["---"] * self.num_cols) + " |")

    # Data rows
    for row in grid[1:]:
        lines.append("| " + " | ".join(row) + " |")

    return "\n".join(lines)

to_dict

to_dict() -> Dict

Convert to dictionary representation.

Source code in omnidocs/tasks/table_extraction/models.py
def to_dict(self) -> Dict:
    """Serialize the full table result to a plain dictionary."""
    payload = {"cells": [cell.to_dict() for cell in self.cells]}
    # Copy scalar metadata fields verbatim, in the documented key order.
    for attr in ("num_rows", "num_cols", "image_width", "image_height", "model_name"):
        payload[attr] = getattr(self, attr)
    # Include an unstyled HTML rendering for convenience.
    payload["html"] = self.to_html(include_styles=False)
    return payload

save_json

save_json(file_path: Union[str, Path]) -> None

Save to JSON file.

Source code in omnidocs/tasks/table_extraction/models.py
def save_json(self, file_path: Union[str, Path]) -> None:
    """Write the result to ``file_path`` as pretty-printed JSON."""
    target = Path(file_path)
    # Create missing parent directories so callers can pass fresh paths.
    target.parent.mkdir(parents=True, exist_ok=True)
    with target.open("w", encoding="utf-8") as handle:
        handle.write(self.model_dump_json(indent=2))

load_json classmethod

load_json(file_path: Union[str, Path]) -> TableOutput

Load from JSON file.

Source code in omnidocs/tasks/table_extraction/models.py
@classmethod
def load_json(cls, file_path: Union[str, Path]) -> "TableOutput":
    """Load a previously saved result from a JSON file."""
    raw = Path(file_path).read_text(encoding="utf-8")
    return cls.model_validate_json(raw)

tableformer

TableFormer module for table structure extraction.

Provides the TableFormer-based table structure extractor.

TableFormerConfig

Bases: BaseModel

Configuration for TableFormer table structure extractor.

TableFormer is a transformer-based model that predicts table structure using OTSL (Optimal Table Structure Language) tags and cell bounding boxes.

ATTRIBUTE DESCRIPTION
mode

Inference mode - "fast" or "accurate"

TYPE: TableFormerMode

device

Device for inference - "cpu", "cuda", "mps", or "auto"

TYPE: Literal['cpu', 'cuda', 'mps', 'auto']

num_threads

Number of CPU threads for inference

TYPE: int

do_cell_matching

Whether to match predicted cells with OCR text cells

TYPE: bool

artifacts_path

Path to pre-downloaded model artifacts

TYPE: Optional[str]

repo_id

HuggingFace model repository

TYPE: str

revision

Model revision/tag

TYPE: str

Example
from omnidocs.tasks.table_extraction import TableFormerExtractor, TableFormerConfig

# Fast mode
extractor = TableFormerExtractor(config=TableFormerConfig(mode="fast"))

# Accurate mode with GPU
extractor = TableFormerExtractor(
    config=TableFormerConfig(
        mode="accurate",
        device="cuda",
        do_cell_matching=True,
    )
)

TableFormerMode

Bases: str, Enum

TableFormer inference mode.

TableFormerExtractor

TableFormerExtractor(config: TableFormerConfig)

Bases: BaseTableExtractor

Table structure extractor using TableFormer model.

TableFormer is a transformer-based model that predicts table structure using OTSL (Optimal Table Structure Language) tags. It can detect:

- Cell boundaries (bounding boxes)
- Row and column spans
- Header cells (column and row headers)
- Section rows

Example
from omnidocs.tasks.table_extraction import TableFormerExtractor, TableFormerConfig

# Initialize extractor
extractor = TableFormerExtractor(
    config=TableFormerConfig(mode="fast", device="cuda")
)

# Extract table structure
result = extractor.extract(table_image)

# Get HTML output
html = result.to_html()

# Get DataFrame
df = result.to_dataframe()

Initialize TableFormer extractor.

PARAMETER DESCRIPTION
config

TableFormerConfig with model settings

TYPE: TableFormerConfig

Source code in omnidocs/tasks/table_extraction/tableformer/pytorch.py
def __init__(self, config: TableFormerConfig):
    """
    Initialize TableFormer extractor.

    Args:
        config: TableFormerConfig with model settings
    """
    self.config = config
    # Resolve the requested device string ("cpu"/"cuda"/"mps"/"auto") into
    # the concrete device used for inference.
    self._device = _resolve_device(config.device)
    # Placeholders; presumably populated by _load_model() — confirm there.
    self._predictor = None
    self._model_config: Optional[Dict] = None
    # Load the model eagerly so extract() is usable right after construction.
    self._load_model()

extract

extract(
    image: Union[Image, ndarray, str, Path],
    ocr_output: Optional[OCROutput] = None,
) -> TableOutput

Extract table structure from an image.

PARAMETER DESCRIPTION
image

Table image (should be cropped to table region)

TYPE: Union[Image, ndarray, str, Path]

ocr_output

Optional OCR results for cell text matching

TYPE: Optional[OCROutput] DEFAULT: None

RETURNS DESCRIPTION
TableOutput

TableOutput with cells, structure, and export methods

Example
result = extractor.extract(table_image)
print(f"Table: {result.num_rows}x{result.num_cols}")
html = result.to_html()
Source code in omnidocs/tasks/table_extraction/tableformer/pytorch.py
def extract(
    self,
    image: Union[Image.Image, np.ndarray, str, Path],
    ocr_output: Optional["OCROutput"] = None,
) -> TableOutput:
    """
    Extract table structure from an image.

    Args:
        image: Table image (should be cropped to table region)
        ocr_output: Optional OCR results for cell text matching

    Returns:
        TableOutput with cells, structure, and export methods

    Example:
        ```python
        result = extractor.extract(table_image)
        print(f"Table: {result.num_rows}x{result.num_cols}")
        html = result.to_html()
        ```
    """
    # Normalize the input into a PIL image first.
    pil_image = self._prepare_image(image)
    width, height = pil_image.size

    # TFPredictor consumes OpenCV (BGR) arrays, so cv2 is required here.
    try:
        import cv2
    except ImportError:
        raise ImportError(
            "opencv-python is required for TableFormerExtractor. Install with: pip install opencv-python-headless"
        )

    bgr_image = cv2.cvtColor(np.array(pil_image), cv2.COLOR_RGB2BGR)

    # Assemble the iOCR page payload; tokens stay empty without OCR input.
    iocr_page = {
        "width": width,
        "height": height,
        "image": bgr_image,
        "tokens": self._build_tokens_from_ocr(ocr_output) if ocr_output else [],
    }

    # The whole input image is treated as a single table region.
    predictions = self._predictor.multi_table_predict(
        iocr_page=iocr_page,
        table_bboxes=[[0, 0, width, height]],
        do_matching=self.config.do_cell_matching,
        correct_overlapping_cells=self.config.correct_overlapping_cells,
        sort_row_col_indexes=self.config.sort_row_col_indexes,
    )

    return self._convert_results(predictions, width, height)

config

Configuration for TableFormer table structure extractor.

TableFormer uses a dual-decoder transformer architecture with OTSL+ support for recognizing table structure from images.

Example
from omnidocs.tasks.table_extraction import TableFormerExtractor, TableFormerConfig

# Fast mode (default)
extractor = TableFormerExtractor(config=TableFormerConfig())

# Accurate mode with GPU
extractor = TableFormerExtractor(
    config=TableFormerConfig(
        mode="accurate",
        device="cuda",
        do_cell_matching=True,
    )
)

TableFormerMode

Bases: str, Enum

TableFormer inference mode.

TableFormerConfig

Bases: BaseModel

Configuration for TableFormer table structure extractor.

TableFormer is a transformer-based model that predicts table structure using OTSL (Optimal Table Structure Language) tags and cell bounding boxes.

ATTRIBUTE DESCRIPTION
mode

Inference mode - "fast" or "accurate"

TYPE: TableFormerMode

device

Device for inference - "cpu", "cuda", "mps", or "auto"

TYPE: Literal['cpu', 'cuda', 'mps', 'auto']

num_threads

Number of CPU threads for inference

TYPE: int

do_cell_matching

Whether to match predicted cells with OCR text cells

TYPE: bool

artifacts_path

Path to pre-downloaded model artifacts

TYPE: Optional[str]

repo_id

HuggingFace model repository

TYPE: str

revision

Model revision/tag

TYPE: str

Example
from omnidocs.tasks.table_extraction import TableFormerExtractor, TableFormerConfig

# Fast mode
extractor = TableFormerExtractor(config=TableFormerConfig(mode="fast"))

# Accurate mode with GPU
extractor = TableFormerExtractor(
    config=TableFormerConfig(
        mode="accurate",
        device="cuda",
        do_cell_matching=True,
    )
)

pytorch

TableFormer extractor implementation using PyTorch backend.

Uses the TFPredictor from docling-ibm-models for table structure recognition.

TableFormerExtractor

TableFormerExtractor(config: TableFormerConfig)

Bases: BaseTableExtractor

Table structure extractor using TableFormer model.

TableFormer is a transformer-based model that predicts table structure using OTSL (Optimal Table Structure Language) tags. It can detect:

- Cell boundaries (bounding boxes)
- Row and column spans
- Header cells (column and row headers)
- Section rows

Example
from omnidocs.tasks.table_extraction import TableFormerExtractor, TableFormerConfig

# Initialize extractor
extractor = TableFormerExtractor(
    config=TableFormerConfig(mode="fast", device="cuda")
)

# Extract table structure
result = extractor.extract(table_image)

# Get HTML output
html = result.to_html()

# Get DataFrame
df = result.to_dataframe()

Initialize TableFormer extractor.

PARAMETER DESCRIPTION
config

TableFormerConfig with model settings

TYPE: TableFormerConfig

Source code in omnidocs/tasks/table_extraction/tableformer/pytorch.py
def __init__(self, config: TableFormerConfig):
    """
    Initialize TableFormer extractor.

    Args:
        config: TableFormerConfig with model settings
    """
    self.config = config
    # Resolve the requested device string ("cpu"/"cuda"/"mps"/"auto") into
    # the concrete device used for inference.
    self._device = _resolve_device(config.device)
    # Placeholders; presumably populated by _load_model() — confirm there.
    self._predictor = None
    self._model_config: Optional[Dict] = None
    # Load the model eagerly so extract() is usable right after construction.
    self._load_model()
extract
extract(
    image: Union[Image, ndarray, str, Path],
    ocr_output: Optional[OCROutput] = None,
) -> TableOutput

Extract table structure from an image.

PARAMETER DESCRIPTION
image

Table image (should be cropped to table region)

TYPE: Union[Image, ndarray, str, Path]

ocr_output

Optional OCR results for cell text matching

TYPE: Optional[OCROutput] DEFAULT: None

RETURNS DESCRIPTION
TableOutput

TableOutput with cells, structure, and export methods

Example
result = extractor.extract(table_image)
print(f"Table: {result.num_rows}x{result.num_cols}")
html = result.to_html()
Source code in omnidocs/tasks/table_extraction/tableformer/pytorch.py
def extract(
    self,
    image: Union[Image.Image, np.ndarray, str, Path],
    ocr_output: Optional["OCROutput"] = None,
) -> TableOutput:
    """
    Extract table structure from an image.

    Args:
        image: Table image (should be cropped to table region)
        ocr_output: Optional OCR results for cell text matching

    Returns:
        TableOutput with cells, structure, and export methods

    Example:
        ```python
        result = extractor.extract(table_image)
        print(f"Table: {result.num_rows}x{result.num_cols}")
        html = result.to_html()
        ```
    """
    # Normalize the input into a PIL image first.
    pil_image = self._prepare_image(image)
    width, height = pil_image.size

    # TFPredictor consumes OpenCV (BGR) arrays, so cv2 is required here.
    try:
        import cv2
    except ImportError:
        raise ImportError(
            "opencv-python is required for TableFormerExtractor. Install with: pip install opencv-python-headless"
        )

    bgr_image = cv2.cvtColor(np.array(pil_image), cv2.COLOR_RGB2BGR)

    # Assemble the iOCR page payload; tokens stay empty without OCR input.
    iocr_page = {
        "width": width,
        "height": height,
        "image": bgr_image,
        "tokens": self._build_tokens_from_ocr(ocr_output) if ocr_output else [],
    }

    # The whole input image is treated as a single table region.
    predictions = self._predictor.multi_table_predict(
        iocr_page=iocr_page,
        table_bboxes=[[0, 0, width, height]],
        do_matching=self.config.do_cell_matching,
        correct_overlapping_cells=self.config.correct_overlapping_cells,
        sort_row_col_indexes=self.config.sort_row_col_indexes,
    )

    return self._convert_results(predictions, width, height)