TableFormer
TableFormer with JSON and accurate coordinates.
Reference notebook: docs/getting_started/table_test.ipynb
In [1]:
Copied!
from omnidocs.tasks.table_extraction.extractors import TableFormerExtractor
from omnidocs.tasks.table_extraction.extractors import TableFormerExtractor
c:\Users\laxma\OneDrive\Desktop\CogLab\11-07-2025\Omnidocs\new\Lib\site-packages\transformers\utils\hub.py:111: FutureWarning: Using `TRANSFORMERS_CACHE` is deprecated and will be removed in v5 of Transformers. Use `HF_HOME` instead. warnings.warn(
In [2]:
Copied!
# Relative path to the sample table image (resolved against the current
# working directory of the kernel).
image_path = "../../../../tests/table_extraction/assets/table_image.png"

# Run the extractor exactly once; the statements were previously repeated,
# which re-created the extractor and re-ran extraction for the same image.
# `result.tables` is what the summary loop below iterates over.
result = TableFormerExtractor().extract(image_path)
INFO [timestamp]2025-07-31 13:29:40[/] | [logger.name]omnidocs.tasks.table_extraction.extractors.tableformer[/] | [function]logging.py:150[/] | [info]extract completed in 1.50s[/]
INFO [timestamp]2025-07-31 13:29:40[/] | [logger.name]omnidocs.tasks.table_extraction.extractors.tableformer[/] | [function]logging.py:150[/] | [info]extract completed in 1.50s[/]
In [3]:
Copied!
# Print a short summary of every extracted table: its dimensions, cell counts,
# and a preview of the text found in its leading cells.
for table_number, table in enumerate(result.tables, start=1):
    print(f"\nTable {table_number}: {table.num_rows} rows x {table.num_cols} columns")
    print(f"Total cells: {len(table.cells)}")

    if table.cells:
        # A cell counts as non-empty when its text has non-whitespace content.
        non_empty_cells = [cell for cell in table.cells if cell.text.strip()]
        print(f"Non-empty cells: {len(non_empty_cells)}")

        # Show first few cells: only the first 10 cells are inspected, and
        # each preview is truncated to 30 characters to keep the output short.
        for cell in table.cells[:10]:
            if cell.text.strip():
                text = cell.text.strip()[:30]
                print(f" [{cell.row},{cell.col}]: '{text}'")
Table 1: 6 rows x 7 columns Total cells: 39 Non-empty cells: 39 [0,0]: 'Cell_0_0' [0,1]: 'Cell_0_1' [0,2]: 'Cell_0_2' [0,3]: 'Cell_0_3' [0,4]: 'Cell_0_4' [0,5]: 'Cell_0_5' [0,6]: 'Cell_0_6' [1,0]: 'Cell_1_0' [1,1]: 'Cell_1_1' [1,2]: 'Cell_1_2'
In [ ]:
Copied!