Surya Table
In [1]:
Copied!
from omnidocs.tasks.table_extraction.extractors import SuryaTableExtractor
from omnidocs.tasks.table_extraction.extractors import SuryaTableExtractor
c:\Users\laxma\OneDrive\Desktop\CogLab\11-07-2025\Omnidocs\new\Lib\site-packages\transformers\utils\hub.py:111: FutureWarning: Using `TRANSFORMERS_CACHE` is deprecated and will be removed in v5 of Transformers. Use `HF_HOME` instead. warnings.warn(
In [4]:
Copied!
# Path to the sample table image, relative to the directory the notebook is
# run from (it resolves into the repository's tests/table_extraction/assets).
image_path = "../../../../tests/table_extraction/assets/table_image.png"

# Build the extractor and run it once on the image. The returned object is
# kept in `result`; the summary cell further down reads `result.tables`.
# NOTE(review): the recorded run logged "extract completed in 27.89s", so
# avoid re-running this cell needlessly.
result = SuryaTableExtractor().extract(image_path)
Recognizing layout: 100%|██████████| 1/1 [00:04<00:00, 4.21s/it] Detecting bboxes: 100%|██████████| 1/1 [00:03<00:00, 3.03s/it] Recognizing Text: 100%|██████████| 38/38 [00:20<00:00, 1.87it/s]
INFO [timestamp]2025-07-31 13:25:01[/] | [logger.name]omnidocs.tasks.table_extraction.extractors.surya_table[/] | [function]logging.py:150[/] | [info]extract completed in 27.89s[/]
INFO [timestamp]2025-07-31 13:25:01[/] | [logger.name]omnidocs.tasks.table_extraction.extractors.surya_table[/] | [function]logging.py:150[/] | [info]extract completed in 27.89s[/]
In [5]:
Copied!
# Print a short summary of every table found in `result`: its dimensions,
# its cell counts, and a preview of the leading cells.
for i, table in enumerate(result.tables):
    # `i` is zero-based; tables are numbered from 1 in the printed output.
    print(f"\nTable {i+1}: {table.num_rows} rows x {table.num_cols} columns")
    print(f"Total cells: {len(table.cells)}")
    if table.cells:
        # A cell counts as non-empty when its text is not just whitespace.
        non_empty_cells = [cell for cell in table.cells if cell.text.strip()]
        print(f"Non-empty cells: {len(non_empty_cells)}")
        # Show first few cells: only the first 10 cells are inspected, blank
        # ones are skipped, and each text is truncated to 30 characters.
        for cell in table.cells[:10]:
            stripped = cell.text.strip()  # strip once, reuse for test and preview
            if stripped:
                text = stripped[:30]
                print(f" [{cell.row},{cell.col}]: '{text}'")
Table 1: 12 rows x 6 columns Total cells: 38 Non-empty cells: 38 [0,0]: '<b>Results</b>' [1,0]: '<b>Ballots</b>' [2,0]: '<b>Disability</b>' [2,1]: '<b>Ballots</b>' [3,0]: '<b>Participants</b>' [3,1]: 'Incomplete/' [4,0]: '<b>Category</b>' [4,1]: '<b>Completed</b>' [4,2]: '<b>Accuracy</b>' [4,3]: '<b>Time to</b>'