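Surya Table Extraction

Runs `SuryaTableExtractor` on a sample table image and prints a summary of the extracted table cells.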

In [1]:

Copied!

from omnidocs.tasks.table_extraction.extractors import SuryaTableExtractor
from omnidocs.tasks.table_extraction.extractors import SuryaTableExtractor

...\site-packages\transformers\utils\hub.py:111: FutureWarning: Using `TRANSFORMERS_CACHE` is deprecated and will be removed in v5 of Transformers. Use `HF_HOME` instead.
  warnings.warn(

In [4]:

Copied!

# Sample table image from the test assets (relative path).
image_path = "../../../../tests/table_extraction/assets/table_image.png"

# Run the extraction exactly once: the logged run took ~28 s, so repeating
# these statements would double the cost for no benefit. `result` is read by
# the summary loop further down.
result = SuryaTableExtractor().extract(image_path)

Recognizing layout: 100%|██████████| 1/1 [00:04<00:00,  4.21s/it]
Detecting bboxes: 100%|██████████| 1/1 [00:03<00:00,  3.03s/it]
Recognizing Text: 100%|██████████| 38/38 [00:20<00:00,  1.87it/s]

INFO     [timestamp]2025-07-31 13:25:01[/] | [logger.name]omnidocs.tasks.table_extraction.extractors.surya_table[/]
         | [function]logging.py:150[/] | [info]extract completed in 27.89s[/]

INFO     [timestamp]2025-07-31 13:25:01[/] | [logger.name]omnidocs.tasks.table_extraction.extractors.surya_table[/]
         | [function]logging.py:150[/] | [info]extract completed in 27.89s[/]

In [5]:

Copied!





# Print a short summary for every table in `result.tables`: its dimensions,
# total and non-empty cell counts, and a preview of the leading cells.
for i, table in enumerate(result.tables):
    print(f"\nTable {i+1}: {table.num_rows} rows x {table.num_cols} columns")
    print(f"Total cells: {len(table.cells)}")

    if table.cells:
        non_empty_cells = [cell for cell in table.cells if cell.text.strip()]
        print(f"Non-empty cells: {len(non_empty_cells)}")

        # Preview: inspect only the first 10 cells, skip blank ones, and
        # truncate each cell's text to 30 characters to keep output short.
        for cell in table.cells[:10]:
            stripped = cell.text.strip()
            if stripped:
                text = stripped[:30]
                print(f"  [{cell.row},{cell.col}]: '{text}'")

Table 1: 12 rows x 6 columns
Total cells: 38
Non-empty cells: 38
  [0,0]: '<b>Results</b>'
  [1,0]: '<b>Ballots</b>'
  [2,0]: '<b>Disability</b>'
  [2,1]: '<b>Ballots</b>'
  [3,0]: '<b>Participants</b>'
  [3,1]: 'Incomplete/'
  [4,0]: '<b>Category</b>'
  [4,1]: '<b>Completed</b>'
  [4,2]: '<b>Accuracy</b>'
  [4,3]: '<b>Time to</b>'