Camelot
In [2]:
Copied!
from omnidocs.tasks.table_extraction.extractors import CamelotExtractor
from omnidocs.tasks.table_extraction.extractors import CamelotExtractor
c:\Users\laxma\OneDrive\Desktop\CogLab\11-07-2025\Omnidocs\new\Lib\site-packages\transformers\utils\hub.py:111: FutureWarning: Using `TRANSFORMERS_CACHE` is deprecated and will be removed in v5 of Transformers. Use `HF_HOME` instead. warnings.warn(
In [3]:
Copied!
# Sample PDF used for this demo. The path is relative, so it is resolved
# against the kernel's current working directory.
pdf_path = "../../../../tests/table_extraction/assets/table_document.pdf"

# Run the extraction a single time and keep the result; the summary cell
# below reads `result.tables`.
result = CamelotExtractor().extract(pdf_path)
c:\Users\laxma\OneDrive\Desktop\CogLab\11-07-2025\Omnidocs\new\Lib\site-packages\pypdf\_crypt_providers\_cryptography.py:32: CryptographyDeprecationWarning: ARC4 has been moved to cryptography.hazmat.decrepit.ciphers.algorithms.ARC4 and will be removed from cryptography.hazmat.primitives.ciphers.algorithms in 48.0.0. from cryptography.hazmat.primitives.ciphers.algorithms import AES, ARC4
INFO [timestamp]2025-07-31 13:10:29[/] | [logger.name]omnidocs.tasks.table_extraction.extractors.camelot[/] | [function]logging.py:150[/] | [info]extract completed in 1.29s[/]
INFO [timestamp]2025-07-31 13:10:29[/] | [logger.name]omnidocs.tasks.table_extraction.extractors.camelot[/] | [function]logging.py:150[/] | [info]extract completed in 1.29s[/]
In [4]:
Copied!
# Print a short summary of every extracted table: its dimensions, total and
# non-empty cell counts, and a preview of the leading cells.
for i, table in enumerate(result.tables):
    print(f"\nTable {i+1}: {table.num_rows} rows x {table.num_cols} columns")
    print(f"Total cells: {len(table.cells)}")
    if table.cells:
        # A cell counts as non-empty when its text has non-whitespace content.
        non_empty_cells = [cell for cell in table.cells if cell.text.strip()]
        print(f"Non-empty cells: {len(non_empty_cells)}")
        # Show first few cells: only the first 10 cells are inspected, blank
        # ones are skipped, and each preview is cut to 30 characters.
        for cell in table.cells[:10]:
            stripped = cell.text.strip()
            if stripped:
                text = stripped[:30]
                print(f" [{cell.row},{cell.col}]: '{text}'")
Table 1: 6 rows x 6 columns Total cells: 36 Non-empty cells: 31 [0,0]: 'Disability Category' [0,1]: 'Participants' [0,2]: 'Ballots Completed' [0,3]: 'Ballots Incomplete/ Terminat' [0,4]: 'Results'