If you have ever tried building a custom data extraction
tool for AutoCAD using the classic COM/ActiveX interfaces or vanilla LISP, you
know it can quickly turn into a headache of nested loops, slow execution, and
rigid data structures.
This post demonstrates a modern, high-performance approach
to CAD data mining. By combining PyRx (the Python wrapper for AutoCAD's native
C++ ObjectARX API) with the data science power-duo of Pandas and DuckDB, you
can extract block attributes, run complex relational queries on your drawing,
and export styled reports to Excel—all in a fraction of a second.
- PyRx (ObjectARX Speed): Operates at the native C++ memory
level. It reads drawing databases instantly without the COM automation
bottleneck.
- Dynamically shapes blocks into structured tables,
sanitising whitespace and handling missing columns gracefully.
- DuckDB (In-Memory SQL): Allows you to write standard SQL
(JOIN, GROUP BY, LIST) directly against your live Pandas DataFrames in memory
with zero setup overhead.
- wxPython (Native UI): Leverages AutoCAD's built-in UI
layer to provide a clean, modern file dialog.
"""
CAD Data Extractor & Auditor
Stack: PyRx (ObjectARX), Pandas, DuckDB, wxPython, openpyxl
Context: Extracts block attributes into dataframes, runs relational audits
via SQL, and exports the structured results to a multi-tab Excel file.
"""
from pyrx import Ap, Db, Ed
import pandas as pd
import duckdb
import os
import wx
@Ap.Command()
def doit():
try:
# --- 1. INITIALIZE AUTOCAD & SYSTEM CONTEXTS ---
app = Ap.Application()
wxapp: wx.App = app.wxApp() # Access CAD's native wxPython application instance
db = Db.curDb() # Get the current active database
bt = db.blockTable() # Access the drawing's Block Table
# Dictionary to store structured Pandas DataFrames for each unique block type
# Key: Block Name (str), Value: DataFrame
drawing_tables = {}
# --- 2. FAST DB ITERATION & DATAFRAME GENERATION ---
# PyRx lets us unpack names and Object IDs directly from the Block Table
for block_name, btr_id in bt:
# Skip anonymous blocks (*U...) and temporary audit blocks (A$...)
if block_name.startswith("*") or block_name.startswith("A$"):
continue
btr = Db.BlockTableRecord(btr_id)
if btr.isLayout(): # Skip Model Space and Paper Space layout definitions
continue
rows = []
# Query all physical references (insertions) of this specific block definition
for ref in btr.getBlockReferences():
# Performance Optimization: Skip blocks without attributes immediately
if not ref.hasAttributes():
continue
# Fetch attribute tag/value pairs cleanly via PyRx helper
attrs = ref.attdict()
if attrs:
# Clean trailing/leading spaces from drafting data and log the parent block name
cleaned_attrs = {k.strip(): v.strip() for k, v in attrs.items()}
cleaned_attrs["BLOCK_NAME"] = block_name
rows.append(cleaned_attrs)
# Convert the list of dictionaries into a Pandas DataFrame
if rows:
drawing_tables[block_name] = pd.DataFrame(rows)
# Fail-fast if the drawing has no attribute metadata to process
if not drawing_tables:
print("\nNo block attributes found in this drawing.")
return
# Extract target datasets safely using .get() to prevent KeyErrors if a block type is missing
df_plan = drawing_tables.get("plan", pd.DataFrame())
df_elev = drawing_tables.get("elev", pd.DataFrame())
# Consolidate all individual block dataframes into one master inventory sheet
# Pandas auto-aligns columns, filling mismatched attributes with NaN/None
df_master = pd.concat(drawing_tables.values(), ignore_index=True, sort=False)
# --- 3. RELATIONAL IN-MEMORY ANALYTICS VIA DUCKDB ---
df_inventory = df_master.copy()
df_rooms_matched = pd.DataFrame()
# Query 1: Relational JOIN - Find where both 'plan' and 'elev' blocks coexist in the same ROOM
if not df_plan.empty and not df_elev.empty:
query_join = """
SELECT p.ROOM, p.REF as Plan_Ref, e.REF as Elev_Ref, p.SCALE as Plan_Scale, e.SCALE as Elev_Scale
FROM df_plan p
JOIN df_elev e ON p.ROOM = e.ROOM
ORDER BY p.ROOM
"""
# DuckDB automatically scans the local Python scope to query df_plan and df_elev directly!
df_rooms_matched = duckdb.query(query_join).to_df()
# Query 2: Aggregation & Array Aggregation - Find busy rooms and list their layout contents
query_aggregate = """
SELECT ROOM, COUNT(*) as Total_Blocks, LIST(DISTINCT BLOCK_NAME) as Block_Types_Used
FROM df_master
WHERE ROOM IS NOT NULL AND ROOM != ''
GROUP BY ROOM
HAVING Total_Blocks > 1
ORDER BY Total_Blocks DESC
"""
df_summary = duckdb.query(query_aggregate).to_df()
# --- 4. NATIVE INTERFACE FILE DIALOGUE ---
# Launch a Windows File Dialogue properly parented to AutoCAD's main window
dialog = wx.FileDialog(
wxapp.GetMainTopWindow(),
message="Save Data Extraction Results As",
defaultDir=os.path.expanduser("~\\Documents"),
defaultFile="Drawing_Extraction.xlsx",
wildcard="Excel Files (*.xlsx)|*.xlsx",
style=wx.FD_SAVE
| wx.FD_OVERWRITE_PROMPT, # Automatically handles file overwrite warnings
)
# Handle user choice safely
if dialog.ShowModal() == wx.ID_OK:
export_path = dialog.GetPath()
dialog.Destroy() # Manually clean up UI memory
else:
print("\nExport cancelled by user.")
dialog.Destroy()
return
# --- 5. MULTI-TAB EXCEL EXPORT (via openpyxl) ---
with pd.ExcelWriter(export_path, engine="openpyxl") as writer:
# Sheet 1: Flat, raw master data dump
df_inventory.to_excel(writer, sheet_name="Master Inventory", index=False)
# Sheet 2: The relational architectural audit results
if not df_rooms_matched.empty:
df_rooms_matched.to_excel(writer, sheet_name="Matched Rooms", index=False)
# Sheet 3: Aggregated data metrics
if not df_summary.empty:
# DuckDB's LIST type creates a Python array. Flatten to string for clean Excel display.
df_summary["Block_Types_Used"] = df_summary["Block_Types_Used"].astype(str)
df_summary.to_excel(writer, sheet_name="Room Summary", index=False)
print(f"\nSuccess! Excel file written to: {export_path}")
except Exception as err:
print(f"\nError: {err}")
No comments:
Post a Comment