Thursday, June 25, 2026

Supercharging AutoCAD Data Extraction with PyRx, Pandas, and DuckDB

 

If you have ever tried building a custom data extraction tool for AutoCAD using the classic COM/ActiveX interfaces or vanilla LISP, you know it can quickly turn into a headache of nested loops, slow execution, and rigid data structures.

This post demonstrates a modern, high-performance approach to CAD data mining. By combining PyRx (the Python wrapper for AutoCAD's native C++ ObjectARX API) with the data science power-duo of Pandas and DuckDB, you can extract block attributes, run complex relational queries on your drawing, and export styled reports to Excel—all in a fraction of a second.

Why This Stack Rules:

- PyRx (ObjectARX Speed): Operates at the native C++ memory level. It reads drawing databases instantly without the COM automation bottleneck.

- Dynamically shapes blocks into structured tables, sanitising whitespace and handling missing columns gracefully.

- DuckDB (In-Memory SQL): Allows you to write standard SQL (JOIN, GROUP BY, LIST) directly against your live Pandas DataFrames in memory with zero setup overhead.

- wxPython (Native UI): Leverages AutoCAD's built-in UI layer to provide a clean, modern file dialog.

 

"""
CAD Data Extractor & Auditor
Stack: PyRx (ObjectARX), Pandas, DuckDB, wxPython, openpyxl
Context: Extracts block attributes into dataframes, runs relational audits
         via SQL, and exports the structured results to a multi-tab Excel file.
"""

from pyrx import Ap, Db, Ed
import pandas as pd
import duckdb
import os
import wx


@Ap.Command()
def doit():
    try:
        # --- 1. INITIALIZE AUTOCAD & SYSTEM CONTEXTS ---
        app = Ap.Application()
        wxapp: wx.App = app.wxApp()  # Access CAD's native wxPython application instance
        db = Db.curDb()  # Get the current active database
        bt = db.blockTable()  # Access the drawing's Block Table

        # Dictionary to store structured Pandas DataFrames for each unique block type
        # Key: Block Name (str), Value: DataFrame
        drawing_tables = {}

        # --- 2. FAST DB ITERATION & DATAFRAME GENERATION ---
        # PyRx lets us unpack names and Object IDs directly from the Block Table
        for block_name, btr_id in bt:

            # Skip anonymous blocks (*U...) and temporary audit blocks (A$...)
            if block_name.startswith("*") or block_name.startswith("A$"):
                continue

            btr = Db.BlockTableRecord(btr_id)
            if btr.isLayout():  # Skip Model Space and Paper Space layout definitions
                continue

            rows = []
            # Query all physical references (insertions) of this specific block definition
            for ref in btr.getBlockReferences():
                # Performance Optimization: Skip blocks without attributes immediately
                if not ref.hasAttributes():
                    continue

                # Fetch attribute tag/value pairs cleanly via PyRx helper
                attrs = ref.attdict()
                if attrs:
                    # Clean trailing/leading spaces from drafting data and log the parent block name
                    cleaned_attrs = {k.strip(): v.strip() for k, v in attrs.items()}
                    cleaned_attrs["BLOCK_NAME"] = block_name
                    rows.append(cleaned_attrs)

            # Convert the list of dictionaries into a Pandas DataFrame
            if rows:
                drawing_tables[block_name] = pd.DataFrame(rows)

        # Fail-fast if the drawing has no attribute metadata to process
        if not drawing_tables:
            print("\nNo block attributes found in this drawing.")
            return

        # Extract target datasets safely using .get() to prevent KeyErrors if a block type is missing
        df_plan = drawing_tables.get("plan", pd.DataFrame())
        df_elev = drawing_tables.get("elev", pd.DataFrame())

        # Consolidate all individual block dataframes into one master inventory sheet
        # Pandas auto-aligns columns, filling mismatched attributes with NaN/None
        df_master = pd.concat(drawing_tables.values(), ignore_index=True, sort=False)

        # --- 3. RELATIONAL IN-MEMORY ANALYTICS VIA DUCKDB ---
        df_inventory = df_master.copy()
        df_rooms_matched = pd.DataFrame()

        # Query 1: Relational JOIN - Find where both 'plan' and 'elev' blocks coexist in the same ROOM
        if not df_plan.empty and not df_elev.empty:
            query_join = """
                SELECT p.ROOM, p.REF as Plan_Ref, e.REF as Elev_Ref, p.SCALE as Plan_Scale, e.SCALE as Elev_Scale
                FROM df_plan p
                JOIN df_elev e ON p.ROOM = e.ROOM
                ORDER BY p.ROOM
            """
            # DuckDB automatically scans the local Python scope to query df_plan and df_elev directly!
            df_rooms_matched = duckdb.query(query_join).to_df()

        # Query 2: Aggregation & Array Aggregation - Find busy rooms and list their layout contents
        query_aggregate = """
            SELECT ROOM, COUNT(*) as Total_Blocks, LIST(DISTINCT BLOCK_NAME) as Block_Types_Used
            FROM df_master
            WHERE ROOM IS NOT NULL AND ROOM != ''
            GROUP BY ROOM
            HAVING Total_Blocks > 1
            ORDER BY Total_Blocks DESC
        """
        df_summary = duckdb.query(query_aggregate).to_df()

        # --- 4. NATIVE INTERFACE FILE DIALOGUE ---
        # Launch a Windows File Dialogue properly parented to AutoCAD's main window
        dialog = wx.FileDialog(
            wxapp.GetMainTopWindow(),
            message="Save Data Extraction Results As",
            defaultDir=os.path.expanduser("~\\Documents"),
            defaultFile="Drawing_Extraction.xlsx",
            wildcard="Excel Files (*.xlsx)|*.xlsx",
            style=wx.FD_SAVE
            | wx.FD_OVERWRITE_PROMPT,  # Automatically handles file overwrite warnings
        )

        # Handle user choice safely
        if dialog.ShowModal() == wx.ID_OK:
            export_path = dialog.GetPath()
            dialog.Destroy()  # Manually clean up UI memory
        else:
            print("\nExport cancelled by user.")
            dialog.Destroy()
            return

        # --- 5. MULTI-TAB EXCEL EXPORT (via openpyxl) ---
        with pd.ExcelWriter(export_path, engine="openpyxl") as writer:
            # Sheet 1: Flat, raw master data dump
            df_inventory.to_excel(writer, sheet_name="Master Inventory", index=False)

            # Sheet 2: The relational architectural audit results
            if not df_rooms_matched.empty:
                df_rooms_matched.to_excel(writer, sheet_name="Matched Rooms", index=False)

            # Sheet 3: Aggregated data metrics
            if not df_summary.empty:
                # DuckDB's LIST type creates a Python array. Flatten to string for clean Excel display.
                df_summary["Block_Types_Used"] = df_summary["Block_Types_Used"].astype(str)
                df_summary.to_excel(writer, sheet_name="Room Summary", index=False)

        print(f"\nSuccess! Excel file written to: {export_path}")

    except Exception as err:
        print(f"\nError: {err}")


 


 

Friday, May 29, 2026

Fine-Tuning Selection Set Filtering with Callbacks in PyRx

As of version 2.2.58, most selection set actions in PyRx have received a powerful overload that allows developers to attach a

When executing a selection command, the callback function automatically intercepts a Db.ObjectIdArray containing the candidate objects. Your logic then evaluates these objects and returns a list of integers representing the indexes of the IDs that should be discarded from the final selection.
Key Benefits of Selection Callbacks
  • Advanced Criteria: Filter by complex properties (like geometric limits or dynamic states) that standard DXF group codes cannot check.
  • Streamlined Workflow: Eliminate the need to manually iterate over and clean up selection sets after user input.
  • Real-time Precision: Ensure your commands only process exactly what they are intended to handle.
The Complete Python Code
Here is how to implement a dynamic block filter using the new callback overload.
 
import traceback
from pyrx import Ap, Db, Ed, Ge, Rx

def dyn_filter(ids: Db.ObjectIdArray) -> list[int]:
    """
    Callback filter that evaluates a list of selected Object IDs.
    Returns a list of list indexes pointing to objects that should be removed.
    """
    itemsToRemove = []
   
    # Enumerate through the selection array to track both index and object ID
    for idx, id in enumerate(ids):
        # Open the ID as a BlockReference to inspect its properties
        ref = Db.BlockReference(id)
       
        # Check if the block reference is NOT a dynamic block
        if not ref.isDynamicBlock():
            # Flag the index for removal if it fails the condition
            itemsToRemove.append(idx)
           
    return itemsToRemove

@Ap.Command()
def pyselectfcb():
    """
    Custom PyRx command to prompt user selection using the custom filter.
    """
    try:
        # Prompt selection filtering for 'INSERT' (Block References)
        # Pass the 'dyn_filter' function as the second argument callback
        ps, ss = Ed.Editor.select([(0, "INSERT")], dyn_filter)
       
        # If the selection prompt was successful, print the result status and size
        if ps == Ed.PromptStatus.eOk:
            print(f"Selection Status: {ps}, Filtered Count: {ss.size()}")
           
    except Exception:
        # Catch and print any runtime exceptions for debugging
        print(traceback.format_exc())


 
Code Walkthrough
  1. Ed.Editor.select Overload: We pass dyn_filter as a functional parameter directly into our selection method.
  2. Indexing Strategy: The callback tracks idx. If a block fails the isDynamicBlock() check, its exact list index is recorded.
  3. Array Cleanup: PyRx reads the returned itemsToRemove list and instantly strips those matching positions out of your final selection set ss.
 
 
Only the Dynamic Blocks are selected, yay!
 

Supercharging AutoCAD Data Extraction with PyRx, Pandas, and DuckDB

  If you have ever tried building a custom data extraction tool for AutoCAD using the classic COM/ActiveX interfaces or vanilla LISP, you ...