Tutorial - Extract SQL from a Dataflow

Instance Configuration
Dataflow
Author

Jae Wilson

Published

February 2, 2023

⚙️ Configuration

# pip install --upgrade  domolibrary
# pip install sqlparse
# pip install sql-metadata

⚙️ Auth Object

Configure a DomoAuth object that will be used to interact with DomoDatasets

import os

domo_instance = os.environ['DOMO_INSTANCE']
domo_access_token = os.environ['DOMO_ACCESS_TOKEN']
sql_dataflow_id = 70
import domolibrary.client.DomoAuth as dmda

test_auth = dmda.DomoTokenAuth(
    domo_instance=domo_instance,
    domo_access_token=domo_access_token
)

⚙️ Dataflow Object

Configure a DomoDataflow

import domolibrary.classes.DomoDataflow as dmdf


async def generate_dataflow(dataflow_id: str, auth: dmda.DomoAuth):
    """generates a DomoDataset class object from a dataset_id"""

    return await dmdf.DomoDataflow.get_by_id(dataflow_id=dataflow_id, auth=auth)


# test
df = await generate_dataflow(dataflow_id=sql_dataflow_id, auth=test_auth)
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[4], line 1
----> 1 import domolibrary.classes.DomoDataflow as dmdf
      4 async def generate_dataflow(dataflow_id: str, auth: dmda.DomoAuth):
      5     """generates a DomoDataset class object from a dataset_id"""

File /workspaces/domolibrary/domolibrary/classes/DomoDataflow.py:17
     14 import domolibrary.client.DomoError as dmde
     15 import domolibrary.client.DomoAuth as dmda
---> 17 import domolibrary.classes.DomoLineage as dmdl
     19 import httpx
     21 from nbdev.showdoc import patch_to

File /workspaces/domolibrary/domolibrary/classes/DomoLineage.py:16
     13 import httpx
     15 import domolibrary.client.DomoAuth as dmda
---> 16 import domolibrary.classes.DomoDatacenter as dmdc
     17 import domolibrary.client.DomoError as dmde
     19 import domolibrary.routes.datacenter as datacenter_routes

File /workspaces/domolibrary/domolibrary/classes/DomoDatacenter.py:255
    228     return await dmce.gather_with_concurrency(
    229         n=60,
    230         *[
   (...)
    238         ]
    239     )
    241 # %% ../../nbs/classes/50_DomoDatacenter.ipynb 20
    242 @patch_to(DomoDatacenter, cls_method=True)
    243 async def get_cards_admin_summary(
    244     cls,
    245     auth=dmda.DomoAuth,
    246     page_ids: List[str] = None,
    247     card_search_text: str = None,
    248     page_search_text: str = None,
    249     maximum: int = None,  # maximum number of results to return
    250     # can accept one value or a list of values
    251     return_raw: bool = False,
    252     debug_api: bool = False,
    253     debug_loop: bool = False,
    254     session: httpx.AsyncClient = None,
--> 255 ) -> list[dmc.DomoCard]:
    256     """search Domo Datacenter card api."""
    258     import domolibrary.classes.DomoCard as dmc

NameError: name 'dmc' is not defined

💾 Process Actions

DomoDataflow_Action definitions are embeded in the DomoDataflow.actions property.

The DomoDataflow_Action class only captures a portion of the available properties across all the different action types, so this can be extended as needed.

SQL statements are captured in DomoDataflow_Action.sql and can therefore be parsed or reformatted for presentation in other documentation tools. In the example below we use the [sqlparse(https://github.com/andialbrecht/sqlparse]) and [sql_metadata(https://github.com/macbre/sql-metadata)] library to parse and present a dataflow.

import sqlparse
import sql_metadata as sqlmd
import pandas as pd


def process_actions(domo_dataflow: dmdf.DomoDataflow):
    """exctract SQL from actions"""

    domo_actions = domo_dataflow.actions

    if not domo_actions:
        return

    clean_sql = []

    for action in domo_actions:
        sql = action.sql

        # not all domo actions will contain sql.
        if not sql:
            continue

        # alter according to your needs
        clean_sql.append(
            {
                "transform_sql": sqlparse.format(
                    sql, reindent_aligned=True, keyword_case="upper"
                ),
                "referenced_tables": ", ".join(sqlmd.Parser(sql).tables),
                "target_table": action.name,
            }
        )

    return clean_sql


res = process_actions(domo_dataflow=df)
pd.DataFrame(res)
transform_sql referenced_tables target_table
0 SELECT `Display Name`,\n `User ID` , @ro... domostats_people t_1
1 SELECT b.`User ID` AS userID,\n 'reports... t_1 reportsto
2 SELECT *\n FROM `reportsto` reportsto domoStats_people_reports