# pip install --upgrade domolibrary
# pip install sqlparse
# pip install sql-metadataTutorial - Extract SQL from a Dataflow
Instance Configuration
Dataflow
⚙️ Configuration
⚙️ Auth Object
Configure a DomoAuth object that will be used to interact with DomoDatasets
import os
domo_instance = os.environ['DOMO_INSTANCE']
domo_access_token = os.environ['DOMO_ACCESS_TOKEN']
sql_dataflow_id = 70import domolibrary.client.DomoAuth as dmda
test_auth = dmda.DomoTokenAuth(
domo_instance=domo_instance,
domo_access_token=domo_access_token
)⚙️ Dataflow Object
Configure a DomoDataflow
import domolibrary.classes.DomoDataflow as dmdf
async def generate_dataflow(dataflow_id: str, auth: dmda.DomoAuth):
"""generates a DomoDataset class object from a dataset_id"""
return await dmdf.DomoDataflow.get_by_id(dataflow_id=dataflow_id, auth=auth)
# test
df = await generate_dataflow(dataflow_id=sql_dataflow_id, auth=test_auth)--------------------------------------------------------------------------- NameError Traceback (most recent call last) Cell In[4], line 1 ----> 1 import domolibrary.classes.DomoDataflow as dmdf 4 async def generate_dataflow(dataflow_id: str, auth: dmda.DomoAuth): 5 """generates a DomoDataset class object from a dataset_id""" File /workspaces/domolibrary/domolibrary/classes/DomoDataflow.py:17 14 import domolibrary.client.DomoError as dmde 15 import domolibrary.client.DomoAuth as dmda ---> 17 import domolibrary.classes.DomoLineage as dmdl 19 import httpx 21 from nbdev.showdoc import patch_to File /workspaces/domolibrary/domolibrary/classes/DomoLineage.py:16 13 import httpx 15 import domolibrary.client.DomoAuth as dmda ---> 16 import domolibrary.classes.DomoDatacenter as dmdc 17 import domolibrary.client.DomoError as dmde 19 import domolibrary.routes.datacenter as datacenter_routes File /workspaces/domolibrary/domolibrary/classes/DomoDatacenter.py:255 228 return await dmce.gather_with_concurrency( 229 n=60, 230 *[ (...) 238 ] 239 ) 241 # %% ../../nbs/classes/50_DomoDatacenter.ipynb 20 242 @patch_to(DomoDatacenter, cls_method=True) 243 async def get_cards_admin_summary( 244 cls, 245 auth=dmda.DomoAuth, 246 page_ids: List[str] = None, 247 card_search_text: str = None, 248 page_search_text: str = None, 249 maximum: int = None, # maximum number of results to return 250 # can accept one value or a list of values 251 return_raw: bool = False, 252 debug_api: bool = False, 253 debug_loop: bool = False, 254 session: httpx.AsyncClient = None, --> 255 ) -> list[dmc.DomoCard]: 256 """search Domo Datacenter card api.""" 258 import domolibrary.classes.DomoCard as dmc NameError: name 'dmc' is not defined
💾 Process Actions
DomoDataflow_Action definitions are embeded in the DomoDataflow.actions property.
The DomoDataflow_Action class only captures a portion of the available properties across all the different action types, so this can be extended as needed.
SQL statements are captured in DomoDataflow_Action.sql and can therefore be parsed or reformatted for presentation in other documentation tools. In the example below we use the [sqlparse(https://github.com/andialbrecht/sqlparse]) and [sql_metadata(https://github.com/macbre/sql-metadata)] library to parse and present a dataflow.
import sqlparse
import sql_metadata as sqlmd
import pandas as pd
def process_actions(domo_dataflow: dmdf.DomoDataflow):
"""exctract SQL from actions"""
domo_actions = domo_dataflow.actions
if not domo_actions:
return
clean_sql = []
for action in domo_actions:
sql = action.sql
# not all domo actions will contain sql.
if not sql:
continue
# alter according to your needs
clean_sql.append(
{
"transform_sql": sqlparse.format(
sql, reindent_aligned=True, keyword_case="upper"
),
"referenced_tables": ", ".join(sqlmd.Parser(sql).tables),
"target_table": action.name,
}
)
return clean_sql
res = process_actions(domo_dataflow=df)
pd.DataFrame(res)| transform_sql | referenced_tables | target_table | |
|---|---|---|---|
| 0 | SELECT `Display Name`,\n `User ID` , @ro... | domostats_people | t_1 |
| 1 | SELECT b.`User ID` AS userID,\n 'reports... | t_1 | reportsto |
| 2 | SELECT *\n FROM `reportsto` | reportsto | domoStats_people_reports |