Tutorial Delete Duplicate Groups with Python

Python
DomoJupyter
DomoGroup
Author

Jae Wilson

Published

December 18, 2023

DELETE Duplicate Groups

Sometimes you have duplicate groups and want to remove every group whose name matches a regular expression.

  1. get all groups from an instance

  2. use a list comprehension and ACCOUNT_MATCH to find groups whose name matches any of the ACCOUNT_MATCH patterns

  3. write a function that receives a parameter is_test : bool = True

    • if is_test == True return a pd.DataFrame of groups to be deleted
    • if is_test == False delete the groups

use this class https://github.com/jaewilson07/domo_library/blob/main/nbs/classes/50_DomoGroup.ipynb

tutorial walkthrough https://youtu.be/1EQJ1R5fiGQ

# %pip install --upgrade  domolibrary
# %pip install python-dotenv
import domolibrary
import pandas as pd
import os
domolibrary.__version__
'4.3.1'

CONFIG ENV and CREDENTIALS

# from dotenv import load_dotenv


# Read the target instance name and the access token from environment
# variables; os.environ[...] raises KeyError when a variable is not set.
domo_instance = os.environ['DOMO_INSTANCE']
domo_access_token = os.environ['DOMO_ACCESS_TOKEN']
import domolibrary.client.DomoAuth as dmda

# do not alter this cell

# Token-based auth object; it is passed to the group calls in later cells.
instance_auth = dmda.DomoTokenAuth(
    domo_access_token= domo_access_token,
    domo_instance=domo_instance,
)

# this will test if you have valid credentials at TEST_INSTANCE
# if you don't, ask me for access
await instance_auth.print_is_token()
🎉 token_auth token retrieved from domo-community ⚙️
True

handle regex

import os
import urllib.parse as urllib_parse

# execution_instance = urllib_parse.urlparse(os.environ.get('DOMO_HOSTNAME')).netloc.replace('.domo.com', '')
# config_instance = execution_instance

# Patterns selecting the groups to delete: names that start with "sdk_" and
# later contain " (", e.g. "sdk_alphabet (3)".
ACCOUNT_MATCH = [r"^sdk_.* \(.*"]


# Sample group names: the first two match ACCOUNT_MATCH, the last two do not.
test_values = [
    "sdk_alphabet (3)",
    "sdk_hello_duplicate (4)",
    "sdk_group_2",
    "sdk_group_1",
]
# setup test
import domolibrary.classes.DomoGroup as dmg
import domolibrary.utils.chunk_execution as ce

# Create one group per sample name. Any exception (the recorded run shows an
# "already exists" error) is printed instead of stopping the notebook.
try:
    await ce.gather_with_concurrency(
        *[
            dmg.DomoGroup.create_from_name(group_name=value, auth=instance_auth)
            for value in test_values
        ],
        n=10
    )

except Exception as e:
    print(e)
🛑  Group_CRUD_Error 🛑 - function: DomoGroup.get_traceback || status 400 || sdk_group_1 already exists. Choose a different group_name at domo-community
# Check if string matches regex list
# Using join regex + loop + re.match()
import re
from typing import List


def test_regex(text_str: str, regex_ls: List[str]) -> bool:
    """Return True if *text_str* matches at least one pattern in *regex_ls*.

    Each pattern is tried on its own with ``re.match``, so it is anchored at
    the start of *text_str*. Trying the patterns separately (instead of
    joining them with ``|``) keeps patterns that carry inline flags or named
    groups from interfering with each other.

    Args:
        text_str: the string to check, e.g. a group name.
        regex_ls: regular expressions to try.

    Returns:
        True when any pattern matches. An empty *regex_ls* returns False:
        with no criteria nothing is selected, which matters because the
        selected groups are later deleted.
    """
    return any(re.match(regex, text_str) for regex in regex_ls)


# The name starts with "test_", so pytest would try to collect this helper as
# a test case; opt it out explicitly.
test_regex.__test__ = False


# Check every sample name against ACCOUNT_MATCH (one bool per name).
[test_regex(test_value, ACCOUNT_MATCH) for test_value in test_values]
[True, True, False, False]

⚙️ Auth Objects

# do not alter this cell



# Configuration table with one row per instance to process; each row holds
# the auth object, the instance name and an is_test flag.
config_company_ls = pd.DataFrame(
    [{"instance_auth": instance_auth, "domo_instance": domo_instance, "is_test": True}]
)

# First row of the configuration table, used to try out the functions below.
test_row = next((row for index, row in config_company_ls.iterrows()))
test_row
instance_auth    DomoTokenAuth(domo_instance='domo-community', ...
domo_instance                                       domo-community
is_test                                                       True
Name: 0, dtype: object

CONFIG FUNCTIONS

implement functions here.

  • write them in a test-able way so that you can test each function independently.
  • in the function definition make sure to include input type and the return type.
  • include a test using test_row
  • the last function should be called process_instance() and receive a DomoAuth object.

Notice that most of your functions will be async functions (meaning they can be run asynchronously).

All async functions must be await-ed

import domolibrary.classes.DomoGroup as dmg
from typing import List


async def get_and_filter_domo_groups(
    auth: dmda.DomoAuth,
    regex_ls: List[str],
) -> List[dmg.DomoGroup]:
    """Fetch the instance's groups and keep those whose name matches *regex_ls*.

    Args:
        auth: auth object handed to ``dmg.DomoGroups``.
        regex_ls: patterns forwarded to ``test_regex`` for each group name.

    Returns:
        The groups for which ``test_regex`` returned True, in the order they
        were retrieved.
    """
    all_groups = await dmg.DomoGroups(auth=auth).get()

    matching_groups = []
    for group in all_groups:
        if test_regex(text_str=group.name, regex_ls=regex_ls):
            matching_groups.append(group)

    return matching_groups


# Try the function against the test row's instance.
await get_and_filter_domo_groups(auth=test_row.instance_auth, regex_ls=ACCOUNT_MATCH)
[DomoGroup(id=1263741810, name='sdk_alphabet (3)', type='open', is_system=False, description=None, custom_attributes={}),
 DomoGroup(id=1586482197, name='sdk_hello_duplicate (4)', type='open', is_system=False, description=None, custom_attributes={})]
import domolibrary.utils.chunk_execution as ce


async def process_instance(
    instance_auth: dmda.DomoAuth,
    regex_ls: List[str],
    is_test: bool = True,
) -> pd.DataFrame:
    """List, and optionally delete, the groups whose name matches *regex_ls*.

    Args:
        instance_auth: auth object for the instance to process.
        regex_ls: patterns a group name must match to be selected.
        is_test: when True (the default) nothing is deleted and the function
            only reports which groups would be removed.

    Returns:
        A DataFrame with the columns ``group_name``, ``group_type`` and
        ``domo_instance``, one row per matching group (no rows when nothing
        matches). When ``is_test`` is False an ``is_deleted`` column is
        added: it holds each delete call's response, or the error text if
        the deletion raised ``Group_CRUD_Error``.
    """
    domo_groups = await get_and_filter_domo_groups(
        auth=instance_auth, regex_ls=regex_ls
    )

    if not domo_groups:
        # Keep the column layout so callers can still concatenate the result.
        return pd.DataFrame(columns=["group_name", "group_type", "domo_instance"])

    df = pd.DataFrame(
        [
            {
                "group_name": domo_group.name,
                "group_type": domo_group.type,
                "domo_instance": domo_group.auth.domo_instance,
            }
            for domo_group in domo_groups
        ]
    )

    if is_test:
        return df

    try:
        res_ls = await ce.gather_with_concurrency(
            *[domo_group.delete() for domo_group in domo_groups], n=10
        )
    except dmg.Group_CRUD_Error as e:
        print(e)
        # Always hand back a DataFrame (never None) so main() can pass the
        # result to pd.concat. Which deletions completed before the error is
        # not known here, so every row is marked as unconfirmed.
        df["is_deleted"] = f"delete not confirmed: {e}"
        return df

    df["is_deleted"] = [res.response for res in res_ls]

    return df


# Re-create the sample groups so there is something to delete; any exception
# is printed instead of stopping the notebook.
try:
    await ce.gather_with_concurrency(
        *[
            dmg.DomoGroup.create_from_name(
                group_name=value, auth=test_row.instance_auth
            )
            for value in test_values
        ],
        n=10
    )
except Exception as e:
    print(e)

# is_test=False: the groups matching ACCOUNT_MATCH are actually deleted.
await process_instance(
    instance_auth=test_row.instance_auth,
    is_test=False,
    regex_ls=ACCOUNT_MATCH,
)
🛑  Group_CRUD_Error 🛑 - function: DomoGroup.get_traceback || status 400 || sdk_group_1 already exists. Choose a different group_name at domo-community
group_name group_type domo_instance is_deleted
0 sdk_alphabet (3) open domo-community deleted 1263741810 from domo-community
1 sdk_hello_duplicate (4) open domo-community deleted 1586482197 from domo-community

MAIN

main will loop over a dataframe, config_company_ls, where each row has a valid instance_auth (the admin login credentials).

Some scripts have special configuration settings which would be extracted from the row, e.g. is_configure_allowlist or is_enable_social_users.

you should not need to alter main.

# do not alter this code block

import domolibrary.utils.chunk_execution as ce


async def main(
    regex_ls: List[str],  # list of regex expressions to match
    is_test: bool = True,
):
    """Run ``process_instance`` for every row of ``config_company_ls``.

    Each row's ``instance_auth`` is passed to ``process_instance`` together
    with *regex_ls* and *is_test*; the per-instance results are combined with
    ``pd.concat`` and returned.
    """
    pending = [
        process_instance(
            instance_auth=row.instance_auth,
            regex_ls=regex_ls,
            is_test=is_test,
        )
        for _, row in config_company_ls.iterrows()
    ]

    per_instance_frames = await ce.gather_with_concurrency(*pending, n=10)

    return pd.concat(per_instance_frames)
import domolibrary.client.DomoError as dmde
try:
    if not test_values:
        await ce.gather_with_concurrency(
            *[
                dmg.DomoGroup.create_from_name(
                    group_name=value, auth=test_row.instance_auth
                )
                for value in test_values
            ],
            n=10
        )
except dmde.DomoError as e:
    print(e)

await main(is_test=False, regex_ls=ACCOUNT_MATCH)
group_name group_type domo_instance