diff --git a/dataform/examples/dataform_assertion_unit_test/dataform.json b/dataform/examples/dataform_assertion_unit_test/dataform.json index 7b1a49913..b793366c4 100644 --- a/dataform/examples/dataform_assertion_unit_test/dataform.json +++ b/dataform/examples/dataform_assertion_unit_test/dataform.json @@ -3,5 +3,5 @@ "defaultSchema": "dataform", "assertionSchema": "dataform_assertions", "defaultDatabase": "YOUR_PROJECT_ID", - "useRunCache": false + "defaultLocation": "us" } diff --git a/dataform/examples/dataform_assertion_unit_test/definitions/tests/test_email_assertions.js b/dataform/examples/dataform_assertion_unit_test/definitions/tests/test_email_assertions.js index 717f862e8..ea21de592 100644 --- a/dataform/examples/dataform_assertion_unit_test/definitions/tests/test_email_assertions.js +++ b/dataform/examples/dataform_assertion_unit_test/definitions/tests/test_email_assertions.js @@ -24,7 +24,7 @@ const test_cases = { Otherwise it will show that the custom data quality rules failed. */ - "ruinanliu@google.com" : "TRUE", + "ruinanliu@domain.com" : "TRUE", "among.us@amongus.net" : "TRUE", "1736#$%.com" : "FALSE" }; diff --git a/dataform/examples/dataform_assertion_unit_test/definitions/tests/test_telephone_number_assertions.js b/dataform/examples/dataform_assertion_unit_test/definitions/tests/test_phone_number_assertions.js similarity index 94% rename from dataform/examples/dataform_assertion_unit_test/definitions/tests/test_telephone_number_assertions.js rename to dataform/examples/dataform_assertion_unit_test/definitions/tests/test_phone_number_assertions.js index e702e2046..fe77c0b6b 100644 --- a/dataform/examples/dataform_assertion_unit_test/definitions/tests/test_telephone_number_assertions.js +++ b/dataform/examples/dataform_assertion_unit_test/definitions/tests/test_phone_number_assertions.js @@ -13,7 +13,7 @@ // limitations under the License. 
const {generate_test} = unit_test_utils; -const {test_phone_number} = phone_assertions; +const {test_phone_number} = phone_number_assertions; const test_name = "test_telephone_number_assertions"; const test_cases = { /* @@ -32,7 +32,7 @@ const test_cases = { "7928374657" : "TRUE", "7847563738" : "TRUE", "6768907654" : "TRUE", - "1234567" : "FALSE", + "1234567" : "FALSE", "0123456789" : "FALSE", "1111111111" : "FALSE", "374657389a" : "FALSE" diff --git a/dataform/examples/dataform_assertion_unit_test/includes/date_assertions.js b/dataform/examples/dataform_assertion_unit_test/includes/date_assertions.js index 9ae90547e..79b8d05ef 100644 --- a/dataform/examples/dataform_assertion_unit_test/includes/date_assertions.js +++ b/dataform/examples/dataform_assertion_unit_test/includes/date_assertions.js @@ -15,53 +15,53 @@ /* - This assertion checks whether input date is future + This assertion is TRUE when the input date (format yyyy/mm/dd) is strictly before the current date, i.e. not in the future */ -function test_future_date(colName){ - var result_query = `PARSE_DATE('%Y/%m/%d', ${colName}) < CURRENT_DATE()` - return result_query +function test_future_date(colName) { + var result_query = `PARSE_DATE('%Y/%m/%d', ${colName}) < CURRENT_DATE()`; + return result_query; } /* This assertion checks whether the input birthdate is less than 100 yrs old */ -function test_valid_years(colName){ - var result_query = `DATE_DIFF(CURRENT_DATE(), PARSE_DATE('%Y/%m/%d', ${colName}), YEAR) < 100` - return result_query +function test_valid_years(colName) { + var result_query = `DATE_DIFF(CURRENT_DATE(), PARSE_DATE('%Y/%m/%d', ${colName}), YEAR) < 100`; + return result_query; } /* This function checks whether the format of the date is correct */ -function test_date_format(colName, date_format){ - if(date_format == "yyyy/mm/dd"){ - var result_query = `REGEXP_CONTAINS(${colName}, r'^[0-9]{4}[/][0-9]{2}[/][0-9]{2}$')` - return result_query - } else if (date_format == "yyyymmdd"){ - var result_query = `REGEXP_CONTAINS(${colName}, r'^[0-9]{4}[0-9]{2}[0-9]{2}$')` - return result_query - }else{ - return `FALSE` - } 
+function test_date_format(colName, date_format) { + if (date_format == "yyyy/mm/dd") { + var result_query = `REGEXP_CONTAINS(${colName}, r'^[0-9]{4}[/][0-9]{2}[/][0-9]{2}$')`; + return result_query; + } else if (date_format == "yyyymmdd") { + var result_query = `REGEXP_CONTAINS(${colName}, r'^[0-9]{4}[0-9]{2}[0-9]{2}$')`; + return result_query; + } else { + return `FALSE`; + } } /* This assertions combines custom assertions for testing future date and valid years */ -function test_date(colName){ - var result_query = +function test_date(colName) { + var result_query = `IF(${colName} IS NOT NULL AND ${colName} <> "",` + - `IF(${test_date_format(colName, "yyyy/mm/dd")}, ` + - `IF(${test_future_date(colName)}, ` + - `${test_valid_years(colName, 100)}` + - `, FALSE),` + - `IF(${test_date_format(colName, "yyyymmdd")}, ` + - `TRUE, FALSE)), FALSE)` - return result_query + `IF(${test_date_format(colName, "yyyy/mm/dd")}, ` + + `IF(${test_future_date(colName)}, ` + + `${test_valid_years(colName, 100)}` + + `, FALSE),` + + `IF(${test_date_format(colName, "yyyymmdd")}, ` + + `TRUE, FALSE)), FALSE)`; + return result_query; } module.exports = { - test_future_date, - test_valid_years, - test_date_format, - test_date -} + test_future_date, + test_valid_years, + test_date_format, + test_date, +}; diff --git a/dataform/examples/dataform_assertion_unit_test/includes/personal_info_assertions.js b/dataform/examples/dataform_assertion_unit_test/includes/personal_info_assertions.js index 71d063623..76ca5b895 100644 --- a/dataform/examples/dataform_assertion_unit_test/includes/personal_info_assertions.js +++ b/dataform/examples/dataform_assertion_unit_test/includes/personal_info_assertions.js @@ -15,93 +15,97 @@ /* This assertion checks whether the input email format is valid */ -function test_email_validity(colName){ - var result_query = `REGEXP_CONTAINS(${colName}, r'^[\\w.+-]+@[a-zA-Z0-9-]+\\.[a-zA-Z0-9-.]+$')` - return result_query +function test_email_validity(colName) { + var 
result_query = `REGEXP_CONTAINS(${colName}, r'^[\\w.+-]+@[a-zA-Z0-9-]+\\.[a-zA-Z0-9-.]+$')`; + return result_query; } /* This assertion checks whether the input marital status is within an acceptable list */ -function test_marital_status(colName){ - var marital_list = "'Married', 'Single', 'Divorced', 'Widowed'" - var result_query = `${colName} IN(${marital_list})` - return result_query +function test_marital_status(colName) { + var marital_list = "'Married', 'Single', 'Divorced', 'Widowed'"; + var result_query = `${colName} IN(${marital_list})`; + return result_query; } /* This assertion checks whether the input gender status is within an acceptable list */ -function test_gender_status(colName){ - var gender_list = "'Female','Male','Transgender Female','Transgender Male','Gender Variant','Prefer Not to Say'" - var result_query = `${colName} IN (${gender_list})` - return result_query +function test_gender_status(colName) { + var gender_list = + "'Female','Male','Transgender Female','Transgender Male','Gender Variant','Prefer Not to Say'"; + var result_query = `${colName} IN (${gender_list})`; + return result_query; } /* - This assertion checks whether the name is valid and only contain characters and numbers + This assertion checks that the name is non-empty and contains only the ASCII letters a-z and A-Z */ -function test_name_validity(colName){ - var result_query = `REGEXP_CONTAINS(${colName}, r'^[a-zA-Z]+$')` - return result_query +function test_name_validity(colName) { + var result_query = `REGEXP_CONTAINS(${colName}, r'^[a-zA-Z]+$')`; + return result_query; } /* This assertions compares with other input column to check whether the last name is unique */ -function test_last_name_unique(colName1, colName2){ - var result_query = `${colName1} != ${colName2}` - return result_query +function test_last_name_unique(colName1, colName2) { + var result_query = `${colName1} != ${colName2}`; + return result_query; } /* The assertion checks that no name contain more than n repeated characters */ -function test_same_character_not_more_than_n_times(colName, n_times){ - var 
regex = `(A{${n_times + 1},})+|` + - `(B{${n_times + 1},})+|` + - `(C{${n_times + 1},})+|` + - `(D{${n_times + 1},})+|` + - `(E{${n_times + 1},})+|` + - `(F{${n_times + 1},})+|` + - `(G{${n_times + 1},})+|` + - `(H{${n_times + 1},})+|` + - `(I{${n_times + 1},})+|` + - `(J{${n_times + 1},})+|` + - `(K{${n_times + 1},})+|` + - `(L{${n_times + 1},})+|` + - `(M{${n_times + 1},})+|` + - `(N{${n_times + 1},})+|` + - `(O{${n_times + 1},})+|` + - `(P{${n_times + 1},})+|` + - `(Q{${n_times + 1},})+|` + - `(R{${n_times + 1},})+|` + - `(S{${n_times + 1},})+|` + - `(T{${n_times + 1},})+|` + - `(U{${n_times + 1},})+|` + - `(V{${n_times + 1},})+|` + - `(W{${n_times + 1},})+|` + - `(X{${n_times + 1},})+|` + - `(Y{${n_times + 1},})+|` + - `(Z{${n_times + 1},})+` - var query_result = `NOT REGEXP_CONTAINS(UPPER(${colName}), r'${regex}')` - return query_result +function test_same_character_not_more_than_n_times(colName, n_times) { + var regex = + `(A{${n_times + 1},})+|` + + `(B{${n_times + 1},})+|` + + `(C{${n_times + 1},})+|` + + `(D{${n_times + 1},})+|` + + `(E{${n_times + 1},})+|` + + `(F{${n_times + 1},})+|` + + `(G{${n_times + 1},})+|` + + `(H{${n_times + 1},})+|` + + `(I{${n_times + 1},})+|` + + `(J{${n_times + 1},})+|` + + `(K{${n_times + 1},})+|` + + `(L{${n_times + 1},})+|` + + `(M{${n_times + 1},})+|` + + `(N{${n_times + 1},})+|` + + `(O{${n_times + 1},})+|` + + `(P{${n_times + 1},})+|` + + `(Q{${n_times + 1},})+|` + + `(R{${n_times + 1},})+|` + + `(S{${n_times + 1},})+|` + + `(T{${n_times + 1},})+|` + + `(U{${n_times + 1},})+|` + + `(V{${n_times + 1},})+|` + + `(W{${n_times + 1},})+|` + + `(X{${n_times + 1},})+|` + + `(Y{${n_times + 1},})+|` + + `(Z{${n_times + 1},})+`; + var query_result = `NOT REGEXP_CONTAINS(UPPER(${colName}), r'${regex}')`; + return query_result; } /* This assertions combines custom assertions for names */ -function test_name(colName, n_times){ - var result_query = `${test_name_validity(colName)} AND 
${test_same_character_not_more_than_n_times(colName, 3)}` - return result_query +function test_name(colName) { + var result_query = `${test_name_validity( + colName + )} AND ${test_same_character_not_more_than_n_times(colName, 3)}`; + return result_query; } module.exports = { - test_email_validity, - test_marital_status, - test_gender_status, - test_name_validity, - test_last_name_unique, - test_same_character_not_more_than_n_times, - test_name -} + test_email_validity, + test_marital_status, + test_gender_status, + test_name_validity, + test_last_name_unique, + test_same_character_not_more_than_n_times, + test_name, +}; diff --git a/dataform/examples/dataform_assertion_unit_test/includes/phone_number_assertions.js b/dataform/examples/dataform_assertion_unit_test/includes/phone_number_assertions.js new file mode 100644 index 000000000..9b6e457c0 --- /dev/null +++ b/dataform/examples/dataform_assertion_unit_test/includes/phone_number_assertions.js @@ -0,0 +1,114 @@ +// Copyright 2024 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +/* + This test will return FALSE if the telephone number meets any of the following conditions: + - Contains characters other than the digits 0 to 9. + - Is not exactly 10 positions long. + - Starts with a zero. 
+ - If the phone number starts with a "55", exclude the "55" at the beginning and validate there is one of the following series: "000000", "111111", "222222", "333333", "444444", "555555", "666666", "777777", "888888", "999999", "123456" + - Is assigned to more than 4 policies from different customers. + - Matches any number in the "Frequent case" list. + - In a 10 digit phone number, validate from left to right the long distance code, series and type: + + LADA / Long distance code + If it starts with 55, 52, 33 or 81, validate the LADA with 2 positions, otherwise, validate it with 3 positions. + + SERIE / SERIES + If the LADA had 2 positions, the series must have 4 digits; if the lada had 3 positions, the Lada must have the next 3 positions. + + TIPO / TYPE + The value "TIPO DE RED" (network type) in the catalog indicates whether the number is mobile or land line. + + (URL for National dialing plan : https://sns.ift.org.mx:8081/sns-frontend/planes-numeracion/descarga-publica.xhtm) NOTE(review): the functions visible in this diff only strip special characters and leading zeros and then apply the 10-digit / non-zero-first-digit check and the single-repeated-digit check; the "55" series, policy-count, "Frequent case" list and LADA/series/type rules are not implemented here - confirm whether they are handled elsewhere. +*/ +function test_phone_number_validity(colName) { + var remSplCharsLeadingZeros = `${test_remove_leading_zeros( + test_remove_special_chars(colName) + )}`; + + return `${test_phone_number_contain_digit(remSplCharsLeadingZeros)} + AND ${test_repeated_phone_number(remSplCharsLeadingZeros)}`; +} + +/* + Remove special characters + . : , ; ! 
" # $ % & / ( ) = ' + +*/ +function test_remove_special_chars(colName) { + var no_whitespace = `REPLACE(${colName}, ' ', '')`; + var no_dot = `REPLACE(${no_whitespace}, '.', '')`; + var no_colon = `REPLACE(${no_dot}, ':', '')`; + var no_comma = `REPLACE(${no_colon}, ',', '')`; + var no_semicolon = `REPLACE(${no_comma}, ';', '')`; + var no_exclamation = `REPLACE(${no_semicolon}, '!', '')`; + var no_double_quote = `REPLACE(${no_exclamation}, '"', '')`; + var no_hash = `REPLACE(${no_double_quote}, '#', '')`; + var no_dollar = `REPLACE(${no_hash}, '$', '')`; + var no_percentage = `REPLACE(${no_dollar}, '%', '')`; + var no_ampersand = `REPLACE(${no_percentage}, '&', '')`; + var no_forward_slash = `REPLACE(${no_ampersand}, '/', '')`; + var no_left_parenthesis = `REPLACE(${no_forward_slash}, '(', '')`; + var no_right_parenthesis = `REPLACE(${no_left_parenthesis}, ')', '')`; + var no_equal = `REPLACE(${no_right_parenthesis}, '=', '')`; + var no_single_quote = `REPLACE(${no_equal}, '\\'', '')`; + var no_plus = `REPLACE(${no_single_quote}, '+', '')`; + return no_plus; +} + +/* + Precondition: leading zero removal + Builds an LTRIM expression that strips every leading "0"; e.g. a value starting with "00055" loses the "000" +*/ +function test_remove_leading_zeros(colName) { + return `LTRIM(${colName}, '0')`; +} + +/* + This assertion checks that the input telephone number is not empty and is not made up of one single digit repeated (e.g. "1111111111") +*/ +function test_repeated_phone_number(colName) { + return `TRIM( ${colName}, "0") != "" AND + TRIM( ${colName}, "1") != "" AND + TRIM( ${colName}, "2") != "" AND + TRIM( ${colName}, "3") != "" AND + TRIM( ${colName}, "4") != "" AND + TRIM( ${colName}, "5") != "" AND + TRIM( ${colName}, "6") != "" AND + TRIM( ${colName}, "7") != "" AND + TRIM( ${colName}, "8") != "" AND + TRIM( ${colName}, "9") != ""`; +} + +/* + This assertion checks that the telephone number is exactly 10 digits long and that its first digit is not 0 +*/ +function test_phone_number_contain_digit(colName) { + return `REGEXP_CONTAINS(${colName}, r'^[1-9]{1}\\d{9}$')`; +} + 
+function test_phone_number(colName) { + /* Combines the validation of the cleaned value (special characters and leading zeros stripped) + with the same 10-digit and repeated-digit checks applied to the raw column expression. + Every appended clause starts with a space so AND never fuses with the preceding SQL expression. */ + return ( + `${test_phone_number_validity(colName)}` + + ` AND ${test_phone_number_contain_digit(colName)}` + + ` AND ${test_repeated_phone_number(colName)}` + ); +} + +module.exports = { + test_phone_number, +}; diff --git a/dataform/examples/dataform_assertion_unit_test/includes/unit_test_utils.js b/dataform/examples/dataform_assertion_unit_test/includes/unit_test_utils.js index 686b5b725..c95e818cc 100644 --- a/dataform/examples/dataform_assertion_unit_test/includes/unit_test_utils.js +++ b/dataform/examples/dataform_assertion_unit_test/includes/unit_test_utils.js @@ -12,28 +12,36 @@ // See the License for the specific language governing permissions and // limitations under the License. -function generate_test(test_name, test_cases, data_quality_function){ - publish(test_name) - .type("view") - .query(ctx => ` +function generate_test(test_name, test_cases, data_quality_function) { + publish(`${test_name}_dummy_view`).type("view").query("SELECT 1 as col1"); + publish(test_name) + .type("view") + .query( + (ctx) => ` SELECT ${data_quality_function("test_input")} AS is_valid - FROM ${ctx.resolve("test_inputs")} - `); + FROM ${ctx.resolve(`${test_name}_dummy_view`)} + ` + ); - let expected_output_select_statements = []; - let test_input_select_statements = []; - for(var test_case in test_cases) { - test_input_select_statements.push(`SELECT "${test_case}" AS test_input`); - expected_output_select_statements.push(`SELECT ${test_cases[test_case]} AS is_valid`); - }; + let expected_output_select_statements = []; + let test_input_select_statements = []; + for (var test_case in test_cases) { + test_input_select_statements.push(`SELECT "${test_case}" AS test_input`); + expected_output_select_statements.push( + `SELECT ${test_cases[test_case]} AS is_valid` + ); + } - test(test_name) 
- .dataset(test_name) - .input("test_inputs", `${test_input_select_statements.join(' UNION ALL\n')}`) - .expect(`${expected_output_select_statements.join(' UNION ALL\n')}`); + test(test_name) + .dataset(test_name) + .input( + `${test_name}_dummy_view`, + `${test_input_select_statements.join(" UNION ALL\n")}` + ) + .expect(`${expected_output_select_statements.join(" UNION ALL\n")}`); } module.exports = { - generate_test, -} + generate_test, +}; diff --git a/dataform/examples/dataform_assertion_unit_test/package.json b/dataform/examples/dataform_assertion_unit_test/package.json index df648e973..97f2f0b95 100644 --- a/dataform/examples/dataform_assertion_unit_test/package.json +++ b/dataform/examples/dataform_assertion_unit_test/package.json @@ -1,5 +1,5 @@ { "dependencies": { - "@dataform/core": "1.18.0" + "@dataform/core": "3.0.2" } }