Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@
"defaultSchema": "dataform",
"assertionSchema": "dataform_assertions",
"defaultDatabase": "YOUR_PROJECT_ID",
"useRunCache": false
"defaultLocation": "us"
}
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ const test_cases = {
Otherwise it will show that the custom data quality rules failed.
*/

"ruinanliu@google.com" : "TRUE",
"ruinanliu@domain.com" : "TRUE",
"[email protected]" : "TRUE",
"1736#$%.com" : "FALSE"
};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
// limitations under the License.

const {generate_test} = unit_test_utils;
const {test_phone_number} = phone_assertions;
const {test_phone_number} = phone_number_assertions;
const test_name = "test_telephone_number_assertions";
const test_cases = {
/*
Expand All @@ -32,7 +32,7 @@ const test_cases = {
"7928374657" : "TRUE",
"7847563738" : "TRUE",
"6768907654" : "TRUE",
"1234567" : "FALSE",
"1234567" : "FALSE",
"0123456789" : "FALSE",
"1111111111" : "FALSE",
"374657389a" : "FALSE"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,53 +15,53 @@
/*
This assertion checks that the input date (formatted as YYYY/MM/DD) is earlier than the current date, i.e. it is not today or in the future
*/
function test_future_date(colName){
var result_query = `PARSE_DATE('%Y/%m/%d', ${colName}) < CURRENT_DATE()`
return result_query
function test_future_date(colName) {
var result_query = `PARSE_DATE('%Y/%m/%d', ${colName}) < CURRENT_DATE()`;
return result_query;
}

/*
This assertion checks whether the input birthdate (formatted as YYYY/MM/DD) is less than 100 years before the current date
*/
function test_valid_years(colName){
var result_query = `DATE_DIFF(CURRENT_DATE(), PARSE_DATE('%Y/%m/%d', ${colName}), YEAR) < 100`
return result_query
function test_valid_years(colName) {
var result_query = `DATE_DIFF(CURRENT_DATE(), PARSE_DATE('%Y/%m/%d', ${colName}), YEAR) < 100`;
return result_query;
}

/*
This function checks whether the date string matches the requested format ("yyyy/mm/dd" or "yyyymmdd"); any other requested format yields FALSE
*/
function test_date_format(colName, date_format){
if(date_format == "yyyy/mm/dd"){
var result_query = `REGEXP_CONTAINS(${colName}, r'^[0-9]{4}[/][0-9]{2}[/][0-9]{2}$')`
return result_query
} else if (date_format == "yyyymmdd"){
var result_query = `REGEXP_CONTAINS(${colName}, r'^[0-9]{4}[0-9]{2}[0-9]{2}$')`
return result_query
}else{
return `FALSE`
}
function test_date_format(colName, date_format) {
if (date_format == "yyyy/mm/dd") {
var result_query = `REGEXP_CONTAINS(${colName}, r'^[0-9]{4}[/][0-9]{2}[/][0-9]{2}$')`;
return result_query;
} else if (date_format == "yyyymmdd") {
var result_query = `REGEXP_CONTAINS(${colName}, r'^[0-9]{4}[0-9]{2}[0-9]{2}$')`;
return result_query;
} else {
return `FALSE`;
}
}

/*
This assertion combines the custom assertions for date format, future date and valid years
*/

function test_date(colName){
var result_query =
function test_date(colName) {
var result_query =
`IF(${colName} IS NOT NULL AND ${colName} <> "",` +
`IF(${test_date_format(colName, "yyyy/mm/dd")}, ` +
`IF(${test_future_date(colName)}, ` +
`${test_valid_years(colName, 100)}` +
`, FALSE),` +
`IF(${test_date_format(colName, "yyyymmdd")}, ` +
`TRUE, FALSE)), FALSE)`
return result_query
`IF(${test_date_format(colName, "yyyy/mm/dd")}, ` +
`IF(${test_future_date(colName)}, ` +
`${test_valid_years(colName, 100)}` +
`, FALSE),` +
`IF(${test_date_format(colName, "yyyymmdd")}, ` +
`TRUE, FALSE)), FALSE)`;
return result_query;
}

module.exports = {
test_future_date,
test_valid_years,
test_date_format,
test_date
}
test_future_date,
test_valid_years,
test_date_format,
test_date,
};
Original file line number Diff line number Diff line change
Expand Up @@ -15,93 +15,97 @@
/*
This assertion checks whether the input email format is valid
*/
function test_email_validity(colName){
var result_query = `REGEXP_CONTAINS(${colName}, r'^[\\w.+-]+@[a-zA-Z0-9-]+\\.[a-zA-Z0-9-.]+$')`
return result_query
function test_email_validity(colName) {
var result_query = `REGEXP_CONTAINS(${colName}, r'^[\\w.+-]+@[a-zA-Z0-9-]+\\.[a-zA-Z0-9-.]+$')`;
return result_query;
}

/*
This assertion checks whether the input marital status is within an acceptable list
*/
function test_marital_status(colName){
var marital_list = "'Married', 'Single', 'Divorced', 'Widowed'"
var result_query = `${colName} IN(${marital_list})`
return result_query
function test_marital_status(colName) {
var marital_list = "'Married', 'Single', 'Divorced', 'Widowed'";
var result_query = `${colName} IN(${marital_list})`;
return result_query;
}

/*
This assertion checks whether the input gender status is within an acceptable list
*/
function test_gender_status(colName){
var gender_list = "'Female','Male','Transgender Female','Transgender Male','Gender Variant','Prefer Not to Say'"
var result_query = `${colName} IN (${gender_list})`
return result_query
function test_gender_status(colName) {
var gender_list =
"'Female','Male','Transgender Female','Transgender Male','Gender Variant','Prefer Not to Say'";
var result_query = `${colName} IN (${gender_list})`;
return result_query;
}

/*
This assertion checks whether the name is valid, i.e. it contains only the letters a-z and A-Z
*/
function test_name_validity(colName){
var result_query = `REGEXP_CONTAINS(${colName}, r'^[a-zA-Z]+$')`
return result_query
function test_name_validity(colName) {
var result_query = `REGEXP_CONTAINS(${colName}, r'^[a-zA-Z]+$')`;
return result_query;
}

/*
This assertion compares two input columns to check that the last name differs from the value in the other column
*/
function test_last_name_unique(colName1, colName2){
var result_query = `${colName1} != ${colName2}`
return result_query
function test_last_name_unique(colName1, colName2) {
var result_query = `${colName1} != ${colName2}`;
return result_query;
}

/*
This assertion checks that no name contains the same letter more than n times in a row
*/
function test_same_character_not_more_than_n_times(colName, n_times){
var regex = `(A{${n_times + 1},})+|` +
`(B{${n_times + 1},})+|` +
`(C{${n_times + 1},})+|` +
`(D{${n_times + 1},})+|` +
`(E{${n_times + 1},})+|` +
`(F{${n_times + 1},})+|` +
`(G{${n_times + 1},})+|` +
`(H{${n_times + 1},})+|` +
`(I{${n_times + 1},})+|` +
`(J{${n_times + 1},})+|` +
`(K{${n_times + 1},})+|` +
`(L{${n_times + 1},})+|` +
`(M{${n_times + 1},})+|` +
`(N{${n_times + 1},})+|` +
`(O{${n_times + 1},})+|` +
`(P{${n_times + 1},})+|` +
`(Q{${n_times + 1},})+|` +
`(R{${n_times + 1},})+|` +
`(S{${n_times + 1},})+|` +
`(T{${n_times + 1},})+|` +
`(U{${n_times + 1},})+|` +
`(V{${n_times + 1},})+|` +
`(W{${n_times + 1},})+|` +
`(X{${n_times + 1},})+|` +
`(Y{${n_times + 1},})+|` +
`(Z{${n_times + 1},})+`
var query_result = `NOT REGEXP_CONTAINS(UPPER(${colName}), r'${regex}')`
return query_result
function test_same_character_not_more_than_n_times(colName, n_times) {
var regex =
`(A{${n_times + 1},})+|` +
`(B{${n_times + 1},})+|` +
`(C{${n_times + 1},})+|` +
`(D{${n_times + 1},})+|` +
`(E{${n_times + 1},})+|` +
`(F{${n_times + 1},})+|` +
`(G{${n_times + 1},})+|` +
`(H{${n_times + 1},})+|` +
`(I{${n_times + 1},})+|` +
`(J{${n_times + 1},})+|` +
`(K{${n_times + 1},})+|` +
`(L{${n_times + 1},})+|` +
`(M{${n_times + 1},})+|` +
`(N{${n_times + 1},})+|` +
`(O{${n_times + 1},})+|` +
`(P{${n_times + 1},})+|` +
`(Q{${n_times + 1},})+|` +
`(R{${n_times + 1},})+|` +
`(S{${n_times + 1},})+|` +
`(T{${n_times + 1},})+|` +
`(U{${n_times + 1},})+|` +
`(V{${n_times + 1},})+|` +
`(W{${n_times + 1},})+|` +
`(X{${n_times + 1},})+|` +
`(Y{${n_times + 1},})+|` +
`(Z{${n_times + 1},})+`;
var query_result = `NOT REGEXP_CONTAINS(UPPER(${colName}), r'${regex}')`;
return query_result;
}

/*
This assertion combines the custom assertions for names (letters only, and no letter repeated more than 3 times in a row)
*/
function test_name(colName, n_times){
var result_query = `${test_name_validity(colName)} AND ${test_same_character_not_more_than_n_times(colName, 3)}`
return result_query
function test_name(colName) {
var result_query = `${test_name_validity(
colName
)} AND ${test_same_character_not_more_than_n_times(colName, 3)}`;
return result_query;
}

module.exports = {
test_email_validity,
test_marital_status,
test_gender_status,
test_name_validity,
test_last_name_unique,
test_same_character_not_more_than_n_times,
test_name
}
test_email_validity,
test_marital_status,
test_gender_status,
test_name_validity,
test_last_name_unique,
test_same_character_not_more_than_n_times,
test_name,
};
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
// Copyright 2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

/*
This test will return FALSE if the telephone number meets any of the following conditions:
- Contains characters other than the digits 0 to 9.
- Is not exactly 10 positions long.
- Starts with a zero.
- If the phone number starts with "55", exclude the "55" at the beginning and validate whether there is one of the following series: "000000", "111111", "222222", "333333", "444444", "555555", "666666", "777777", "888888", "999999", "123456"
- Is assigned to more than 4 policies from different customers.
- Matches any number in the "Frequent case" list.
- In a 10 digit phone number, validate from left to right the long distance code, series and type:

LADA / Long distance code
If it starts with 55, 52, 33 or 81, validate the LADA with 2 positions; otherwise, validate it with 3 positions.

SERIE / SERIES
If the LADA had 2 positions, the series must have 4 digits; if the LADA had 3 positions, the series must have the next 3 positions.

TIPO / TYPE
The value ""TIPO DE RED"" (network type) in the catalog indicates whether the number is mobile or land line.

(URL for National dialing plan : https://sns.ift.org.mx:8081/sns-frontend/planes-numeracion/descarga-publica.xhtm)
*/
/**
 * Builds the SQL predicate that validates a phone number after normalizing it.
 *
 * The column expression is first passed through test_remove_special_chars and
 * then test_remove_leading_zeros; the digit/length check and the repeated-digit
 * check are both applied to that normalized expression.
 *
 * NOTE(review): only the digit, length, leading-zero and repeated-digit rules
 * are applied here; the remaining rules listed in the specification comment
 * are not checked by this function.
 *
 * @param {string} colName - SQL column name or expression holding the phone number.
 * @returns {string} Two SQL boolean expressions joined by a newline and AND.
 */
function test_phone_number_validity(colName) {
  const withoutSpecialChars = test_remove_special_chars(colName);
  const normalized = `${test_remove_leading_zeros(withoutSpecialChars)}`;

  const digitsCheck = test_phone_number_contain_digit(normalized);
  const repeatedCheck = test_repeated_phone_number(normalized);

  return `${digitsCheck}\nAND ${repeatedCheck}`;
}

/*
Builds a SQL expression that strips spaces and these special characters
from the column value:
. : , ; ! " # $ % & / ( ) = ' +
The result is one nested REPLACE call per character, with the space
replacement innermost and the plus sign outermost.
*/
function test_remove_special_chars(colName) {
  // The single quote is backslash-escaped so it stays valid inside the
  // single-quoted SQL string literal.
  const charsToStrip = [
    " ", ".", ":", ",", ";", "!", '"', "#", "$", "%",
    "&", "/", "(", ")", "=", "\\'", "+",
  ];

  return charsToStrip.reduce(
    (expression, ch) => `REPLACE(${expression}, '${ch}', '')`,
    colName
  );
}

/*
Precondition: leading zero removal.
Wraps the column in LTRIM so that every leading "0" is stripped,
e.g. a value starting with "00055" ends up starting with "55".
*/
function test_remove_leading_zeros(colName) {
  const zeroLiteral = "'0'";
  return `LTRIM(${colName}, ${zeroLiteral})`;
}

/*
This assertion checks that the input telephone number is not made up of a
single repeated digit. For each digit 0-9 the value is trimmed of that digit
on both ends; if nothing is left, the value consisted only of that digit
(or was empty) and the assertion fails.
*/
function test_repeated_phone_number(colName) {
  const perDigitChecks = Array.from(
    { length: 10 },
    (_, digit) => `TRIM( ${colName}, "${digit}") != ""`
  );
  return perDigitChecks.join(" AND\n");
}

/*
This assertion checks that the telephone number consists of exactly 10 digits
and that the first digit is not zero.
*/
function test_phone_number_contain_digit(colName) {
  // One digit 1-9 followed by nine more digits, anchored at both ends.
  const tenDigitPattern = "^[1-9]{1}\\d{9}$";
  return `REGEXP_CONTAINS(${colName}, r'${tenDigitPattern}')`;
}

/**
 * Builds the complete SQL predicate used to assert that a phone number column
 * is valid.
 *
 * Combines, in order:
 *   1. test_phone_number_validity  - checks on the normalized value,
 *   2. test_phone_number_contain_digit - 10-digit check on the raw column,
 *   3. test_repeated_phone_number  - single-repeated-digit check on the raw column.
 *
 * The parts are joined with " AND " so each operator is separated from the
 * neighbouring expressions by whitespace, and the digit check on the raw
 * column is emitted once instead of twice.
 *
 * NOTE(review): the raw-column digit check already requires exactly 10 digits
 * with no leading zero, which makes the normalization done inside
 * test_phone_number_validity redundant. It is kept here so the set of
 * accepted values stays unchanged; confirm whether the raw-column checks are
 * intended.
 *
 * @param {string} colName - SQL column name or expression holding the phone number.
 * @returns {string} SQL boolean expression.
 */
function test_phone_number(colName) {
  const checks = [
    test_phone_number_validity(colName),
    test_phone_number_contain_digit(colName),
    test_repeated_phone_number(colName),
  ];
  return checks.join(" AND ");
}

// Only the combined assertion is exported; the helper functions defined
// above are not part of this module's exports.
module.exports = {
test_phone_number,
};
Loading