|
| 1 | +#!/usr/bin/env python3 |
| 2 | +# |
| 3 | +# SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. |
| 4 | +# SPDX-License-Identifier: Apache-2.0 |
| 5 | +# |
| 6 | +# Licensed under the Apache License, Version 2.0 (the "License"); |
| 7 | +# you may not use this file except in compliance with the License. |
| 8 | +# You may obtain a copy of the License at |
| 9 | +# |
| 10 | +# http://www.apache.org/licenses/LICENSE-2.0 |
| 11 | +# |
| 12 | +# Unless required by applicable law or agreed to in writing, software |
| 13 | +# distributed under the License is distributed on an "AS IS" BASIS, |
| 14 | +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 15 | +# See the License for the specific language governing permissions and |
| 16 | +# limitations under the License. |
| 17 | +# |
| 18 | +# ----- |
| 19 | +# |
| 20 | +# Certain portions of the contents of this file are derived from TPC-DS version 3.2.0 |
| 21 | +# (retrieved from www.tpc.org/tpc_documents_current_versions/current_specifications5.asp). |
| 22 | +# Such portions are subject to copyrights held by Transaction Processing Performance Council (“TPC”) |
| 23 | +# and licensed under the TPC EULA (a copy of which accompanies this file as “TPC EULA” and is also |
| 24 | +# available at http://www.tpc.org/tpc_documents_current_versions/current_specifications5.asp) (the “TPC EULA”). |
| 25 | +# |
| 26 | +# You may not use this file except in compliance with the TPC EULA. |
| 27 | +# DISCLAIMER: Portions of this file is derived from the TPC-DS Benchmark and as such any results |
| 28 | +# obtained using this file are not comparable to published TPC-DS Benchmark results, as the results |
| 29 | +# obtained from using this file do not comply with the TPC-DS Benchmark. |
| 30 | +# |
| 31 | + |
| 32 | +import argparse |
| 33 | +import os |
| 34 | +import sys |
| 35 | +from pathlib import Path |
| 36 | + |
| 37 | + |
def check_version():
    """Ensure the running interpreter is Python 3.6 or newer.

    Raises:
        Exception: the current Python version is older than 3.6
    """
    minimum = (3, 6)
    running = sys.version_info
    if running >= minimum:
        return
    # report only major.minor of the interpreter that is too old
    found = str(running.major) + '.' + str(running.minor)
    raise Exception('Minimum required Python version is 3.6, but current python version is {}.'
                    .format(found) +
                    ' Please use proper Python version')
| 45 | + |
| 46 | + |
def check_build():
    """Locate the built tpcds-gen jar and the dsdgen executable.

    Both are searched for recursively below the `tpcds-gen/target` folder that sits next to
    this script, so the script is assumed not to have been moved out of the source tree.

    Raises:
        Exception: the build is not done or broken (jar or dsdgen not found)

    Returns:
        Path, Path: path of the first matching jar and of the first matching dsdgen executable
    """
    # Check if necessary executable or jars are built.
    # we assume user won't move this script.
    src_dir = Path(__file__).parent.absolute()
    jar_path = list((src_dir / 'tpcds-gen/target').rglob("tpcds-gen-*.jar"))
    tool_path = list((src_dir / 'tpcds-gen/target/tools').rglob("dsdgen"))
    if not jar_path or not tool_path:
        # keep a space after "folder." so the two sentences do not run together
        raise Exception('Target jar file is not found in `target` folder or dsdgen executable is ' +
                        'not found in `target/tools` folder. ' +
                        'Please refer to README document and build this project first.')
    return jar_path[0], tool_path[0]
| 67 | + |
| 68 | + |
def get_abs_path(input_path):
    """Turn a user supplied path into an absolute one.

    Args:
        input_path (str): user's input path

    Returns:
        str: the input itself when it is already absolute; otherwise the input prefixed with
        the current working directory and a '/'.
    """
    if not Path(input_path).is_absolute():
        # relative paths are resolved against the directory the script is executed from
        return '/'.join((os.getcwd(), input_path))
    return input_path
| 86 | + |
| 87 | + |
def valid_range(range, parallel):
    """Validate a child range string and return its bounds.

    Args:
        range (str): a range specified for a range data generation, e.g. "1,10"
        parallel (str): string type number for parallelism in TPC-DS data generation, e.g. "20"

    Raises:
        Exception: the range is not exactly two comma separated parts, or it violates
            1 <= range_start <= range_end <= parallel.
        ValueError: a bound of the range is not an integer (ValueError is itself an Exception).

    Returns:
        int, int: range_start and range_end
    """
    # NOTE: the parameter name shadows the builtin `range`; it is kept so that existing
    # keyword-argument callers continue to work.
    parts = range.split(',')
    if len(parts) != 2:
        msg = 'Invalid range: please specify a range with a comma between start and end. e.g., "1,10".'
        raise Exception(msg)
    try:
        range_start = int(parts[0])
        range_end = int(parts[1])
    except ValueError as err:
        # replace the bare "invalid literal for int()" text with an actionable message
        msg = 'Invalid range: start and end must be integers. e.g., "1,10".'
        raise ValueError(msg) from err
    if range_start < 1 or range_start > range_end or range_end > int(parallel):
        msg = 'Please provide correct child range: 1 <= range_start <= range_end <= parallel'
        raise Exception(msg)
    return range_start, range_end
| 107 | + |
| 108 | + |
def parallel_value_type(p):
    """Helper function to check the parallel value.

    Args:
        p (str): parallel value

    Raises:
        argparse.ArgumentTypeError: the integer value of p is smaller than 2

    Returns:
        str: the parallel value, unchanged
    """
    too_small = int(p) < 2
    if not too_small:
        return p
    raise argparse.ArgumentTypeError("PARALLEL must be >= 2")
| 124 | + |
| 125 | + |
def get_dir_size(start_path):
    """Sum the sizes of all files found below a directory.

    Args:
        start_path (str): directory that is walked recursively with os.walk

    Returns:
        int: total size in bytes as reported by os.path.getsize; symbolic links are skipped
    """
    candidates = (
        os.path.join(root, name)
        for root, _subdirs, names in os.walk(start_path)
        for name in names
    )
    # skip symbolic links so that only the files themselves are counted
    return sum(os.path.getsize(path) for path in candidates if not os.path.islink(path))
| 135 | + |
def check_json_summary_folder(json_summary_folder):
    """Prepare the folder used to save json summaries of query results.

    Nothing is done when the argument is empty or None. A folder that does not exist yet is
    created; an existing folder must be empty.

    Args:
        json_summary_folder (str): path of the folder, may be empty or None

    Raises:
        Exception: the folder already exists and is not empty
    """
    if not json_summary_folder:
        return
    if not os.path.exists(json_summary_folder):
        os.makedirs(json_summary_folder)
        return
    if os.listdir(json_summary_folder):
        raise Exception(f"json_summary_folder {json_summary_folder} is not empty. " +
                        "There may be already some json files there. Please clean the folder " +
                        "or specify another one.")
| 146 | + |
def check_query_subset_exists(query_dict, subset_list):
    """Verify that every query named in the subset is a key of the query dictionary.

    Args:
        query_dict (dict): dictionary whose keys are the available query names
        subset_list (list): query names to look up

    Raises:
        Exception: a name in subset_list is not a key of query_dict

    Returns:
        bool: True when all names were found
    """
    for name in subset_list:
        if name in query_dict.keys():
            continue
        raise Exception(f"Query {name} is not in the query dictionary. Please check the query subset.")
    return True
0 commit comments