1111 - HuggingFace
1212 - VisionArena
1313"""
14+ import argparse
1415import ast
1516import base64
1617import io
@@ -1019,6 +1020,25 @@ def sample(
10191020 return samples
10201021
10211022
1023+ class _ValidateDatasetArgs (argparse .Action ):
1024+ """Argparse action to validate dataset name and path compatibility."""
1025+ def __call__ (self , parser , namespace , values , option_string = None ):
1026+ setattr (namespace , self .dest , values )
1027+
1028+ # Get current values of both dataset_name and dataset_path
1029+ dataset_name = getattr (namespace , 'dataset_name' , 'random' )
1030+ dataset_path = getattr (namespace , 'dataset_path' , None )
1031+
1032+ # Validate the combination
1033+ if dataset_name == "random" and dataset_path is not None :
1034+ parser .error (
1035+ "Cannot use 'random' dataset with --dataset-path. "
1036+ "Please specify the appropriate --dataset-name (e.g., "
1037+ "'sharegpt', 'custom', 'sonnet') for your dataset file: "
1038+ f"{ dataset_path } "
1039+ )
1040+
1041+
10221042def add_dataset_parser (parser : FlexibleArgumentParser ):
10231043 parser .add_argument ("--seed" , type = int , default = 0 )
10241044 parser .add_argument (
@@ -1031,6 +1051,7 @@ def add_dataset_parser(parser: FlexibleArgumentParser):
10311051 "--dataset-name" ,
10321052 type = str ,
10331053 default = "random" ,
1054+ action = _ValidateDatasetArgs ,
10341055 choices = [
10351056 "sharegpt" , "burstgpt" , "sonnet" , "random" , "random-mm" , "hf" ,
10361057 "custom" , "prefix_repetition" , "spec_bench"
@@ -1046,6 +1067,7 @@ def add_dataset_parser(parser: FlexibleArgumentParser):
10461067 "--dataset-path" ,
10471068 type = str ,
10481069 default = None ,
1070+ action = _ValidateDatasetArgs ,
10491071 help = "Path to the sharegpt/sonnet dataset. "
10501072 "Or the huggingface dataset ID if using HF dataset." ,
10511073 )
0 commit comments