Skip to content

Problem with get_dataset when dataset does not have qualities #1290

@LizzAlice

Description

@LizzAlice

Description

I tried calling `openml.datasets.get_dataset(202, download_data=False)`, but it failed with the following error:

---------------------------------------------------------------------------
FileNotFoundError                         Traceback (most recent call last)
File ~/lib/python3.9/site-packages/openml/datasets/functions.py:1241, in _get_dataset_qualities_file(did_cache_dir, dataset_id)
   1240 try:
-> 1241     with io.open(qualities_file, encoding="utf8") as fh:
   1242         qualities_xml = fh.read()

FileNotFoundError: [Errno 2] No such file or directory: '/home/.cache/openml/org/openml/www/datasets/202/qualities.xml'

During handling of the above exception, another exception occurred:

OpenMLServerException                     Traceback (most recent call last)
Cell In[12], line 2
      1 for did in did_list:
----> 2             ds = openml.datasets.get_dataset(int(did), download_data=False)

File ~/lib/python3.9/site-packages/openml/datasets/functions.py:514, in get_dataset(dataset_id, download_data, version, error_if_multiple, cache_format, download_qualities, download_features_meta_data, download_all_files, force_refresh_cache)
    512         raise OpenMLPrivateDatasetError(e.message) from None
    513     else:
--> 514         raise e
    515 finally:
    516     if remove_dataset_cache:

File ~/lib/python3.9/site-packages/openml/datasets/functions.py:493, in get_dataset(dataset_id, download_data, version, error_if_multiple, cache_format, download_qualities, download_features_meta_data, download_all_files, force_refresh_cache)
    491     features_file = _get_dataset_features_file(did_cache_dir, dataset_id)
    492 if download_qualities:
--> 493     qualities_file = _get_dataset_qualities_file(did_cache_dir, dataset_id)
    495 arff_file = _get_dataset_arff(description) if download_data else None
    496 if "oml:minio_url" in description and download_data:

File ~/lib/python3.9/site-packages/openml/datasets/functions.py:1245, in _get_dataset_qualities_file(did_cache_dir, dataset_id)
   1243 except (OSError, IOError):
   1244     try:
-> 1245         qualities_xml = _get_qualities_xml(dataset_id)
   1246         with io.open(qualities_file, "w", encoding="utf8") as fh:
   1247             fh.write(qualities_xml)

File ~/lib/python3.9/site-packages/openml/datasets/functions.py:1205, in _get_qualities_xml(dataset_id)
   1203 def _get_qualities_xml(dataset_id):
   1204     url_extension = f"data/qualities/{dataset_id}"
-> 1205     return openml._api_calls._perform_api_call(url_extension, "get")

File ~/lib/python3.9/site-packages/openml/_api_calls.py:99, in _perform_api_call(call, request_method, data, file_elements)
     97     response = _read_url_files(url, data=data, file_elements=file_elements)
     98 else:
---> 99     response = __read_url(url, request_method, data)
    101 __check_response(response, url, file_elements)
    103 logging.info(
    104     "%.7fs taken for [%s] request for the URL %s",
    105     time.time() - start,
    106     request_method,
    107     url,
    108 )

File ~/lib/python3.9/site-packages/openml/_api_calls.py:308, in __read_url(url, request_method, data, md5_checksum)
    306 if config.apikey:
    307     data["api_key"] = config.apikey
--> 308 return _send_request(
    309     request_method=request_method, url=url, data=data, md5_checksum=md5_checksum
    310 )

File ~/lib/python3.9/site-packages/openml/_api_calls.py:344, in _send_request(request_method, url, data, files, md5_checksum)
    342 else:
    343     raise NotImplementedError()
--> 344 __check_response(response=response, url=url, file_elements=files)
    345 if request_method == "get" and not __is_checksum_equal(
    346     response.text.encode("utf-8"), md5_checksum
    347 ):
    348     # -- Check if encoding is not UTF-8 perhaps
    349     if __is_checksum_equal(response.content, md5_checksum):

File ~/lib/python3.9/site-packages/openml/_api_calls.py:409, in __check_response(response, url, file_elements)
    405 def __check_response(
    406     response: requests.Response, url: str, file_elements: Optional[FILE_ELEMENTS_TYPE]
    407 ) -> None:
    408     if response.status_code != 200:
--> 409         raise __parse_server_exception(response, url, file_elements=file_elements)
    410     elif (
    411         "Content-Encoding" not in response.headers or response.headers["Content-Encoding"] != "gzip"
    412     ):
    413         logging.warning("Received uncompressed content from OpenML for {}.".format(url))

OpenMLServerException: https://www.openml.org/api/v1/xml/data/qualities/202 returned code 362: No qualities found - None

Versions

Linux-5.10.0-26-amd64-x86_64-with-glibc2.31
Python 3.9.2 (default, Feb 28 2021, 17:03:44)
[GCC 10.2.1 20210110]
NumPy 1.26.2
SciPy 1.11.4
Scikit-Learn 1.3.2
OpenML 0.14.1

Metadata

Metadata

Assignees

No one assigned

    Labels

    enhancement, serverside (These issues are present in the REST API and not fixable by the Python package.)

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions