diff --git a/pandas/io/json/_normalize.py b/pandas/io/json/_normalize.py index 45c8876dbe3e5..6cfc7cdb5ed5e 100644 --- a/pandas/io/json/_normalize.py +++ b/pandas/io/json/_normalize.py @@ -6,13 +6,7 @@ abc, defaultdict, ) -import copy -from typing import ( - TYPE_CHECKING, - Any, - DefaultDict, - overload, -) +from typing import TYPE_CHECKING, Any, DefaultDict import numpy as np @@ -46,24 +40,166 @@ def convert_to_line_delimits(s: str) -> str: return convert_json_to_lines(s) -@overload def nested_to_record( ds: dict, prefix: str = ..., sep: str = ..., level: int = ..., max_level: int | None = ..., -) -> dict[str, Any]: ... +) -> dict[str, Any]: + """ + A simplified json_normalize + + Converts a nested dict into a flat dict ("record"), unlike json_normalize, + it does not attempt to extract a subset of the data. + + Parameters + ---------- + ds : dict or list of dicts + prefix: the prefix, optional, default: "" + sep : str, default '.' + Nested records will generate names separated by sep, + e.g., for sep='.', { 'foo' : { 'bar' : 0 } } -> foo.bar + level: int, optional, default: 0 + The number of levels in the json string. + + max_level: int, optional, default: None + The max depth to normalize. + + Returns + ------- + d - dict or list of dicts, matching `ds` + + Examples + -------- + >>> nested_to_record( + ... dict(flat1=1, dict1=dict(c=1, d=2), nested=dict(e=dict(c=1, d=2), d=2)) + ... ) + {\ +'flat1': 1, \ +'dict1.c': 1, \ +'dict1.d': 2, \ +'nested.e.c': 1, \ +'nested.e.d': 2, \ +'nested.d': 2\ +} + """ + singleton = False + if isinstance(ds, dict): + ds = [ds] + singleton = True + new_ds = [] + for d in ds: + new_d = dict(d) + for k, v in d.items(): + # each key gets renamed with prefix + if not isinstance(k, str): + k = str(k) + if level == 0: + newkey = k + else: + newkey = prefix + sep + k + + # flatten if type is dict and + # current dict level < maximum level provided and + # only dicts gets recurse-flattened + # only at level>1 do we rename the rest of the keys + if not isinstance(v, dict) or ( + max_level is not None and level >= max_level + ): + if level != 0: # so we skip copying for top level, common case + v = new_d.pop(k) + new_d[newkey] = v + continue + + v = new_d.pop(k) + new_d.update(nested_to_record(v, newkey, sep, level + 1, max_level)) + new_ds.append(new_d) + + if singleton: + return new_ds[0] + return new_ds -@overload def nested_to_record( ds: list[dict], prefix: str = ..., sep: str = ..., level: int = ..., max_level: int | None = ..., -) -> list[dict[str, Any]]: ... +) -> list[dict[str, Any]]: + """ + A simplified json_normalize + + Converts a nested dict into a flat dict ("record"), unlike json_normalize, + it does not attempt to extract a subset of the data. + + Parameters + ---------- + ds : dict or list of dicts + prefix: the prefix, optional, default: "" + sep : str, default '.' + Nested records will generate names separated by sep, + e.g., for sep='.', { 'foo' : { 'bar' : 0 } } -> foo.bar + level: int, optional, default: 0 + The number of levels in the json string. + + max_level: int, optional, default: None + The max depth to normalize. + + Returns + ------- + d - dict or list of dicts, matching `ds` + + Examples + -------- + >>> nested_to_record( + ... dict(flat1=1, dict1=dict(c=1, d=2), nested=dict(e=dict(c=1, d=2), d=2)) + ... ) + {\ +'flat1': 1, \ +'dict1.c': 1, \ +'dict1.d': 2, \ +'nested.e.c': 1, \ +'nested.e.d': 2, \ +'nested.d': 2\ +} + """ + singleton = False + if isinstance(ds, dict): + ds = [ds] + singleton = True + new_ds = [] + for d in ds: + new_d = dict(d) + for k, v in d.items(): + # each key gets renamed with prefix + if not isinstance(k, str): + k = str(k) + if level == 0: + newkey = k + else: + newkey = prefix + sep + k + + # flatten if type is dict and + # current dict level < maximum level provided and + # only dicts gets recurse-flattened + # only at level>1 do we rename the rest of the keys + if not isinstance(v, dict) or ( + max_level is not None and level >= max_level + ): + if level != 0: # so we skip copying for top level, common case + v = new_d.pop(k) + new_d[newkey] = v + continue + + v = new_d.pop(k) + new_d.update(nested_to_record(v, newkey, sep, level + 1, max_level)) + new_ds.append(new_d) + + if singleton: + return new_ds[0] + return new_ds def nested_to_record( @@ -116,7 +252,7 @@ def nested_to_record( singleton = True new_ds = [] for d in ds: - new_d = copy.deepcopy(d) + new_d = dict(d) for k, v in d.items(): # each key gets renamed with prefix if not isinstance(k, str):