Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
160 changes: 148 additions & 12 deletions pandas/io/json/_normalize.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,7 @@
abc,
defaultdict,
)
import copy
from typing import (
TYPE_CHECKING,
Any,
DefaultDict,
overload,
)
from typing import TYPE_CHECKING, Any, DefaultDict

import numpy as np

Expand Down Expand Up @@ -46,24 +40,166 @@ def convert_to_line_delimits(s: str) -> str:
return convert_json_to_lines(s)


def nested_to_record(
    ds: dict,
    prefix: str = "",
    sep: str = ".",
    level: int = 0,
    max_level: int | None = None,
) -> dict[str, Any]:
    """
    A simplified json_normalize.

    Converts a nested dict into a flat dict ("record"); unlike json_normalize,
    it does not attempt to extract a subset of the data.

    Parameters
    ----------
    ds : dict or list of dicts
        The record(s) to flatten. The input itself is not modified: each
        record is shallow-copied, so values that are not flattened are shared
        with the input rather than copied.
    prefix : str, default ""
        Key path accumulated so far. Only used when ``level`` is not 0.
    sep : str, default '.'
        Nested records will generate names separated by sep,
        e.g., for sep='.', { 'foo' : { 'bar' : 0 } } -> foo.bar
    level : int, default 0
        Current nesting depth; 0 for the outermost call. Recursive calls
        pass ``level + 1``.
    max_level : int, optional, default None
        The max depth to normalize. Dicts found at ``level >= max_level``
        are kept as values instead of being flattened. ``None`` flattens
        every level.

    Returns
    -------
    dict or list of dicts, matching `ds`

    Examples
    --------
    >>> nested_to_record(
    ...     dict(flat1=1, dict1=dict(c=1, d=2), nested=dict(e=dict(c=1, d=2), d=2))
    ... )
    {\
'flat1': 1, \
'dict1.c': 1, \
'dict1.d': 2, \
'nested.e.c': 1, \
'nested.e.d': 2, \
'nested.d': 2\
}
    """
    # A lone dict is processed as a one-element list and unwrapped on return.
    singleton = isinstance(ds, dict)
    records = [ds] if singleton else ds

    new_ds = []
    for d in records:
        # Shallow copy: keys are popped/renamed on the copy while iterating
        # over the untouched original.
        new_d = dict(d)
        for orig_key, v in d.items():
            # Flattened names are strings, but the entry still lives in
            # ``new_d`` under its original (possibly non-string) key, so keep
            # both around: popping by the stringified key raises KeyError.
            k = orig_key if isinstance(orig_key, str) else str(orig_key)
            newkey = k if level == 0 else prefix + sep + k

            # Only dicts are flattened recursively, and only while the
            # current level is below max_level.
            if not isinstance(v, dict) or (
                max_level is not None and level >= max_level
            ):
                # Top-level leaves keep their key as-is (common case, no
                # work needed); nested leaves are renamed with the prefix.
                if level != 0:
                    new_d[newkey] = new_d.pop(orig_key)
                continue

            # Replace the nested dict with its flattened entries.
            new_d.pop(orig_key)
            new_d.update(nested_to_record(v, newkey, sep, level + 1, max_level))
        new_ds.append(new_d)

    if singleton:
        return new_ds[0]
    return new_ds


def nested_to_record(
    ds: list[dict],
    prefix: str = "",
    sep: str = ".",
    level: int = 0,
    max_level: int | None = None,
) -> list[dict[str, Any]]:
    """
    A simplified json_normalize.

    Converts nested dicts into flat dicts ("records"); unlike json_normalize,
    it does not attempt to extract a subset of the data.

    Parameters
    ----------
    ds : dict or list of dicts
        The record(s) to flatten. The input itself is not modified: each
        record is shallow-copied, so values that are not flattened are shared
        with the input rather than copied.
    prefix : str, default ""
        Key path accumulated so far. Only used when ``level`` is not 0.
    sep : str, default '.'
        Nested records will generate names separated by sep,
        e.g., for sep='.', { 'foo' : { 'bar' : 0 } } -> foo.bar
    level : int, default 0
        Current nesting depth; 0 for the outermost call. Recursive calls
        pass ``level + 1``.
    max_level : int, optional, default None
        The max depth to normalize. Dicts found at ``level >= max_level``
        are kept as values instead of being flattened. ``None`` flattens
        every level.

    Returns
    -------
    dict or list of dicts, matching `ds`

    Examples
    --------
    >>> nested_to_record(
    ...     [dict(flat1=1, dict1=dict(c=1, d=2)), dict(nested=dict(e=dict(c=1), d=2))]
    ... )
    [\
{'flat1': 1, 'dict1.c': 1, 'dict1.d': 2}, \
{'nested.e.c': 1, 'nested.d': 2}\
]
    """
    # A lone dict is processed as a one-element list and unwrapped on return.
    singleton = isinstance(ds, dict)
    records = [ds] if singleton else ds

    new_ds = []
    for d in records:
        # Shallow copy: keys are popped/renamed on the copy while iterating
        # over the untouched original.
        new_d = dict(d)
        for orig_key, v in d.items():
            # Flattened names are strings, but the entry still lives in
            # ``new_d`` under its original (possibly non-string) key, so keep
            # both around: popping by the stringified key raises KeyError.
            k = orig_key if isinstance(orig_key, str) else str(orig_key)
            newkey = k if level == 0 else prefix + sep + k

            # Only dicts are flattened recursively, and only while the
            # current level is below max_level.
            if not isinstance(v, dict) or (
                max_level is not None and level >= max_level
            ):
                # Top-level leaves keep their key as-is (common case, no
                # work needed); nested leaves are renamed with the prefix.
                if level != 0:
                    new_d[newkey] = new_d.pop(orig_key)
                continue

            # Replace the nested dict with its flattened entries.
            new_d.pop(orig_key)
            new_d.update(nested_to_record(v, newkey, sep, level + 1, max_level))
        new_ds.append(new_d)

    if singleton:
        return new_ds[0]
    return new_ds


def nested_to_record(
Expand Down Expand Up @@ -116,7 +252,7 @@ def nested_to_record(
singleton = True
new_ds = []
for d in ds:
new_d = copy.deepcopy(d)
new_d = dict(d)
for k, v in d.items():
# each key gets renamed with prefix
if not isinstance(k, str):
Expand Down