read_json([path_or_buf, orient, typ, dtype, ...])
json_normalize(data[, record_path, meta, ...])
DataFrame.to_json([path_or_buf, orient, ...])
build_table_schema(data[, index, ...])
pandas.read_json(path_or_buf=None,
orient=None,
typ='frame',
dtype=None,
convert_axes=None,
convert_dates=True,
keep_default_dates=True,
numpy=False,
precise_float=False,
date_unit=None,
encoding=None,
encoding_errors='strict',
lines=False,
chunksize=None,
compression='infer',
nrows=None,
storage_options=None)
参数:
返回值: Series or DataFrame
示例:
json文件内容:
[{"ttery":"[123]","issue":"20130801-3391"},{"ttery":"[123]","issue":"20130801-3390"},{"ttery":"[123]","issue":"20130801-3389"}]
# -*- coding: utf-8 -*-
import pandas as pd
# Read the JSON file inside a context manager so the handle is always closed,
# even if parsing raises.
with open('ceshi.json', 'r', encoding='utf-8') as file:
    # orient='records' matches the file layout: a list of {column: value} objects.
    df = pd.read_json(file, orient='records')
# Write the two columns to an Excel workbook, leaving out the row index.
df.to_excel('pandas处理ceshi-json.xlsx', index=False, columns=["ttery", "issue"])
pandas.json_normalize(data,
record_path=None,
meta=None,
meta_prefix=None,
record_prefix=None,
errors='raise',
sep='.',
max_level=None)
参数:
返回值: frame:DataFrame
示例:
# Three records with inconsistent shapes: "name" is a nested dict in the first
# two (with different keys) and a plain string in the third; the second record
# has no "id".
data = [
    {
        "id": 1,
        "name": {"first": "Coleen", "last": "Volk"},
    },
    {
        "name": {"given": "Mark", "family": "Regner"},
    },
    {
        "id": 2,
        "name": "Faye Raker",
    },
]
# Nested keys are flattened into dotted column names; absent keys show up as NaN.
pd.json_normalize(data)
    id name.first name.last name.given name.family        name
0  1.0     Coleen      Volk        NaN         NaN         NaN
1  NaN        NaN       NaN       Mark      Regner         NaN
2  2.0        NaN       NaN        NaN         NaN  Faye Raker
# Three people, each carrying a nested "fitness" mapping; the second has no "id".
data = [
    {"id": 1, "name": "Cole Volk", "fitness": {"height": 130, "weight": 60}},
    {"name": "Mark Reg", "fitness": {"height": 130, "weight": 60}},
    {"id": 2, "name": "Faye Raker", "fitness": {"height": 130, "weight": 60}},
]
# max_level=0 disables flattening, so "fitness" stays a single dict-valued column.
pd.json_normalize(data, max_level=0)
    id        name                        fitness
0  1.0   Cole Volk  {'height': 130, 'weight': 60}
1  NaN    Mark Reg  {'height': 130, 'weight': 60}
2  2.0  Faye Raker  {'height': 130, 'weight': 60}
# Same three records as the max_level=0 example: a nested "fitness" mapping on
# every record, and no "id" on the second one.
data = [
    {"id": 1, "name": "Cole Volk", "fitness": {"height": 130, "weight": 60}},
    {"name": "Mark Reg", "fitness": {"height": 130, "weight": 60}},
    {"id": 2, "name": "Faye Raker", "fitness": {"height": 130, "weight": 60}},
]
# max_level=1 flattens one level of nesting, producing "fitness.height" and
# "fitness.weight" columns.
pd.json_normalize(data, max_level=1)
    id        name  fitness.height  fitness.weight
0  1.0   Cole Volk             130              60
1  NaN    Mark Reg             130              60
2  2.0  Faye Raker             130              60
# Two state records, each with top-level fields, a nested "info" mapping, and a
# "counties" list of per-county records.
data = [
    {
        "state": "Florida",
        "shortname": "FL",
        "info": {"governor": "Rick Scott"},
        "counties": [
            {"name": "Dade", "population": 12345},
            {"name": "Broward", "population": 40000},
            {"name": "Palm Beach", "population": 60000},
        ],
    },
    {
        "state": "Ohio",
        "shortname": "OH",
        "info": {"governor": "John Kasich"},
        "counties": [
            {"name": "Summit", "population": 1234},
            {"name": "Cuyahoga", "population": 1337},
        ],
    },
]
# One output row per county; the parent state's fields are repeated on each row.
# A nested meta path such as ["info", "governor"] becomes the "info.governor" column.
result = pd.json_normalize(
    data,
    record_path="counties",
    meta=["state", "shortname", ["info", "governor"]],
)
         name  population    state shortname info.governor
0        Dade       12345  Florida        FL    Rick Scott
1     Broward       40000  Florida        FL    Rick Scott
2  Palm Beach       60000  Florida        FL    Rick Scott
3      Summit        1234     Ohio        OH   John Kasich
4    Cuyahoga        1337     Ohio        OH   John Kasich
DataFrame.to_json(path_or_buf=None,
orient=None,
date_format=None,
double_precision=10,
force_ascii=True,
date_unit='ms',
default_handler=None,
lines=False,
compression='infer',
index=True,
indent=None,
storage_options=None)
参数:
返回值: None or str
示例:
import json
# A 2x2 frame of strings with labelled rows and columns.
df = pd.DataFrame(
    data=[["a", "b"], ["c", "d"]],
    index=["row 1", "row 2"],
    columns=["col 1", "col 2"],
)
# orient="split" serialises the frame as separate "columns", "index" and "data" entries.
result = df.to_json(orient="split")
# Round-trip through the json module so the text can be pretty-printed.
parsed = json.loads(result)
json.dumps(parsed, indent=4)
{
    "columns": [
        "col 1",
        "col 2"
    ],
    "index": [
        "row 1",
        "row 2"
    ],
    "data": [
        [
            "a",
            "b"
        ],
        [
            "c",
            "d"
        ]
    ]
}
pandas.io.json.build_table_schema(data, index=True, primary_key=None, version=True)
参数:
返回值: schema:dict
示例:
# build_table_schema lives in pandas.io.json, not the top-level pandas
# namespace, so it must be imported before it can be called by its bare name.
from pandas.io.json import build_table_schema
# Small frame with integer, string and datetime columns and a named integer index.
df = pd.DataFrame(
    {
        'A': [1, 2, 3],
        'B': ['a', 'b', 'c'],
        # 'D' is the canonical daily frequency alias; the lowercase 'd' spelling
        # is deprecated in recent pandas releases.
        'C': pd.date_range('2016-01-01', freq='D', periods=3),
    },
    index=pd.Index(range(3), name='idx'),
)
# Returns a Table Schema dict: one field entry for the index and for each
# column, with the index name listed as the primary key.
build_table_schema(df)
{'fields': [{'name': 'idx', 'type': 'integer'}, {'name': 'A', 'type': 'integer'}, {'name': 'B', 'type': 'string'}, {'name': 'C', 'type': 'datetime'}], 'primaryKey': ['idx'], 'pandas_version': '1.4.0'}