from urllib.parse import quote_plus

import dask.dataframe as dd
from sqlalchemy import column, func, select, table, text

from config import dbs
# Load the result of an aggregate query on the MySQL table `test` into a
# pandas DataFrame via dask, indexed by the `id` column.

# SQLAlchemy connection URI; dask's read_sql_query receives it as a string.
# NOTE(review): the credentials below are hard-coded placeholders while `dbs`
# is imported from config but never used -- presumably the real values should
# come from there; confirm.
con_template = "mysql+pymysql://{user}:{password}@{host}/{db}?charset=utf8mb4"
db_user, db_password = "user", "password"
db_host, db_name = "host", "db"

# Percent-encode the credentials so characters such as "@", ":" or "/" in a
# real user name or password cannot corrupt the URI.
con = con_template.format(
    user=quote_plus(db_user),
    password=quote_plus(db_password),
    host=db_host,
    db=db_name,
)

# Build the query from SQLAlchemy constructs instead of a text() fragment
# that omits the SELECT keyword: read_sql_query expects a Selectable, and a
# real Select carries its own column list and FROM clause.
# The positional select(...) form requires SQLAlchemy 1.4 or newer.
# NOTE(review): `id` is selected next to count(*) without a GROUP BY, exactly
# as in the original SQL; MySQL rejects this when ONLY_FULL_GROUP_BY is
# enabled -- confirm whether a per-id count (GROUP BY id) was intended.
id_col = column("id")
select_stmt = select(id_col, func.count().label("total")).select_from(
    table("test")
)

# Debug output: show the URI with the password masked so the secret does not
# end up in logs, then the SQL text of the statement.
print(
    con_template.format(
        user=quote_plus(db_user), password="***", host=db_host, db=db_name
    )
)
print(select_stmt)

# .compute() materialises the lazy dask DataFrame as a pandas DataFrame.
df1 = dd.read_sql_query(sql=select_stmt, con=con, index_col="id").compute()
print(df1.shape)
# 注意:
# dask 与 SQLAlchemy 2.0 及以上版本不兼容,需要安装 2.0 以下的版本:
#   python -m pip install --upgrade 'sqlalchemy<2.0'
# 另外的方案:dask-sql
#   https://dask-sql.readthedocs.io/en/0.3.1/index.html
# 参考链接:
#   https://www.dask.org/get-started
#   https://www.appsloveworld.com/pandas/100/180/reading-an-sql-query-into-a-dask-dataframe
#   https://docs.dask.org/en/stable/dataframe-api.html
#   https://cloud.tencent.com/developer/ask/sof/107228030