import pandas as pd
# Demonstrates several equivalent ways of filtering DataFrame rows with pandas.
#
# NOTE(review): header=1 together with names= appears to make pandas treat the
# second line of the file as the header row being replaced, which would also
# discard the first line. Confirm this matches the layout of data.csv;
# header=0 is the usual choice when a single header line is being replaced.
df = pd.read_csv("data.csv", header=1, names=["index", "id", "url"])

# Drop every row that has a missing value in any column.
df.dropna(how="any", inplace=True)

# Boolean-mask selection. Columns are accessed with brackets throughout:
# attribute access cannot reach the "index" column, because df.index is the
# DataFrame's row index.
newdf1 = df[(df["id"] == 4099963) | (df["id"] == 5181745)]
print("newdf1:", newdf1)

# The same filter expressed as a DataFrame.query() expression string.
newdf2 = df.query("id == 4099963 | id == 5181745")
print("newdf2:", newdf2)

# The same filter through the .loc indexer.
newdf3 = df.loc[(df["id"] == 4099963) | (df["id"] == 5181745)]
print("newdf3:", newdf3)

# Positional row/column slicing: rows at positions 1..4 and up to the first
# four columns (the frame only has three, so all of them are returned).
newdf4 = df.iloc[1:5, :4]
print("newdf4:", newdf4)

# Keep rows whose url is not null. After the dropna() above this keeps every
# row; it is here to illustrate the idiom.
newdf5 = df[df["url"].notnull()]

# String-accessor filter: rows whose url contains the literal text "https".
# regex=False makes the literal (non-regex) match explicit.
newdf6 = df[df["url"].str.contains("https", regex=False)]
print(newdf6)

# Length filter: rows whose url is longer than 10 characters. The vectorized
# .str.len() replaces a row-wise df.apply(..., axis=1) call.
newdf7 = df[df["url"].str.len() > 10]
print(newdf7)

# References:
# https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html
# http://www.360doc.com/document/23/0624/20/82716111_1086091874.shtml