• R包学习——reshape包中melt、cast、merge函数用法


    本文使用的例子皆为官方example,在Rstudio中使用 ?+函数名 即可查看。

    1. melt:Melt an object into a form suitable for easy casting.

    > head(tips)
      total_bill  tip    sex smoker day   time size
    1      16.99 1.01 Female     No Sun Dinner    2
    2      10.34 1.66   Male     No Sun Dinner    3
    3      21.01 3.50   Male     No Sun Dinner    3
    4      23.68 3.31   Male     No Sun Dinner    2
    5      24.59 3.61 Female     No Sun Dinner    4
    6      25.29 4.71   Male     No Sun Dinner    4
    > head(melt(tips))
    Using sex, smoker, day, time as id variables
         sex smoker day   time   variable value
    1 Female     No Sun Dinner total_bill 16.99
    2   Male     No Sun Dinner total_bill 10.34
    3   Male     No Sun Dinner total_bill 21.01
    4   Male     No Sun Dinner total_bill 23.68
    5 Female     No Sun Dinner total_bill 24.59
    6   Male     No Sun Dinner total_bill 25.29
    > 
    > head(airquality)
      ozone solar.r wind temp month day
    1    41     190  7.4   67     5   1
    2    36     118  8.0   72     5   2
    3    12     149 12.6   74     5   3
    4    18     313 11.5   62     5   4
    5    NA      NA 14.3   56     5   5
    6    28      NA 14.9   66     5   6
    > names(airquality) <- tolower(names(airquality))
    > head(melt(airquality, id=c("month", "day")))
      month day variable value
    1     5   1    ozone    41
    2     5   2    ozone    36
    3     5   3    ozone    12
    4     5   4    ozone    18
    5     5   5    ozone    NA
    6     5   6    ozone    28
    > 
    > head(ChickWeight) 
      weight time chick diet
    1     42    0     1    1
    2     51    2     1    1
    3     59    4     1    1
    4     64    6     1    1
    5     76    8     1    1
    6     93   10     1    1
    > names(ChickWeight) <- tolower(names(ChickWeight))
    > head(melt(ChickWeight, id=2:4))
      time chick diet variable value
    1    0     1    1   weight    42
    2    2     1    1   weight    51
    3    4     1    1   weight    59
    4    6     1    1   weight    64
    5    8     1    1   weight    76
    6   10     1    1   weight    93
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16
    • 17
    • 18
    • 19
    • 20
    • 21
    • 22
    • 23
    • 24
    • 25
    • 26
    • 27
    • 28
    • 29
    • 30
    • 31
    • 32
    • 33
    • 34
    • 35
    • 36
    • 37
    • 38
    • 39
    • 40
    • 41
    • 42
    • 43
    • 44
    • 45
    • 46
    • 47
    • 48
    • 49
    • 50
    • 51
    • 52
    • 53

    2. cast:Cast a molten data frame into the reshaped or aggregated form you want

    > names(airquality) <- tolower(names(airquality))
    > aqm <- melt(airquality, id=c("month", "day"), na.rm=TRUE)
    > head(aqm)
      month day variable value
    1     5   1    ozone    41
    2     5   2    ozone    36
    3     5   3    ozone    12
    4     5   4    ozone    18
    5     5   6    ozone    28
    6     5   7    ozone    23
    
    
    > str(cast(aqm, day ~ month ~ variable))
     num [1:31, 1:5, 1:4] 41 36 12 18 NA 28 23 19 8 NA ...
     - attr(*, "dimnames")=List of 3
      ..$ day     : Named chr [1:31] "1" "2" "3" "4" ...
      .. ..- attr(*, "names")= chr [1:31] "1" "20" "39" "58" ...
      ..$ month   : Named chr [1:5] "5" "6" "7" "8" ...
      .. ..- attr(*, "names")= chr [1:5] "1" "600" "8" "12" ...
      ..$ variable: Named chr [1:4] "ozone" "solar.r" "wind" "temp"
      .. ..- attr(*, "names")= chr [1:4] "1" "2" "3" "4"
    
    
    > cast(aqm, month ~ variable, mean)
      month    ozone  solar.r      wind     temp
    1     5 23.61538 181.2963 11.622581 65.54839
    2     6 29.44444 190.1667 10.266667 79.10000
    3     7 59.11538 216.4839  8.941935 83.90323
    4     8 59.96154 171.8571  8.793548 83.96774
    5     9 31.44828 167.4333 10.180000 76.90000
    > str(cast(aqm, month ~ variable, mean))
    List of 5
     $ month  : int [1:5] 5 6 7 8 9
     $ ozone  : num [1:5] 23.6 29.4 59.1 60 31.4
     $ solar.r: num [1:5] 181 190 216 172 167
     $ wind   : num [1:5] 11.62 10.27 8.94 8.79 10.18
     $ temp   : num [1:5] 65.5 79.1 83.9 84 76.9
     - attr(*, "row.names")= int [1:5] 1 2 3 4 5
     - attr(*, "idvars")= chr "month"
     - attr(*, "rdimnames")=List of 2
      ..$ :'data.frame':	5 obs. of  1 variable:
      .. ..$ month: int [1:5] 5 6 7 8 9
      ..$ :'data.frame':	4 obs. of  1 variable:
      .. ..$ variable: Factor w/ 4 levels "ozone","solar.r",..: 1 2 3 4
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16
    • 17
    • 18
    • 19
    • 20
    • 21
    • 22
    • 23
    • 24
    • 25
    • 26
    • 27
    • 28
    • 29
    • 30
    • 31
    • 32
    • 33
    • 34
    • 35
    • 36
    • 37
    • 38
    • 39
    • 40
    • 41
    • 42
    • 43
    • 44

    3. merge:Merge two data frames by common columns or row names, or do other versions of database join operations.

    (1)数据准备

    > authors <- data.frame(
    +   ## I(*) : use character columns of names to get sensible sort order
    +   surname = I(c("Tukey", "Venables", "Tierney", "Ripley", "McNeil")),
    +   nationality = c("US", "Australia", "US", "UK", "Australia"),
    +   deceased = c("yes", rep("no", 4)))
    > authors
       surname nationality deceased
    1    Tukey          US      yes
    2 Venables   Australia       no
    3  Tierney          US       no
    4   Ripley          UK       no
    5   McNeil   Australia       no
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    # 建立副本,新增一列surname改成name,删除surname列
    > authorN <- within(authors, { name <- surname; rm(surname) })
    > authorN
      nationality deceased     name
    1          US      yes    Tukey
    2   Australia       no Venables
    3          US       no  Tierney
    4          UK       no   Ripley
    5   Australia       no   McNeil
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    > books <- data.frame(
    +   name = I(c("Tukey", "Venables", "Tierney",
    +              "Ripley", "Ripley", "McNeil", "R Core")),
    +   title = c("Exploratory Data Analysis",
    +             "Modern Applied Statistics ...",
    +             "LISP-STAT",
    +             "Spatial Statistics", "Stochastic Simulation",
    +             "Interactive Data Analysis",
    +             "An Introduction to R"),
    +   other.author = c(NA, "Ripley", NA, NA, NA, NA,
    +                    "Venables & Smith"))
    > books
          name                         title     other.author
    1    Tukey     Exploratory Data Analysis             <NA>
    2 Venables Modern Applied Statistics ...           Ripley
    3  Tierney                     LISP-STAT             <NA>
    4   Ripley            Spatial Statistics             <NA>
    5   Ripley         Stochastic Simulation             <NA>
    6   McNeil     Interactive Data Analysis             <NA>
    7   R Core          An Introduction to R  Venables & Smith
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16
    • 17
    • 18
    • 19
    • 20

    (2)默认以两个dataframe都拥有的列(列名相同)进行merge

    
    > (m0 <- merge(authorN, books))
          name nationality deceased                         title other.author
    1   McNeil   Australia       no     Interactive Data Analysis         <NA>
    2   Ripley          UK       no            Spatial Statistics         <NA>
    3   Ripley          UK       no         Stochastic Simulation         <NA>
    4  Tierney          US       no                     LISP-STAT         <NA>
    5    Tukey          US      yes     Exploratory Data Analysis         <NA>
    6 Venables   Australia       no Modern Applied Statistics ...       Ripley
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9

    (3)当两个dataframe没有相同列名时,可以指定列进行merge

    注意:merge的几种使用方法等同于dplyr包中的四种函数(inner_join、left_join、right_join、full_join

    i. inner_join

    > library(dplyr)
    
    > (m1 <- merge(authors, books, by.x = "surname", by.y = "name"))
       surname nationality deceased                         title other.author
    1   McNeil   Australia       no     Interactive Data Analysis         <NA>
    2   Ripley          UK       no            Spatial Statistics         <NA>
    3   Ripley          UK       no         Stochastic Simulation         <NA>
    4  Tierney          US       no                     LISP-STAT         <NA>
    5    Tukey          US      yes     Exploratory Data Analysis         <NA>
    6 Venables   Australia       no Modern Applied Statistics ...       Ripley
    
    > (inner_join(authors, books, by = c("surname" = "name")))
       surname nationality deceased                         title other.author
    1    Tukey          US      yes     Exploratory Data Analysis         <NA>
    2 Venables   Australia       no Modern Applied Statistics ...       Ripley
    3  Tierney          US       no                     LISP-STAT         <NA>
    4   Ripley          UK       no            Spatial Statistics         <NA>
    5   Ripley          UK       no         Stochastic Simulation         <NA>
    6   McNeil   Australia       no     Interactive Data Analysis         <NA>
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16
    • 17
    • 18
    • 19
    > (m2 <- merge(books, authors, by.x = "name", by.y = "surname"))
          name                         title other.author nationality deceased
    1   McNeil     Interactive Data Analysis         <NA>   Australia       no
    2   Ripley            Spatial Statistics         <NA>          UK       no
    3   Ripley         Stochastic Simulation         <NA>          UK       no
    4  Tierney                     LISP-STAT         <NA>          US       no
    5    Tukey     Exploratory Data Analysis         <NA>          US      yes
    6 Venables Modern Applied Statistics ...       Ripley   Australia       no
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8

    ii. left_join

    
    > merge(authors, books, by.x = "surname", by.y = "name", all.x = TRUE)
       surname nationality deceased                         title other.author
    1   McNeil   Australia       no     Interactive Data Analysis         <NA>
    2   Ripley          UK       no            Spatial Statistics         <NA>
    3   Ripley          UK       no         Stochastic Simulation         <NA>
    4  Tierney          US       no                     LISP-STAT         <NA>
    5    Tukey          US      yes     Exploratory Data Analysis         <NA>
    6 Venables   Australia       no Modern Applied Statistics ...       Ripley	
    
    > left_join(authors, books, by = c("surname" = "name"))
       surname nationality deceased                         title other.author
    1    Tukey          US      yes     Exploratory Data Analysis         <NA>
    2 Venables   Australia       no Modern Applied Statistics ...       Ripley
    3  Tierney          US       no                     LISP-STAT         <NA>
    4   Ripley          UK       no            Spatial Statistics         <NA>
    5   Ripley          UK       no         Stochastic Simulation         <NA>
    6   McNeil   Australia       no     Interactive Data Analysis         <NA>
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16
    • 17
    • 18

    iii. right_join

    > merge(authors, books, by.x = "surname", by.y = "name", all.y = TRUE)
       surname nationality deceased                         title     other.author
    1   McNeil   Australia       no     Interactive Data Analysis             <NA>
    2   R Core        <NA>     <NA>          An Introduction to R Venables & Smith
    3   Ripley          UK       no            Spatial Statistics             <NA>
    4   Ripley          UK       no         Stochastic Simulation             <NA>
    5  Tierney          US       no                     LISP-STAT             <NA>
    6    Tukey          US      yes     Exploratory Data Analysis             <NA>
    7 Venables   Australia       no Modern Applied Statistics ...           Ripley
    
    > right_join(authors, books, by = c("surname" = "name"))
       surname nationality deceased                         title     other.author
    1    Tukey          US      yes     Exploratory Data Analysis             <NA>
    2 Venables   Australia       no Modern Applied Statistics ...           Ripley
    3  Tierney          US       no                     LISP-STAT             <NA>
    4   Ripley          UK       no            Spatial Statistics             <NA>
    5   Ripley          UK       no         Stochastic Simulation             <NA>
    6   McNeil   Australia       no     Interactive Data Analysis             <NA>
    7   R Core        <NA>     <NA>          An Introduction to R Venables & Smith
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16
    • 17
    • 18
    • 19

    iv. full_join

    > merge(authors, books, by.x = "surname", by.y = "name", all = TRUE)
       surname nationality deceased                         title     other.author
    1   McNeil   Australia       no     Interactive Data Analysis             <NA>
    2   R Core        <NA>     <NA>          An Introduction to R Venables & Smith
    3   Ripley          UK       no            Spatial Statistics             <NA>
    4   Ripley          UK       no         Stochastic Simulation             <NA>
    5  Tierney          US       no                     LISP-STAT             <NA>
    6    Tukey          US      yes     Exploratory Data Analysis             <NA>
    7 Venables   Australia       no Modern Applied Statistics ...           Ripley
    
    > full_join(authors, books, by = c("surname" = "name"))
       surname nationality deceased                         title     other.author
    1    Tukey          US      yes     Exploratory Data Analysis             <NA>
    2 Venables   Australia       no Modern Applied Statistics ...           Ripley
    3  Tierney          US       no                     LISP-STAT             <NA>
    4   Ripley          UK       no            Spatial Statistics             <NA>
    5   Ripley          UK       no         Stochastic Simulation             <NA>
    6   McNeil   Australia       no     Interactive Data Analysis             <NA>
    7   R Core        <NA>     <NA>          An Introduction to R Venables & Smith
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16
    • 17
    • 18
    • 19

    注意:这里由于数据集的原因,right_join和full_join的结果看不出来差距,要明白其中原理,右连接是以Y数据集为准,全连接是要顾及两个数据集。

    4. example of using ‘incomparables’

    > x <- data.frame(k1 = c(NA,NA,3,4,5), k2 = c(1,NA,NA,4,5), data = 1:5)
    > x
      k1 k2 data
    1 NA  1    1
    2 NA NA    2
    3  3 NA    3
    4  4  4    4
    5  5  5    5
    > y <- data.frame(k1 = c(NA,2,NA,4,5), k2 = c(NA,NA,3,4,5), data = 1:5)
    > y
      k1 k2 data
    1 NA NA    1
    2  2 NA    2
    3 NA  3    3
    4  4  4    4
    5  5  5    5
    
    # inner_join
    > merge(x, y, by = c("k1","k2")) # NA's match
      k1 k2 data.x data.y
    1  4  4      4      4
    2  5  5      5      5
    3 NA NA      2      1
    
    # 按照指定列merge,注意一个NA分别对应了两次NA
    > merge(x, y, by = "k1") # NA's match, so 6 rows
      k1 k2.x data.x k2.y data.y
    1  4    4      4    4      4
    2  5    5      5    5      5
    3 NA    1      1   NA      1
    4 NA    1      1    3      3
    5 NA   NA      2   NA      1
    6 NA   NA      2    3      3
    > merge(x, y, by = "k2") # NA's match, so 6 rows
      k2 k1.x data.x k1.y data.y
    1  4    4      4    4      4
    2  5    5      5    5      5
    3 NA   NA      2   NA      1
    4 NA   NA      2    2      2
    5 NA    3      3   NA      1
    6 NA    3      3    2      2
    
    # 去掉空值
    > merge(x, y, by = "k2", incomparables = NA) # 2 rows
      k2 k1.x data.x k1.y data.y
    1  4    4      4    4      4
    2  5    5      5    5      5
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16
    • 17
    • 18
    • 19
    • 20
    • 21
    • 22
    • 23
    • 24
    • 25
    • 26
    • 27
    • 28
    • 29
    • 30
    • 31
    • 32
    • 33
    • 34
    • 35
    • 36
    • 37
    • 38
    • 39
    • 40
    • 41
    • 42
    • 43
    • 44
    • 45
    • 46
    • 47
  • 相关阅读:
    uniapp 小程序canvas插入网络图片
    解决:Loading class `com.mysql.jdbc.Driver‘. This is deprecated.
    (附源码)ssm失物招领平台 毕业设计 271621
    cookie时效无限延长方案
    Qt开发技术:Q3D图表开发笔记(三):Q3DSurface三维曲面图介绍、Demo以及代码详解
    Ant Design+react 路由跳转
    嵌入式分享合集16
    【Java面试】面试遇到宽泛的问题,这么回答就稳了,谈谈你对Redis的理解
    学习笔记——《LINUX设备驱动程序(第三版)》Linux设备模型:内核添加、删除设备、驱动程序
    【Linux】socket网络编程
  • 原文地址:https://blog.csdn.net/narutodzx/article/details/127977849