• WGCNA教程 官网教程 总结版 代码tutorials 1


    1. Data input and cleaning: PDF documentR script
    2. Network construction and module detection
      1. Automatic, one-step network construction and module detection: PDF documentR script
      2. Step-by-step network construction and module detection: PDF documentR script
      3. Dealing with large datasets: block-wise network construction and module detection: PDF documentR script
    3. Relating modules to external clinical traits and identifying important genes: PDF documentR script
    4. Interfacing network analysis with other data such as functional annotation and gene ontology PDF documentR script
    5. Network visualization using WGCNA functions: PDF documentR script
    6. Export of networks to external software: PDF documentR script

    1. Data input and cleaning: PDF documentR script

    1. #=====================================================================================
    2. #
    3. # Code chunk 1
    4. #
    5. #=====================================================================================
    6. # Display the current working directory
    7. getwd();
    8. # If necessary, change the path below to the directory where the data files are stored.
    9. # "." means current directory. On Windows use a forward slash / instead of the usual \.
    10. workingDir = ".";
    11. setwd(workingDir);
    12. # Load the WGCNA package
    13. library(WGCNA);
    14. # The following setting is important, do not omit.
    15. options(stringsAsFactors = FALSE);
    16. #Read in the female liver data set
    17. femData = read.csv("LiverFemale3600.csv");
    18. # Take a quick look at what is in the data set:
    19. dim(femData);
    20. names(femData);
    21. #=====================================================================================
    22. #
    23. # Code chunk 2
    24. #
    25. #=====================================================================================
    26. datExpr0 = as.data.frame(t(femData[, -c(1:8)]));
    27. names(datExpr0) = femData$substanceBXH;
    28. rownames(datExpr0) = names(femData)[-c(1:8)];
    29. #=====================================================================================
    30. #
    31. # Code chunk 3
    32. #
    33. #=====================================================================================
    34. gsg = goodSamplesGenes(datExpr0, verbose = 3);
    35. gsg$allOK
    36. #=====================================================================================
    37. #
    38. # Code chunk 4
    39. #
    40. #=====================================================================================
    41. if (!gsg$allOK)
    42. {
    43. # Optionally, print the gene and sample names that were removed:
    44. if (sum(!gsg$goodGenes)>0)
    45. printFlush(paste("Removing genes:", paste(names(datExpr0)[!gsg$goodGenes], collapse = ", ")));
    46. if (sum(!gsg$goodSamples)>0)
    47. printFlush(paste("Removing samples:", paste(rownames(datExpr0)[!gsg$goodSamples], collapse = ", ")));
    48. # Remove the offending genes and samples from the data:
    49. datExpr0 = datExpr0[gsg$goodSamples, gsg$goodGenes]
    50. }
    51. #=====================================================================================
    52. #
    53. # Code chunk 5
    54. #
    55. #=====================================================================================
    56. sampleTree = hclust(dist(datExpr0), method = "average");
    57. # Plot the sample tree: Open a graphic output window of size 12 by 9 inches
    58. # The user should change the dimensions if the window is too large or too small.
    59. sizeGrWindow(12,9)
    60. #pdf(file = "Plots/sampleClustering.pdf", width = 12, height = 9);
    61. par(cex = 0.6);
    62. par(mar = c(0,4,2,0))
    63. plot(sampleTree, main = "Sample clustering to detect outliers", sub="", xlab="", cex.lab = 1.5,
    64. cex.axis = 1.5, cex.main = 2)
    65. #=====================================================================================
    66. #
    67. # Code chunk 6
    68. #
    69. #=====================================================================================
    70. # Plot a line to show the cut
    71. abline(h = 15, col = "red");
    72. # Determine cluster under the line
    73. clust = cutreeStatic(sampleTree, cutHeight = 15, minSize = 10)
    74. table(clust)
    75. # clust 1 contains the samples we want to keep.
    76. keepSamples = (clust==1)
    77. datExpr = datExpr0[keepSamples, ]
    78. nGenes = ncol(datExpr)
    79. nSamples = nrow(datExpr)
    80. #=====================================================================================
    81. #
    82. # Code chunk 7
    83. #
    84. #=====================================================================================
    85. traitData = read.csv("ClinicalTraits.csv");
    86. dim(traitData)
    87. names(traitData)
    88. # remove columns that hold information we do not need.
    89. allTraits = traitData[, -c(31, 16)];
    90. allTraits = allTraits[, c(2, 11:36) ];
    91. dim(allTraits)
    92. names(allTraits)
    93. # Form a data frame analogous to expression data that will hold the clinical traits.
    94. femaleSamples = rownames(datExpr);
    95. traitRows = match(femaleSamples, allTraits$Mice);
    96. datTraits = allTraits[traitRows, -1];
    97. rownames(datTraits) = allTraits[traitRows, 1];
    98. collectGarbage();
    99. #=====================================================================================
    100. #
    101. # Code chunk 8
    102. #
    103. #=====================================================================================
    104. # Re-cluster samples
    105. sampleTree2 = hclust(dist(datExpr), method = "average")
    106. # Convert traits to a color representation: white means low, red means high, grey means missing entry
    107. traitColors = numbers2colors(datTraits, signed = FALSE);
    108. # Plot the sample dendrogram and the colors underneath.
    109. plotDendroAndColors(sampleTree2, traitColors,
    110. groupLabels = names(datTraits),
    111. main = "Sample dendrogram and trait heatmap")
    112. #=====================================================================================
    113. #
    114. # Code chunk 9
    115. #
    116. #=====================================================================================
    117. save(datExpr, datTraits, file = "FemaleLiver-01-dataInput.RData")

     Tutorials for WGCNA R package

    step2.

    Network construction and module detection

    1. Automatic, one-step network construction and module detection: PDF documentR script

    1. #=====================================================================================
    2. #
    3. # Code chunk 1
    4. #
    5. #=====================================================================================
    6. # Display the current working directory
    7. getwd();
    8. # If necessary, change the path below to the directory where the data files are stored.
    9. # "." means current directory. On Windows use a forward slash / instead of the usual \.
    10. workingDir = ".";
    11. setwd(workingDir);
    12. # Load the WGCNA package
    13. library(WGCNA)
    14. # The following setting is important, do not omit.
    15. options(stringsAsFactors = FALSE);
    16. # Allow multi-threading within WGCNA. This helps speed up certain calculations.
    17. # At present this call is necessary for the code to work.
    18. # Any error here may be ignored but you may want to update WGCNA if you see one.
    19. # Caution: skip this line if you run RStudio or other third-party R environments.
    20. # See note above.
    21. enableWGCNAThreads()
    22. # Load the data saved in the first part
    23. lnames = load(file = "FemaleLiver-01-dataInput.RData");
    24. #The variable lnames contains the names of loaded variables.
    25. lnames
    26. #=====================================================================================
    27. #
    28. # Code chunk 2
    29. #
    30. #=====================================================================================
    31. # Choose a set of soft-thresholding powers
    32. powers = c(c(1:10), seq(from = 12, to=20, by=2))
    33. # Call the network topology analysis function
    34. sft = pickSoftThreshold(datExpr, powerVector = powers, verbose = 5)
    35. # Plot the results:
    36. sizeGrWindow(9, 5)
    37. par(mfrow = c(1,2));
    38. cex1 = 0.9;
    39. # Scale-free topology fit index as a function of the soft-thresholding power
    40. plot(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
    41. xlab="Soft Threshold (power)",ylab="Scale Free Topology Model Fit,signed R^2",type="n",
    42. main = paste("Scale independence"));
    43. text(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
    44. labels=powers,cex=cex1,col="red");
    45. # this line corresponds to using an R^2 cut-off of h
    46. abline(h=0.90,col="red")
    47. # Mean connectivity as a function of the soft-thresholding power
    48. plot(sft$fitIndices[,1], sft$fitIndices[,5],
    49. xlab="Soft Threshold (power)",ylab="Mean Connectivity", type="n",
    50. main = paste("Mean connectivity"))
    51. text(sft$fitIndices[,1], sft$fitIndices[,5], labels=powers, cex=cex1,col="red")
    52. #=====================================================================================
    53. #
    54. # Code chunk 3
    55. #
    56. #=====================================================================================
    57. net = blockwiseModules(datExpr, power = 6,
    58. TOMType = "unsigned", minModuleSize = 30,
    59. reassignThreshold = 0, mergeCutHeight = 0.25,
    60. numericLabels = TRUE, pamRespectsDendro = FALSE,
    61. saveTOMs = TRUE,
    62. saveTOMFileBase = "femaleMouseTOM",
    63. verbose = 3)
    64. #=====================================================================================
    65. #
    66. # Code chunk 4
    67. #
    68. #=====================================================================================
    69. # open a graphics window
    70. sizeGrWindow(12, 9)
    71. # Convert labels to colors for plotting
    72. mergedColors = labels2colors(net$colors)
    73. # Plot the dendrogram and the module colors underneath
    74. plotDendroAndColors(net$dendrograms[[1]], mergedColors[net$blockGenes[[1]]],
    75. "Module colors",
    76. dendroLabels = FALSE, hang = 0.03,
    77. addGuide = TRUE, guideHang = 0.05)
    78. #=====================================================================================
    79. #
    80. # Code chunk 5
    81. #
    82. #=====================================================================================
    83. moduleLabels = net$colors
    84. moduleColors = labels2colors(net$colors)
    85. MEs = net$MEs;
    86. geneTree = net$dendrograms[[1]];
    87. save(MEs, moduleLabels, moduleColors, geneTree,
    88. file = "FemaleLiver-02-networkConstruction-auto.RData")
    1. Step-by-step network construction and module detection: PDF documentR script

    1. #=====================================================================================
    2. #
    3. # Code chunk 1
    4. #
    5. #=====================================================================================
    6. # Display the current working directory
    7. getwd();
    8. # If necessary, change the path below to the directory where the data files are stored.
    9. # "." means current directory. On Windows use a forward slash / instead of the usual \.
    10. workingDir = ".";
    11. setwd(workingDir);
    12. # Load the WGCNA package
    13. library(WGCNA)
    14. # The following setting is important, do not omit.
    15. options(stringsAsFactors = FALSE);
    16. # Allow multi-threading within WGCNA. This helps speed up certain calculations.
    17. # At present this call is necessary for the code to work.
    18. # Any error here may be ignored but you may want to update WGCNA if you see one.
    19. # Caution: skip this line if you run RStudio or other third-party R environments.
    20. # See note above.
    21. enableWGCNAThreads()
    22. # Load the data saved in the first part
    23. lnames = load(file = "FemaleLiver-01-dataInput.RData");
    24. #The variable lnames contains the names of loaded variables.
    25. lnames
    26. #=====================================================================================
    27. #
    28. # Code chunk 2
    29. #
    30. #=====================================================================================
    31. # Choose a set of soft-thresholding powers
    32. powers = c(c(1:10), seq(from = 12, to=20, by=2))
    33. # Call the network topology analysis function
    34. sft = pickSoftThreshold(datExpr, powerVector = powers, verbose = 5)
    35. # Plot the results:
    36. sizeGrWindow(9, 5)
    37. par(mfrow = c(1,2));
    38. cex1 = 0.9;
    39. # Scale-free topology fit index as a function of the soft-thresholding power
    40. plot(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
    41. xlab="Soft Threshold (power)",ylab="Scale Free Topology Model Fit,signed R^2",type="n",
    42. main = paste("Scale independence"));
    43. text(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
    44. labels=powers,cex=cex1,col="red");
    45. # this line corresponds to using an R^2 cut-off of h
    46. abline(h=0.90,col="red")
    47. # Mean connectivity as a function of the soft-thresholding power
    48. plot(sft$fitIndices[,1], sft$fitIndices[,5],
    49. xlab="Soft Threshold (power)",ylab="Mean Connectivity", type="n",
    50. main = paste("Mean connectivity"))
    51. text(sft$fitIndices[,1], sft$fitIndices[,5], labels=powers, cex=cex1,col="red")
    52. #=====================================================================================
    53. #
    54. # Code chunk 3
    55. #
    56. #=====================================================================================
    57. net = blockwiseModules(datExpr, power = 6,
    58. TOMType = "unsigned", minModuleSize = 30,
    59. reassignThreshold = 0, mergeCutHeight = 0.25,
    60. numericLabels = TRUE, pamRespectsDendro = FALSE,
    61. saveTOMs = TRUE,
    62. saveTOMFileBase = "femaleMouseTOM",
    63. verbose = 3)
    64. #=====================================================================================
    65. #
    66. # Code chunk 4
    67. #
    68. #=====================================================================================
    69. # open a graphics window
    70. sizeGrWindow(12, 9)
    71. # Convert labels to colors for plotting
    72. mergedColors = labels2colors(net$colors)
    73. # Plot the dendrogram and the module colors underneath
    74. plotDendroAndColors(net$dendrograms[[1]], mergedColors[net$blockGenes[[1]]],
    75. "Module colors",
    76. dendroLabels = FALSE, hang = 0.03,
    77. addGuide = TRUE, guideHang = 0.05)
    78. #=====================================================================================
    79. #
    80. # Code chunk 5
    81. #
    82. #=====================================================================================
    83. moduleLabels = net$colors
    84. moduleColors = labels2colors(net$colors)
    85. MEs = net$MEs;
    86. geneTree = net$dendrograms[[1]];
    87. save(MEs, moduleLabels, moduleColors, geneTree,
    88. file = "FemaleLiver-02-networkConstruction-auto.RData")

    Dealing with large datasets: block-wise network construction and module detection: PDF documentR script

    1. #=====================================================================================
    2. #
    3. # Code chunk 1
    4. #
    5. #=====================================================================================
    6. # Display the current working directory
    7. getwd();
    8. # If necessary, change the path below to the directory where the data files are stored.
    9. # "." means current directory. On Windows use a forward slash / instead of the usual \.
    10. workingDir = ".";
    11. setwd(workingDir);
    12. # Load the WGCNA package
    13. library(WGCNA)
    14. # The following setting is important, do not omit.
    15. options(stringsAsFactors = FALSE);
    16. # Allow multi-threading within WGCNA. This helps speed up certain calculations.
    17. # At present this call is necessary.
    18. # Any error here may be ignored but you may want to update WGCNA if you see one.
    19. # Caution: skip this line if you run RStudio or other third-party R environments.
    20. # See note above.
    21. enableWGCNAThreads()
    22. # Load the data saved in the first part
    23. lnames = load(file = "FemaleLiver-01-dataInput.RData");
    24. #The variable lnames contains the names of loaded variables.
    25. lnames
    26. #=====================================================================================
    27. #
    28. # Code chunk 2
    29. #
    30. #=====================================================================================
    31. # Choose a set of soft-thresholding powers
    32. powers = c(c(1:10), seq(from = 12, to=20, by=2))
    33. # Call the network topology analysis function
    34. sft = pickSoftThreshold(datExpr, powerVector = powers, verbose = 5)
    35. # Plot the results:
    36. sizeGrWindow(9, 5)
    37. par(mfrow = c(1,2));
    38. cex1 = 0.9;
    39. # Scale-free topology fit index as a function of the soft-thresholding power
    40. plot(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
    41. xlab="Soft Threshold (power)",ylab="Scale Free Topology Model Fit,signed R^2",type="n",
    42. main = paste("Scale independence"));
    43. text(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
    44. labels=powers,cex=cex1,col="red");
    45. # this line corresponds to using an R^2 cut-off of h
    46. abline(h=0.90,col="red")
    47. # Mean connectivity as a function of the soft-thresholding power
    48. plot(sft$fitIndices[,1], sft$fitIndices[,5],
    49. xlab="Soft Threshold (power)",ylab="Mean Connectivity", type="n",
    50. main = paste("Mean connectivity"))
    51. text(sft$fitIndices[,1], sft$fitIndices[,5], labels=powers, cex=cex1,col="red")
    52. #=====================================================================================
    53. #
    54. # Code chunk 3
    55. #
    56. #=====================================================================================
    57. bwnet = blockwiseModules(datExpr, maxBlockSize = 2000,
    58. power = 6, TOMType = "unsigned", minModuleSize = 30,
    59. reassignThreshold = 0, mergeCutHeight = 0.25,
    60. numericLabels = TRUE,
    61. saveTOMs = TRUE,
    62. saveTOMFileBase = "femaleMouseTOM-blockwise",
    63. verbose = 3)
    64. #=====================================================================================
    65. #
    66. # Code chunk 4
    67. #
    68. #=====================================================================================
    69. # Load the results of single-block analysis
    70. load(file = "FemaleLiver-02-networkConstruction-auto.RData");
    71. # Relabel blockwise modules
    72. bwLabels = matchLabels(bwnet$colors, moduleLabels);
    73. # Convert labels to colors for plotting
    74. bwModuleColors = labels2colors(bwLabels)
    75. #=====================================================================================
    76. #
    77. # Code chunk 5
    78. #
    79. #=====================================================================================
    80. # open a graphics window
    81. sizeGrWindow(6,6)
    82. # Plot the dendrogram and the module colors underneath for block 1
    83. plotDendroAndColors(bwnet$dendrograms[[1]], bwModuleColors[bwnet$blockGenes[[1]]],
    84. "Module colors", main = "Gene dendrogram and module colors in block 1",
    85. dendroLabels = FALSE, hang = 0.03,
    86. addGuide = TRUE, guideHang = 0.05)
    87. # Plot the dendrogram and the module colors underneath for block 2
    88. plotDendroAndColors(bwnet$dendrograms[[2]], bwModuleColors[bwnet$blockGenes[[2]]],
    89. "Module colors", main = "Gene dendrogram and module colors in block 2",
    90. dendroLabels = FALSE, hang = 0.03,
    91. addGuide = TRUE, guideHang = 0.05)
    92. #=====================================================================================
    93. #
    94. # Code chunk 6
    95. #
    96. #=====================================================================================
    97. sizeGrWindow(12,9)
    98. plotDendroAndColors(geneTree,
    99. cbind(moduleColors, bwModuleColors),
    100. c("Single block", "2 blocks"),
    101. main = "Single block gene dendrogram and module colors",
    102. dendroLabels = FALSE, hang = 0.03,
    103. addGuide = TRUE, guideHang = 0.05)
    104. #=====================================================================================
    105. #
    106. # Code chunk 7
    107. #
    108. #=====================================================================================
    109. singleBlockMEs = moduleEigengenes(datExpr, moduleColors)$eigengenes;
    110. blockwiseMEs = moduleEigengenes(datExpr, bwModuleColors)$eigengenes;
    111. #=====================================================================================
    112. #
    113. # Code chunk 8
    114. #
    115. #=====================================================================================
    116. single2blockwise = match(names(singleBlockMEs), names(blockwiseMEs))
    117. signif(diag(cor(blockwiseMEs[, single2blockwise], singleBlockMEs)), 3)

    Relating modules to external clinical traits and identifying important genes: PDF documentR script

    1. #=====================================================================================
    2. #
    3. # Code chunk 1
    4. #
    5. #=====================================================================================
    6. # Display the current working directory
    7. getwd();
    8. # If necessary, change the path below to the directory where the data files are stored.
    9. # "." means current directory. On Windows use a forward slash / instead of the usual \.
    10. workingDir = ".";
    11. setwd(workingDir);
    12. # Load the WGCNA package
    13. library(WGCNA)
    14. # The following setting is important, do not omit.
    15. options(stringsAsFactors = FALSE);
    16. # Load the expression and trait data saved in the first part
    17. lnames = load(file = "FemaleLiver-01-dataInput.RData");
    18. #The variable lnames contains the names of loaded variables.
    19. lnames
    20. # Load network data saved in the second part.
    21. lnames = load(file = "FemaleLiver-02-networkConstruction-auto.RData");
    22. lnames
    23. #=====================================================================================
    24. #
    25. # Code chunk 2
    26. #
    27. #=====================================================================================
    28. # Define numbers of genes and samples
    29. nGenes = ncol(datExpr);
    30. nSamples = nrow(datExpr);
    31. # Recalculate MEs with color labels
    32. MEs0 = moduleEigengenes(datExpr, moduleColors)$eigengenes
    33. MEs = orderMEs(MEs0)
    34. moduleTraitCor = cor(MEs, datTraits, use = "p");
    35. moduleTraitPvalue = corPvalueStudent(moduleTraitCor, nSamples);
    36. #=====================================================================================
    37. #
    38. # Code chunk 3
    39. #
    40. #=====================================================================================
    41. sizeGrWindow(10,6)
    42. # Will display correlations and their p-values
    43. textMatrix = paste(signif(moduleTraitCor, 2), "\n(",
    44. signif(moduleTraitPvalue, 1), ")", sep = "");
    45. dim(textMatrix) = dim(moduleTraitCor)
    46. par(mar = c(6, 8.5, 3, 3));
    47. # Display the correlation values within a heatmap plot
    48. labeledHeatmap(Matrix = moduleTraitCor,
    49. xLabels = names(datTraits),
    50. yLabels = names(MEs),
    51. ySymbols = names(MEs),
    52. colorLabels = FALSE,
    53. colors = greenWhiteRed(50),
    54. textMatrix = textMatrix,
    55. setStdMargins = FALSE,
    56. cex.text = 0.5,
    57. zlim = c(-1,1),
    58. main = paste("Module-trait relationships"))
    59. #=====================================================================================
    60. #
    61. # Code chunk 4
    62. #
    63. #=====================================================================================
    64. # Define variable weight containing the weight column of datTrait
    65. weight = as.data.frame(datTraits$weight_g);
    66. names(weight) = "weight"
    67. # names (colors) of the modules
    68. modNames = substring(names(MEs), 3)
    69. geneModuleMembership = as.data.frame(cor(datExpr, MEs, use = "p"));
    70. MMPvalue = as.data.frame(corPvalueStudent(as.matrix(geneModuleMembership), nSamples));
    71. names(geneModuleMembership) = paste("MM", modNames, sep="");
    72. names(MMPvalue) = paste("p.MM", modNames, sep="");
    73. geneTraitSignificance = as.data.frame(cor(datExpr, weight, use = "p"));
    74. GSPvalue = as.data.frame(corPvalueStudent(as.matrix(geneTraitSignificance), nSamples));
    75. names(geneTraitSignificance) = paste("GS.", names(weight), sep="");
    76. names(GSPvalue) = paste("p.GS.", names(weight), sep="");
    77. #=====================================================================================
    78. #
    79. # Code chunk 5
    80. #
    81. #=====================================================================================
    82. module = "brown"
    83. column = match(module, modNames);
    84. moduleGenes = moduleColors==module;
    85. sizeGrWindow(7, 7);
    86. par(mfrow = c(1,1));
    87. verboseScatterplot(abs(geneModuleMembership[moduleGenes, column]),
    88. abs(geneTraitSignificance[moduleGenes, 1]),
    89. xlab = paste("Module Membership in", module, "module"),
    90. ylab = "Gene significance for body weight",
    91. main = paste("Module membership vs. gene significance\n"),
    92. cex.main = 1.2, cex.lab = 1.2, cex.axis = 1.2, col = module)
    93. #=====================================================================================
    94. #
    95. # Code chunk 6
    96. #
    97. #=====================================================================================
    98. names(datExpr)
    99. #=====================================================================================
    100. #
    101. # Code chunk 7
    102. #
    103. #=====================================================================================
    104. names(datExpr)[moduleColors=="brown"]
    105. #=====================================================================================
    106. #
    107. # Code chunk 8
    108. #
    109. #=====================================================================================
    110. annot = read.csv(file = "GeneAnnotation.csv");
    111. dim(annot)
    112. names(annot)
    113. probes = names(datExpr)
    114. probes2annot = match(probes, annot$substanceBXH)
    115. # The following is the number or probes without annotation:
    116. sum(is.na(probes2annot))
    117. # Should return 0.
    118. #=====================================================================================
    119. #
    120. # Code chunk 9
    121. #
    122. #=====================================================================================
    123. # Create the starting data frame
    124. geneInfo0 = data.frame(substanceBXH = probes,
    125. geneSymbol = annot$gene_symbol[probes2annot],
    126. LocusLinkID = annot$LocusLinkID[probes2annot],
    127. moduleColor = moduleColors,
    128. geneTraitSignificance,
    129. GSPvalue)
    130. # Order modules by their significance for weight
    131. modOrder = order(-abs(cor(MEs, weight, use = "p")));
    132. # Add module membership information in the chosen order
    133. for (mod in 1:ncol(geneModuleMembership))
    134. {
    135. oldNames = names(geneInfo0)
    136. geneInfo0 = data.frame(geneInfo0, geneModuleMembership[, modOrder[mod]],
    137. MMPvalue[, modOrder[mod]]);
    138. names(geneInfo0) = c(oldNames, paste("MM.", modNames[modOrder[mod]], sep=""),
    139. paste("p.MM.", modNames[modOrder[mod]], sep=""))
    140. }
    141. # Order the genes in the geneInfo variable first by module color, then by geneTraitSignificance
    142. geneOrder = order(geneInfo0$moduleColor, -abs(geneInfo0$GS.weight));
    143. geneInfo = geneInfo0[geneOrder, ]
    144. #=====================================================================================
    145. #
    146. # Code chunk 10
    147. #
    148. #=====================================================================================
    149. write.csv(geneInfo, file = "geneInfo.csv")

    Interfacing network analysis with other data such as functional annotation and gene ontology PDF documentR script

    1. #=====================================================================================
    2. #
    3. # Code chunk 1
    4. #
    5. #=====================================================================================
    6. # Display the current working directory
    7. getwd();
    8. # If necessary, change the path below to the directory where the data files are stored.
    9. # "." means current directory. On Windows use a forward slash / instead of the usual \.
    10. workingDir = ".";
    11. setwd(workingDir);
    12. # Load the WGCNA package
    13. library(WGCNA)
    14. # The following setting is important, do not omit.
    15. options(stringsAsFactors = FALSE);
    16. # Load the expression and trait data saved in the first part
    17. lnames = load(file = "FemaleLiver-01-dataInput.RData");
    18. #The variable lnames contains the names of loaded variables.
    19. lnames
    20. # Load network data saved in the second part.
    21. lnames = load(file = "FemaleLiver-02-networkConstruction-auto.RData");
    22. lnames
    23. #=====================================================================================
    24. #
    25. # Code chunk 2
    26. #
    27. #=====================================================================================
    28. # Read in the probe annotation
    29. annot = read.csv(file = "GeneAnnotation.csv");
    30. # Match probes in the data set to the probe IDs in the annotation file
    31. probes = names(datExpr)
    32. probes2annot = match(probes, annot$substanceBXH)
    33. # Get the corresponding Locuis Link IDs
    34. allLLIDs = annot$LocusLinkID[probes2annot];
    35. # $ Choose interesting modules
    36. intModules = c("brown", "red", "salmon")
    37. for (module in intModules)
    38. {
    39. # Select module probes
    40. modGenes = (moduleColors==module)
    41. # Get their entrez ID codes
    42. modLLIDs = allLLIDs[modGenes];
    43. # Write them into a file
    44. fileName = paste("LocusLinkIDs-", module, ".txt", sep="");
    45. write.table(as.data.frame(modLLIDs), file = fileName,
    46. row.names = FALSE, col.names = FALSE)
    47. }
    48. # As background in the enrichment analysis, we will use all probes in the analysis.
    49. fileName = paste("LocusLinkIDs-all.txt", sep="");
    50. write.table(as.data.frame(allLLIDs), file = fileName,
    51. row.names = FALSE, col.names = FALSE)
    52. #=====================================================================================
    53. #
    54. # Code chunk 3
    55. #
    56. #=====================================================================================
    57. GOenr = GOenrichmentAnalysis(moduleColors, allLLIDs, organism = "mouse", nBestP = 10);
    58. #=====================================================================================
    59. #
    60. # Code chunk 4
    61. #
    62. #=====================================================================================
    63. tab = GOenr$bestPTerms[[4]]$enrichment
    64. #=====================================================================================
    65. #
    66. # Code chunk 5
    67. #
    68. #=====================================================================================
    69. names(tab)
    70. #=====================================================================================
    71. #
    72. # Code chunk 6
    73. #
    74. #=====================================================================================
    75. write.table(tab, file = "GOEnrichmentTable.csv", sep = ",", quote = TRUE, row.names = FALSE)
    76. #=====================================================================================
    77. #
    78. # Code chunk 7
    79. #
    80. #=====================================================================================
    81. keepCols = c(1, 2, 5, 6, 7, 12, 13);
    82. screenTab = tab[, keepCols];
    83. # Round the numeric columns to 2 decimal places:
    84. numCols = c(3, 4);
    85. screenTab[, numCols] = signif(apply(screenTab[, numCols], 2, as.numeric), 2)
    86. # Truncate the the term name to at most 40 characters
    87. screenTab[, 7] = substring(screenTab[, 7], 1, 40)
    88. # Shorten the column names:
    89. colnames(screenTab) = c("module", "size", "p-val", "Bonf", "nInTerm", "ont", "term name");
    90. rownames(screenTab) = NULL;
    91. # Set the width of R's output. The reader should play with this number to obtain satisfactory output.
    92. options(width=95)
    93. # Finally, display the enrichment table:
    94. screenTab

    Network visualization using WGCNA functions: PDF documentR script

    1. #=====================================================================================
    2. #
    3. # Code chunk 1
    4. #
    5. #=====================================================================================
    6. # Display the current working directory
    7. getwd();
    8. # If necessary, change the path below to the directory where the data files are stored.
    9. # "." means current directory. On Windows use a forward slash / instead of the usual \.
    10. workingDir = ".";
    11. setwd(workingDir);
    12. # Load the WGCNA package
    13. library(WGCNA)
    14. # The following setting is important, do not omit.
    15. options(stringsAsFactors = FALSE);
    16. # Load the expression and trait data saved in the first part
    17. lnames = load(file = "FemaleLiver-01-dataInput.RData");
    18. #The variable lnames contains the names of loaded variables.
    19. lnames
    20. # Load network data saved in the second part.
    21. lnames = load(file = "FemaleLiver-02-networkConstruction-auto.RData");
    22. lnames
    23. nGenes = ncol(datExpr)
    24. nSamples = nrow(datExpr)
    25. #=====================================================================================
    26. #
    27. # Code chunk 2
    28. #
    29. #=====================================================================================
    30. # Calculate topological overlap anew: this could be done more efficiently by saving the TOM
    31. # calculated during module detection, but let us do it again here.
    32. dissTOM = 1-TOMsimilarityFromExpr(datExpr, power = 6);
    33. # Transform dissTOM with a power to make moderately strong connections more visible in the heatmap
    34. plotTOM = dissTOM^7;
    35. # Set diagonal to NA for a nicer plot
    36. diag(plotTOM) = NA;
    37. # Call the plot function
    38. sizeGrWindow(9,9)
    39. TOMplot(plotTOM, geneTree, moduleColors, main = "Network heatmap plot, all genes")
    40. #=====================================================================================
    41. #
    42. # Code chunk 3
    43. #
    44. #=====================================================================================
    45. nSelect = 400
    46. # For reproducibility, we set the random seed
    47. set.seed(10);
    48. select = sample(nGenes, size = nSelect);
    49. selectTOM = dissTOM[select, select];
    50. # There's no simple way of restricting a clustering tree to a subset of genes, so we must re-cluster.
    51. selectTree = hclust(as.dist(selectTOM), method = "average")
    52. selectColors = moduleColors[select];
    53. # Open a graphical window
    54. sizeGrWindow(9,9)
    55. # Taking the dissimilarity to a power, say 10, makes the plot more informative by effectively changing
    56. # the color palette; setting the diagonal to NA also improves the clarity of the plot
    57. plotDiss = selectTOM^7;
    58. diag(plotDiss) = NA;
    59. TOMplot(plotDiss, selectTree, selectColors, main = "Network heatmap plot, selected genes")
    60. #=====================================================================================
    61. #
    62. # Code chunk 4
    63. #
    64. #=====================================================================================
    65. # Recalculate module eigengenes
    66. MEs = moduleEigengenes(datExpr, moduleColors)$eigengenes
    67. # Isolate weight from the clinical traits
    68. weight = as.data.frame(datTraits$weight_g);
    69. names(weight) = "weight"
    70. # Add the weight to existing module eigengenes
    71. MET = orderMEs(cbind(MEs, weight))
    72. # Plot the relationships among the eigengenes and the trait
    73. sizeGrWindow(5,7.5);
    74. par(cex = 0.9)
    75. plotEigengeneNetworks(MET, "", marDendro = c(0,4,1,2), marHeatmap = c(3,4,1,2), cex.lab = 0.8, xLabelsAngle
    76. = 90)
    77. #=====================================================================================
    78. #
    79. # Code chunk 5
    80. #
    81. #=====================================================================================
    82. # Plot the dendrogram
    83. sizeGrWindow(6,6);
    84. par(cex = 1.0)
    85. plotEigengeneNetworks(MET, "Eigengene dendrogram", marDendro = c(0,4,2,0),
    86. plotHeatmaps = FALSE)
    87. # Plot the heatmap matrix (note: this plot will overwrite the dendrogram plot)
    88. par(cex = 1.0)
    89. plotEigengeneNetworks(MET, "Eigengene adjacency heatmap", marHeatmap = c(3,4,2,2),
    90. plotDendrograms = FALSE, xLabelsAngle = 90)

    6.Export of networks to external software: PDF documentR script

    1. #=====================================================================================
    2. #
    3. # Code chunk 1
    4. #
    5. #=====================================================================================
    6. # Display the current working directory
    7. getwd();
    8. # If necessary, change the path below to the directory where the data files are stored.
    9. # "." means current directory. On Windows use a forward slash / instead of the usual \.
    10. workingDir = ".";
    11. setwd(workingDir);
    12. # Load the WGCNA package
    13. library(WGCNA)
    14. # The following setting is important, do not omit.
    15. options(stringsAsFactors = FALSE);
    16. # Load the expression and trait data saved in the first part
    17. lnames = load(file = "FemaleLiver-01-dataInput.RData");
    18. #The variable lnames contains the names of loaded variables.
    19. lnames
    20. # Load network data saved in the second part.
    21. lnames = load(file = "FemaleLiver-02-networkConstruction-auto.RData");
    22. lnames
    23. #=====================================================================================
    24. #
    25. # Code chunk 2
    26. #
    27. #=====================================================================================
    28. # Recalculate topological overlap
    29. TOM = TOMsimilarityFromExpr(datExpr, power = 6);
    30. # Read in the annotation file
    31. annot = read.csv(file = "GeneAnnotation.csv");
    32. # Select module
    33. module = "brown";
    34. # Select module probes
    35. probes = names(datExpr)
    36. inModule = (moduleColors==module);
    37. modProbes = probes[inModule];
    38. # Select the corresponding Topological Overlap
    39. modTOM = TOM[inModule, inModule];
    40. dimnames(modTOM) = list(modProbes, modProbes)
    41. # Export the network into an edge list file VisANT can read
    42. vis = exportNetworkToVisANT(modTOM,
    43. file = paste("VisANTInput-", module, ".txt", sep=""),
    44. weighted = TRUE,
    45. threshold = 0,
    46. probeToGene = data.frame(annot$substanceBXH, annot$gene_symbol) )
    47. #=====================================================================================
    48. #
    49. # Code chunk 3
    50. #
    51. #=====================================================================================
    52. nTop = 30;
    53. IMConn = softConnectivity(datExpr[, modProbes]);
    54. top = (rank(-IMConn) <= nTop)
    55. vis = exportNetworkToVisANT(modTOM[top, top],
    56. file = paste("VisANTInput-", module, "-top30.txt", sep=""),
    57. weighted = TRUE,
    58. threshold = 0,
    59. probeToGene = data.frame(annot$substanceBXH, annot$gene_symbol) )
    60. #=====================================================================================
    61. #
    62. # Code chunk 4
    63. #
    64. #=====================================================================================
    65. # Recalculate topological overlap if needed
    66. TOM = TOMsimilarityFromExpr(datExpr, power = 6);
    67. # Read in the annotation file
    68. annot = read.csv(file = "GeneAnnotation.csv");
    69. # Select modules
    70. modules = c("brown", "red");
    71. # Select module probes
    72. probes = names(datExpr)
    73. inModule = is.finite(match(moduleColors, modules));
    74. modProbes = probes[inModule];
    75. modGenes = annot$gene_symbol[match(modProbes, annot$substanceBXH)];
    76. # Select the corresponding Topological Overlap
    77. modTOM = TOM[inModule, inModule];
    78. dimnames(modTOM) = list(modProbes, modProbes)
    79. # Export the network into edge and node list files Cytoscape can read
    80. cyt = exportNetworkToCytoscape(modTOM,
    81. edgeFile = paste("CytoscapeInput-edges-", paste(modules, collapse="-"), ".txt", sep=""),
    82. nodeFile = paste("CytoscapeInput-nodes-", paste(modules, collapse="-"), ".txt", sep=""),
    83. weighted = TRUE,
    84. threshold = 0.02,
    85. nodeNames = modProbes,
    86. altNodeNames = modGenes,
    87. nodeAttr = moduleColors[inModule]);

  • 相关阅读:
    mysql统计整个数据库记录条数
    C++ 统计程序运行时间
    EF列表分页查询(单表、多表),排除参数为空的条件
    【数据结构】| 并查集及其优化实现
    Dify后端源码目录结构和蓝图
    Vue 路由 使用
    CSS - 响应式布局(二)响应式栅格系统
    linux发展历程
    企望制造ERP存在远程命令执行漏洞 附POC
    从SAP ECC升级到SAP S4HANA, 几个Key Points
  • 原文地址:https://blog.csdn.net/qq_52813185/article/details/127651588