• R/d2及S/C4估计总体标准差,比较其CPK及规格限概率的差异


    R/d2 和 S/C4 是用于估计总体标准差的无偏估计方法,通常用于控制图中。这些估计方法的主要目的是通过样本数据来估计总体标准差,以便监测过程的稳定性和变异性,而不需要收集整个总体的数据。

    具体来说:

    • R图中的 R/d2 和 S图中的 S/C4 都是无偏估计,其中 d2 和 C4 是常数,用于修正范围和标准差以获得更接近总体标准差的估计值。
    1. import numpy as np
    2. import scipy.stats as stats
    3. from scipy.stats import norm
    4. import matplotlib.pyplot as plt
    5. # 输入数据
    6. data = [
    7. [6.4, 7.0, 6.4, 6.4, 7.1],
    8. [6.8, 6.4, 6.4, 6.3, 6.5],
    9. [6.3, 7.1, 6.5, 6.4, 7.0],
    10. [6.1, 6.8, 5.9, 5.8, 6.0],
    11. [6.4, 6.9, 6.8, 6.5, 6.9],
    12. [6.6, 6.0, 6.1, 6.2, 5.9],
    13. [6.3, 6.9, 6.6, 6.2, 6.8],
    14. [6.4, 5.6, 6.2, 6.0, 5.8],
    15. [6.3, 6.7, 6.6, 6.4, 6.3],
    16. [6.7, 5.9, 5.8, 6.3, 6.2],
    17. [6.6, 7.0, 6.5, 6.4, 7.1],
    18. [6.8, 6.2, 6.5, 6.2, 5.8]
    19. ]
    20. # data1 输入过程上下限
    21. upper_spec_limit = 7
    22. lower_spec_limit = 5.8
    23. data1 = [
    24. [5.05, 5.03, 5.04, 4.91, 4.99, 4.97, 4.97, 4.91, 4.97, 5.05],
    25. [4.96, 4.94, 4.93, 4.91, 5.04, 5.01, 4.96, 4.93, 4.90, 5.02],
    26. [4.99, 5.05, 5.04, 5.05, 5.00, 4.98, 4.90, 5.05, 4.91, 4.93],
    27. [5.05, 4.93, 4.91, 4.99, 4.92, 4.91, 5.04, 4.98, 5.03, 5.01],
    28. [5.05, 5.00, 4.92, 4.93, 5.00, 4.98, 5.10, 4.95, 4.91, 4.98],
    29. [5.02, 5.01, 5.01, 4.91, 5.05, 4.97, 5.03, 5.03, 4.98, 5.06],
    30. [4.91, 5.02, 5.07, 5.05, 5.08, 5.01, 5.03, 5.03, 5.07, 4.96],
    31. [4.95, 4.98, 4.91, 5.03, 5.03, 5.08, 4.92, 5.09, 5.10, 5.00],
    32. [4.98, 4.97, 4.97, 5.09, 5.03, 5.07, 5.08, 4.96, 4.96, 5.03],
    33. [4.99, 4.91, 5.02, 4.97, 5.04, 4.99, 5.05, 4.93, 5.05, 4.92]
    34. ] #检测不符合正态,凑合着演示用
    35. # data1 输入过程上下限
    36. # upper_spec_limit = 5.2
    37. # lower_spec_limit = 4.8
    38. # 3. Shapiro-Wilk检验
    39. shapiro_stat, shapiro_p = stats.shapiro(data)
    40. print("\nShapiro-Wilk检验统计值:", shapiro_stat)
    41. print("Shapiro-Wilk检验p-value:", shapiro_p)
    42. if shapiro_p > 0.05:
    43. print("数据可能来自正态分布")
    44. else:
    45. print("数据可能不来自正态分布")
    46. # 选择适当的d2及C4 值(与样本容量有关)
    47. sample_size = len(data[0]) # 假设所有组的样本容量相同
    48. # 根据样本容量选择 C4 值
    49. C4_values = {
    50. 5: 0.9400,
    51. 6: 0.9515,
    52. 7: 0.9594,
    53. 8: 0.9650,
    54. 9: 0.9693,
    55. 10: 0.9727,
    56. }
    57. d2_values = {
    58. 5: 2.326,
    59. 6: 2.534,
    60. 7: 2.704,
    61. 8: 2.847,
    62. 9: 2.970,
    63. 10: 3.078,
    64. }
    65. print("sample_size子组样本容量: ", sample_size)
    66. C4 = C4_values.get(sample_size, None)
    67. print("根据子组样本容量选择C4值: ",C4)
    68. d2 = d2_values.get(sample_size, None)
    69. print("根据子组样本容量选择d2值: ",d2)
    70. # 计算data整体标准差
    71. population_std = np.std(data)
    72. # 计算data样本标准差
    73. sample_std = np.std(data, ddof=1) # 使用ddof参数来指定自由度
    74. print("\ndata整体标准差:", population_std)
    75. print("data样本标准差:", sample_std)
    76. # 计算X-bar图的x_double_bar中心线均值,用于绘制概率密度曲线
    77. x_double_bar = np.mean([np.mean(subgroup) for subgroup in data]) #x_double_bar中心线均值
    78. # 计算R/d2估计的总体标准差
    79. r_values = [max(subgroup) - min(subgroup) for subgroup in data]
    80. r_bar = np.mean(r_values)
    81. sigma_r = r_bar / d2
    82. # 计算S/C4估计的总体标准差
    83. s_values = [np.std(subgroup, ddof=1) for subgroup in data]
    84. s_bar = np.mean(s_values)
    85. sigma_s = s_bar / C4
    86. # 计算CPK, 其中x_double_bar中心线均值
    87. cpk_r = min((upper_spec_limit - x_double_bar) / (3 * sigma_r), (x_double_bar - lower_spec_limit) / (3 * sigma_r))
    88. cpk_s = min((upper_spec_limit - x_double_bar) / (3 * sigma_s), (x_double_bar - lower_spec_limit) / (3 * sigma_s))
    89. print("\n通过R/d2估计的总体标准差 (σ):", sigma_r)
    90. print("通过S/C4估计的总体标准差 (σ):", sigma_s)
    91. print("R/d2法计算的CPK:", cpk_r)
    92. print("S/C4法计算的CPK:", cpk_s)
    93. # 计算标准分数
    94. z_upper_r = (upper_spec_limit - x_double_bar) / sigma_r # 使用R/d2估计的σ
    95. z_lower_r = (lower_spec_limit - x_double_bar) / sigma_r # 使用R/d2估计的σ
    96. z_upper_s = (upper_spec_limit - x_double_bar) / sigma_s # 使用S/C4估计的σ
    97. z_lower_s = (lower_spec_limit - x_double_bar) / sigma_s # 使用S/C4估计的σ
    98. # 计算在规格限内的概率(使用R/d2估计的σ)
    99. probability_within_spec_r = norm.cdf(z_upper_r) - norm.cdf(z_lower_r)
    100. # 计算在规格限内的概率(使用S/C4估计的σ)
    101. probability_within_spec_s = norm.cdf(z_upper_s) - norm.cdf(z_lower_s)
    102. print("\n使用R/d2法估计的概率(在规格限内):", probability_within_spec_r)
    103. print("使用S/C4法估计的概率(在规格限内):", probability_within_spec_s)
    104. # 将数据展开为一维数组,用于画data数据集直方图
    105. data_flat = [item for sublist in data for item in sublist]
    106. # flat_data = np.concatenate(data) # 扁平化数据
    107. # 绘制整体数据集的直方图并叠加概率密度曲线,标准差用sigma_r=R/d2估计
    108. plt.figure(figsize=(5, 5))
    109. plt.hist(data_flat, bins=12, density=True, alpha=0.6, color='b', label='Histogram')
    110. xmin, xmax = plt.xlim()
    111. x = np.linspace(xmin, xmax, 100)
    112. p = norm.pdf(x, x_double_bar, sigma_r)
    113. plt.plot(x, p, 'r', linewidth=2, label='PDF (Population)')
    114. plt.axvline(x_double_bar, color='r', linestyle='--', label='X-Bar̄')
    115. plt.axvline(upper_spec_limit, color='b', linestyle='-', label='USL')
    116. plt.axvline(lower_spec_limit, color='b', linestyle='-', label='LSL')
    117. plt.xlabel('Value')
    118. plt.ylabel('Probability')
    119. plt.title('Histogram (Population)')
    120. plt.show()


    Shapiro-Wilk检验统计值: 0.9730015993118286
    Shapiro-Wilk检验p-value: 0.20416395366191864
    数据可能来自正态分布
    sample_size子组样本容量:  5
    根据子组样本容量选择C4值:  0.94
    根据子组样本容量选择d2值:  2.326

    data整体标准差: 0.3711094477673968
    data样本标准差: 0.37424122858811426

    通过R/d2估计的总体标准差 (σ): 0.3116938950988822
    通过S/C4估计的总体标准差 (σ): 0.3245199375603761

    R/d2法计算的CPK: 0.6238314176245208
    S/C4法计算的CPK: 0.5991756497496196

    使用R/d2法估计的概率(在规格限内): 0.9454219702532735
    使用S/C4法估计的概率(在规格限内): 0.9351733645056909

    [Finished in 13.9s]

    -------------------------
    绘制X-Bar和R图,及数据集直方图概率密度曲线

    1. import numpy as np
    2. import matplotlib.pyplot as plt
    3. from scipy.stats import norm
    4. # 输入数据
    5. data = [
    6. [6.4, 7.0, 6.4, 6.4, 7.1],
    7. [6.8, 6.4, 6.4, 6.3, 6.5],
    8. [6.3, 7.1, 6.5, 6.4, 7.0],
    9. [6.1, 6.8, 5.9, 5.8, 6.0],
    10. [6.4, 6.9, 6.8, 6.5, 6.9],
    11. [6.6, 6.0, 6.1, 6.2, 5.9],
    12. [6.3, 6.9, 6.6, 6.2, 6.8],
    13. [6.4, 5.6, 6.2, 6.0, 5.8],
    14. [6.3, 6.7, 6.6, 6.4, 6.3],
    15. [6.7, 5.9, 5.8, 6.3, 6.2],
    16. [6.6, 7.0, 6.5, 6.4, 7.1],
    17. [6.8, 6.2, 6.5, 6.2, 5.8]
    18. ]
    19. # 输入过程上下限
    20. upper_spec_limit = 7
    21. lower_spec_limit = 5.5
    22. # 控制图参数
    23. A2 = 0.577
    24. D4 = 2.113
    25. D3 = 0
    26. d2 = 2.326 # 从表格或标准文献中查找
    27. # 计算X-Bar和R
    28. x_bar = np.mean(data, axis=1)
    29. r_values = np.ptp(data, axis=1)
    30. # 计算X-Bar和R的平均值
    31. x_bar_bar = np.mean(x_bar)
    32. r_bar = np.mean(r_values)
    33. # r_values = [max(subgroup) - min(subgroup) for subgroup in data] #极差均值
    34. # x_double_bar = np.mean([np.mean(subgroup) for subgroup in data]) #x_bar_bar中心线均值
    35. # 将数据展开为一维数组,用于画data数据集直方图
    36. data_flat = [item for sublist in data for item in sublist]
    37. # 计算整体标准差
    38. # population_std = np.std(data_flat)
    39. population_std = r_bar / d2
    40. # 计算UCL和LCL (X-Bar)
    41. UCL_x_bar = x_bar_bar + A2 * r_bar
    42. LCL_x_bar = x_bar_bar - A2 * r_bar
    43. # 计算UCL和LCL (R)
    44. UCL_r = D4 * r_bar
    45. LCL_r = D3 * r_bar
    46. # 绘制X-Bar控制图
    47. plt.figure(figsize=(6, 6))
    48. plt.subplot(3, 1, 1)
    49. plt.plot(x_bar, 'o-', label='X-Bar')
    50. plt.axhline(x_bar_bar, color='r', linestyle='--', label='X-Bar̄')
    51. plt.axhline(UCL_x_bar, color='g', linestyle='--', label='UCL(X-Bar)')
    52. plt.axhline(LCL_x_bar, color='g', linestyle='--', label='LCL(X-Bar)')
    53. plt.ylabel('X-Bar')
    54. # plt.legend()
    55. plt.subplot(3, 1, 2)
    56. plt.plot(r_values, 'o-', color='b', label='R')
    57. plt.axhline(r_bar, color='r', linestyle='--', label='R̄')
    58. plt.axhline(UCL_r, color='g', linestyle='--', label='UCL(R)')
    59. plt.axhline(LCL_r, color='g', linestyle='--', label='LCL(R)')
    60. # plt.xlabel('Sample')
    61. plt.ylabel('R')
    62. # plt.legend()
    63. # plt.title('X-Bar-R')
    64. # 绘制整体数据集的直方图并叠加概率密度曲线
    65. plt.subplot(3, 1, 3)
    66. plt.hist(data_flat, bins=12, density=True, alpha=0.6, color='b', label='Histogram')
    67. xmin, xmax = plt.xlim()
    68. x = np.linspace(xmin, xmax, 100)
    69. p = norm.pdf(x, np.mean(data_flat), population_std)
    70. plt.plot(x, p, 'k', linewidth=2, label='PDF (Population)')
    71. plt.axvline(x_bar_bar, color='r', linestyle='--', label='X-Bar̄')
    72. plt.axvline(UCL_x_bar, color='g', linestyle='--', label='UCL(X-Bar)')
    73. plt.axvline(LCL_x_bar, color='g', linestyle='--', label='LCL(X-Bar)')
    74. plt.axvline(upper_spec_limit, color='b', linestyle='-', label='USL')
    75. plt.axvline(lower_spec_limit, color='b', linestyle='-', label='LSL')
    76. plt.xlabel('Value')
    77. plt.ylabel('Probability')
    78. # plt.legend()
    79. plt.title('Histogram (Population)')
    80. plt.tight_layout()
    81. plt.show()
    82. print("np.std(data_flat)估计总体标准差",np.std(data_flat))
    83. print("r_bar/d2估计总体标准差",r_bar / d2)

    ------------------------------------- 

     

  • 相关阅读:
    寻找文本之间的不同
    14. Longest Common Prefix
    筒仓料位监测|敢不敢对“精度”下狠手!您家筒仓料位测得准吗?
    Java 集合之 List
    Python 基于 Yolov8 + CPU 实现物体检测
    智能制造车间生产线可视化
    团队管理|如何提高技术 Leader 的思考技巧?
    设计模式篇---桥接模式
    C++加持让python程序插上翅膀——利用pybind11进行c++和python联合编程示例
    【华为OD机试真题 JS】一种字符串压缩表示的解压
  • 原文地址:https://blog.csdn.net/book_dw5189/article/details/133849843