R/d2 和 S/C4 是用于估计总体标准差的无偏估计方法,通常用于控制图中。这些估计方法的主要目的是通过样本数据来估计总体标准差,以便监测过程的稳定性和变异性,而不需要收集整个总体的数据。
具体来说:
- import numpy as np
- import scipy.stats as stats
- from scipy.stats import norm
- import matplotlib.pyplot as plt
- # 输入数据
- data = [
- [6.4, 7.0, 6.4, 6.4, 7.1],
- [6.8, 6.4, 6.4, 6.3, 6.5],
- [6.3, 7.1, 6.5, 6.4, 7.0],
- [6.1, 6.8, 5.9, 5.8, 6.0],
- [6.4, 6.9, 6.8, 6.5, 6.9],
- [6.6, 6.0, 6.1, 6.2, 5.9],
- [6.3, 6.9, 6.6, 6.2, 6.8],
- [6.4, 5.6, 6.2, 6.0, 5.8],
- [6.3, 6.7, 6.6, 6.4, 6.3],
- [6.7, 5.9, 5.8, 6.3, 6.2],
- [6.6, 7.0, 6.5, 6.4, 7.1],
- [6.8, 6.2, 6.5, 6.2, 5.8]
- ]
- # data1 输入过程上下限
- upper_spec_limit = 7
- lower_spec_limit = 5.8
-
-
- data1 = [
- [5.05, 5.03, 5.04, 4.91, 4.99, 4.97, 4.97, 4.91, 4.97, 5.05],
- [4.96, 4.94, 4.93, 4.91, 5.04, 5.01, 4.96, 4.93, 4.90, 5.02],
- [4.99, 5.05, 5.04, 5.05, 5.00, 4.98, 4.90, 5.05, 4.91, 4.93],
- [5.05, 4.93, 4.91, 4.99, 4.92, 4.91, 5.04, 4.98, 5.03, 5.01],
- [5.05, 5.00, 4.92, 4.93, 5.00, 4.98, 5.10, 4.95, 4.91, 4.98],
- [5.02, 5.01, 5.01, 4.91, 5.05, 4.97, 5.03, 5.03, 4.98, 5.06],
- [4.91, 5.02, 5.07, 5.05, 5.08, 5.01, 5.03, 5.03, 5.07, 4.96],
- [4.95, 4.98, 4.91, 5.03, 5.03, 5.08, 4.92, 5.09, 5.10, 5.00],
- [4.98, 4.97, 4.97, 5.09, 5.03, 5.07, 5.08, 4.96, 4.96, 5.03],
- [4.99, 4.91, 5.02, 4.97, 5.04, 4.99, 5.05, 4.93, 5.05, 4.92]
- ] #检测不符合正态,凑合着演示用
- # data1 输入过程上下限
- # upper_spec_limit = 5.2
- # lower_spec_limit = 4.8
-
- # 3. Shapiro-Wilk检验
- shapiro_stat, shapiro_p = stats.shapiro(data)
- print("\nShapiro-Wilk检验统计值:", shapiro_stat)
- print("Shapiro-Wilk检验p-value:", shapiro_p)
- if shapiro_p > 0.05:
- print("数据可能来自正态分布")
- else:
- print("数据可能不来自正态分布")
-
-
- # 选择适当的d2及C4 值(与样本容量有关)
- sample_size = len(data[0]) # 假设所有组的样本容量相同
- # 根据样本容量选择 C4 值
- C4_values = {
- 5: 0.9400,
- 6: 0.9515,
- 7: 0.9594,
- 8: 0.9650,
- 9: 0.9693,
- 10: 0.9727,
- }
- d2_values = {
- 5: 2.326,
- 6: 2.534,
- 7: 2.704,
- 8: 2.847,
- 9: 2.970,
- 10: 3.078,
- }
-
- print("sample_size子组样本容量: ", sample_size)
- C4 = C4_values.get(sample_size, None)
- print("根据子组样本容量选择C4值: ",C4)
- d2 = d2_values.get(sample_size, None)
- print("根据子组样本容量选择d2值: ",d2)
-
-
- # 计算data整体标准差
- population_std = np.std(data)
-
- # 计算data样本标准差
- sample_std = np.std(data, ddof=1) # 使用ddof参数来指定自由度
-
- print("\ndata整体标准差:", population_std)
- print("data样本标准差:", sample_std)
-
-
- # 计算X-bar图的x_double_bar中心线均值,用于绘制概率密度曲线
- x_double_bar = np.mean([np.mean(subgroup) for subgroup in data]) #x_double_bar中心线均值
-
-
- # 计算R/d2估计的总体标准差
- r_values = [max(subgroup) - min(subgroup) for subgroup in data]
- r_bar = np.mean(r_values)
- sigma_r = r_bar / d2
-
- # 计算S/C4估计的总体标准差
- s_values = [np.std(subgroup, ddof=1) for subgroup in data]
- s_bar = np.mean(s_values)
- sigma_s = s_bar / C4
-
-
- # 计算CPK, 其中x_double_bar中心线均值
- cpk_r = min((upper_spec_limit - x_double_bar) / (3 * sigma_r), (x_double_bar - lower_spec_limit) / (3 * sigma_r))
- cpk_s = min((upper_spec_limit - x_double_bar) / (3 * sigma_s), (x_double_bar - lower_spec_limit) / (3 * sigma_s))
-
- print("\n通过R/d2估计的总体标准差 (σ):", sigma_r)
- print("通过S/C4估计的总体标准差 (σ):", sigma_s)
- print("R/d2法计算的CPK:", cpk_r)
- print("S/C4法计算的CPK:", cpk_s)
-
-
- # 计算标准分数
- z_upper_r = (upper_spec_limit - x_double_bar) / sigma_r # 使用R/d2估计的σ
- z_lower_r = (lower_spec_limit - x_double_bar) / sigma_r # 使用R/d2估计的σ
-
- z_upper_s = (upper_spec_limit - x_double_bar) / sigma_s # 使用S/C4估计的σ
- z_lower_s = (lower_spec_limit - x_double_bar) / sigma_s # 使用S/C4估计的σ
-
-
- # 计算在规格限内的概率(使用R/d2估计的σ)
- probability_within_spec_r = norm.cdf(z_upper_r) - norm.cdf(z_lower_r)
-
- # 计算在规格限内的概率(使用S/C4估计的σ)
- probability_within_spec_s = norm.cdf(z_upper_s) - norm.cdf(z_lower_s)
-
- print("\n使用R/d2法估计的概率(在规格限内):", probability_within_spec_r)
- print("使用S/C4法估计的概率(在规格限内):", probability_within_spec_s)
-
-
-
-
- # 将数据展开为一维数组,用于画data数据集直方图
- data_flat = [item for sublist in data for item in sublist]
- # flat_data = np.concatenate(data) # 扁平化数据
-
-
- # 绘制整体数据集的直方图并叠加概率密度曲线,标准差用sigma_r=R/d2估计
- plt.figure(figsize=(5, 5))
- plt.hist(data_flat, bins=12, density=True, alpha=0.6, color='b', label='Histogram')
- xmin, xmax = plt.xlim()
- x = np.linspace(xmin, xmax, 100)
- p = norm.pdf(x, x_double_bar, sigma_r)
- plt.plot(x, p, 'r', linewidth=2, label='PDF (Population)')
- plt.axvline(x_double_bar, color='r', linestyle='--', label='X-Bar̄')
- plt.axvline(upper_spec_limit, color='b', linestyle='-', label='USL')
- plt.axvline(lower_spec_limit, color='b', linestyle='-', label='LSL')
- plt.xlabel('Value')
- plt.ylabel('Probability')
- plt.title('Histogram (Population)')
- plt.show()
Shapiro-Wilk检验统计值: 0.9730015993118286
Shapiro-Wilk检验p-value: 0.20416395366191864
数据可能来自正态分布
sample_size子组样本容量: 5
根据子组样本容量选择C4值: 0.94
根据子组样本容量选择d2值: 2.326
data整体标准差: 0.3711094477673968
data样本标准差: 0.37424122858811426
通过R/d2估计的总体标准差 (σ): 0.3116938950988822
通过S/C4估计的总体标准差 (σ): 0.3245199375603761
R/d2法计算的CPK: 0.6238314176245208
S/C4法计算的CPK: 0.5991756497496196
使用R/d2法估计的概率(在规格限内): 0.9454219702532735
使用S/C4法估计的概率(在规格限内): 0.9351733645056909
[Finished in 13.9s]
-------------------------
绘制X-Bar和R图,及数据集直方图概率密度曲线
- import numpy as np
- import matplotlib.pyplot as plt
- from scipy.stats import norm
-
- # 输入数据
- data = [
- [6.4, 7.0, 6.4, 6.4, 7.1],
- [6.8, 6.4, 6.4, 6.3, 6.5],
- [6.3, 7.1, 6.5, 6.4, 7.0],
- [6.1, 6.8, 5.9, 5.8, 6.0],
- [6.4, 6.9, 6.8, 6.5, 6.9],
- [6.6, 6.0, 6.1, 6.2, 5.9],
- [6.3, 6.9, 6.6, 6.2, 6.8],
- [6.4, 5.6, 6.2, 6.0, 5.8],
- [6.3, 6.7, 6.6, 6.4, 6.3],
- [6.7, 5.9, 5.8, 6.3, 6.2],
- [6.6, 7.0, 6.5, 6.4, 7.1],
- [6.8, 6.2, 6.5, 6.2, 5.8]
- ]
-
- # 输入过程上下限
- upper_spec_limit = 7
- lower_spec_limit = 5.5
-
- # 控制图参数
- A2 = 0.577
- D4 = 2.113
- D3 = 0
- d2 = 2.326 # 从表格或标准文献中查找
-
-
-
- # 计算X-Bar和R
- x_bar = np.mean(data, axis=1)
- r_values = np.ptp(data, axis=1)
-
- # 计算X-Bar和R的平均值
- x_bar_bar = np.mean(x_bar)
- r_bar = np.mean(r_values)
- # r_values = [max(subgroup) - min(subgroup) for subgroup in data] #极差均值
- # x_double_bar = np.mean([np.mean(subgroup) for subgroup in data]) #x_bar_bar中心线均值
-
-
- # 将数据展开为一维数组,用于画data数据集直方图
- data_flat = [item for sublist in data for item in sublist]
- # 计算整体标准差
- # population_std = np.std(data_flat)
- population_std = r_bar / d2
-
-
- # 计算UCL和LCL (X-Bar)
- UCL_x_bar = x_bar_bar + A2 * r_bar
- LCL_x_bar = x_bar_bar - A2 * r_bar
-
- # 计算UCL和LCL (R)
- UCL_r = D4 * r_bar
- LCL_r = D3 * r_bar
-
- # 绘制X-Bar控制图
- plt.figure(figsize=(6, 6))
- plt.subplot(3, 1, 1)
-
- plt.plot(x_bar, 'o-', label='X-Bar')
- plt.axhline(x_bar_bar, color='r', linestyle='--', label='X-Bar̄')
- plt.axhline(UCL_x_bar, color='g', linestyle='--', label='UCL(X-Bar)')
- plt.axhline(LCL_x_bar, color='g', linestyle='--', label='LCL(X-Bar)')
- plt.ylabel('X-Bar')
- # plt.legend()
-
- plt.subplot(3, 1, 2)
- plt.plot(r_values, 'o-', color='b', label='R')
- plt.axhline(r_bar, color='r', linestyle='--', label='R̄')
- plt.axhline(UCL_r, color='g', linestyle='--', label='UCL(R)')
- plt.axhline(LCL_r, color='g', linestyle='--', label='LCL(R)')
- # plt.xlabel('Sample')
- plt.ylabel('R')
- # plt.legend()
- # plt.title('X-Bar-R')
-
- # 绘制整体数据集的直方图并叠加概率密度曲线
- plt.subplot(3, 1, 3)
- plt.hist(data_flat, bins=12, density=True, alpha=0.6, color='b', label='Histogram')
- xmin, xmax = plt.xlim()
- x = np.linspace(xmin, xmax, 100)
- p = norm.pdf(x, np.mean(data_flat), population_std)
- plt.plot(x, p, 'k', linewidth=2, label='PDF (Population)')
- plt.axvline(x_bar_bar, color='r', linestyle='--', label='X-Bar̄')
- plt.axvline(UCL_x_bar, color='g', linestyle='--', label='UCL(X-Bar)')
- plt.axvline(LCL_x_bar, color='g', linestyle='--', label='LCL(X-Bar)')
- plt.axvline(upper_spec_limit, color='b', linestyle='-', label='USL')
- plt.axvline(lower_spec_limit, color='b', linestyle='-', label='LSL')
- plt.xlabel('Value')
- plt.ylabel('Probability')
- # plt.legend()
- plt.title('Histogram (Population)')
- plt.tight_layout()
- plt.show()
-
-
- print("np.std(data_flat)估计总体标准差",np.std(data_flat))
- print("r_bar/d2估计总体标准差",r_bar / d2)
-
-
-
-
-------------------------------------