• 用支持向量机(SVM)进行光学字符识别(OCR)


    1. letters<-read.csv("letterdata.csv", stringsAsFactors = TRUE) # 载入数据文件(R 4.0 及以上版本需显式指定 stringsAsFactors = TRUE,letter 列才会读成因子,后面的分类建模才能正常进行)
    2. str(letters)
    3. 'data.frame': 20000 obs. of 17 variables:
    4. $ letter: Factor w/ 26 levels "A","B","C","D",..: 20 9 4 14 7 19 2 1 10 13 ...
    5. $ xbox : int 2 5 4 7 2 4 4 1 2 11 ...
    6. $ ybox : int 8 12 11 11 1 11 2 1 2 15 ...
    7. $ width : int 3 3 6 6 3 5 5 3 4 13 ...
    8. $ height: int 5 7 8 6 1 8 4 2 4 9 ...
    9. $ onpix : int 1 2 6 3 1 3 4 1 2 7 ...
    10. $ xbar : int 8 10 10 5 8 8 8 8 10 13 ...
    11. $ ybar : int 13 5 6 9 6 8 7 2 6 2 ...
    12. $ x2bar : int 0 5 2 4 6 6 6 2 2 6 ...
    13. $ y2bar : int 6 4 6 6 6 9 6 2 6 2 ...
    14. $ xybar : int 6 13 10 4 6 5 7 8 12 12 ...
    15. $ x2ybar: int 10 3 3 4 5 6 6 2 4 1 ...
    16. $ xy2bar: int 8 9 7 10 9 6 6 8 8 9 ...
    17. $ xedge : int 0 2 3 6 1 0 2 1 1 8 ...
    18. $ xedgey: int 8 8 7 10 7 8 8 6 6 1 ...
    19. $ yedge : int 0 4 3 2 5 9 7 2 1 1 ...
    20. $ yedgex: int 8 10 9 8 10 7 10 7 7 8 ...

    对数据进行拆分:按4:1的比例,将前16000条记录作为训练集,后4000条记录作为测试集

    1. letters_train<-letters[1:16000,]
    2. letters_test<-letters[16001:20000,]

    安装加载算法包

    1. install.packages("kernlab")
    2. library(kernlab)

    建模,采用线性核函数,查看模型结构

    1. > letter_classifier<-ksvm(letter~.,data=letters_train,kernel="vanilladot")
    2. Setting default kernel parameters
    3. > letter_classifier
    4. Support Vector Machine object of class "ksvm"
    5. SV type: C-svc (classification)
    6. parameter : cost C = 1
    7. Linear (vanilla) kernel function.
    8. Number of Support Vectors : 7037
    9. Objective Function Value : -14.1746 -20.0072 -23.5628 -6.2009 -7.5524 -32.7694 -49.9786 -18.1824 -62.1111 -32.7284 -16.2209 -32.2837 -28.9777 -51.2195 -13.276 -35.6217 -30.8612 -16.5256 -14.6811 -32.7475 -30.3219 -7.7956 -11.8138 -32.3463 -13.1262 -9.2692 -153.1654 -52.9678 -76.7744 -119.2067 -165.4437 -54.6237 -41.9809 -67.2688 -25.1959 -27.6371 -26.4102 -35.5583 -41.2597 -122.164 -187.9178 -222.0856 -21.4765 -10.3752 -56.3684 -12.2277 -49.4899 -9.3372 -19.2092 -11.1776 -100.2186 -29.1397 -238.0516 -77.1985 -8.3339 -4.5308 -139.8534 -80.8854 -20.3642 -13.0245 -82.5151 -14.5032 -26.7509 -18.5713 -23.9511 -27.3034 -53.2731 -11.4773 -5.12 -13.9504 -4.4982 -3.5755 -8.4914 -40.9716 -49.8182 -190.0269 -43.8594 -44.8667 -45.2596 -13.5561 -17.7664 -87.4105 -107.1056 -37.0245 -30.7133 -112.3218 -32.9619 -27.2971 -35.5836 -17.8586 -5.1391 -43.4094 -7.7843 -16.6785 -58.5103 -159.9936 -49.0782 -37.8426 -32.8002 -74.5249 -133.3423 -11.1638 -5.3575 -12.438 -30.9907 -141.6924 -54.2953 -179.0114 -99.8896 -10.288 -15.1553 -3.7815 -67.6123 -7.696 -88.9304 -47.6448 -94.3718 -70.2733 -71.5057 -21.7854 -12.7657 -7.4383 -23.502 -13.1055 -239.9708 -30.4193 -25.2113 -136.2795 -140.9565 -9.8122 -34.4584 -6.3039 -60.8421 -66.5793 -27.2816 -214.3225 -34.7796 -16.7631 -135.7821 -160.6279 -45.2949 -25.1023 -144.9059 -82.2352 -327.7154 -142.0613 -158.8821 -32.2181 -32.8887 -52.9641 -25.4937 -47.9936 -6.8991 -9.7293 -36.436 -70.3907 -187.7611 -46.9371 -89.8103 -143.4213 -624.3645 -119.2204 -145.4435 -327.7748 -33.3255 -64.0607 -145.4831 -116.5903 -36.2977 -66.3762 -44.8248 -7.5088 -217.9246 -12.9699 -30.504 -2.0369 -6.126 -14.4448 -21.6337 -57.3084 -20.6915 -184.3625 -20.1052 -4.1484 -4.5344 -0.828 -121.4411 -7.9486 -58.5604 -21.4878 -13.5476 -5.646 -15.629 -28.9576 -20.5959 -76.7111 -27.0119 -94.7101 -15.1713 -10.0222 -7.6394 -1.5784 -87.6952 -6.2239 -99.3711 -101.0906 -45.6639 -24.0725 -61.7702 -24.1583 -52.2368 -234.3264 -39.9749 -48.8556 -34.1464 -20.9664 -11.4525 -123.0277 -6.4903 
-5.1865 -8.8016 -9.4618 -21.7742 -24.2361 -123.3984 -31.4404 -88.3901 -30.0924 -13.8198 -9.2701 -3.0823 -87.9624 -6.3845 -13.968 -65.0702 -105.523 -13.7403 -13.7625 -50.4223 -2.933 -8.4289 -80.3381 -36.4147 -112.7485 -4.1711 -7.8989 -1.2676 -90.8037 -21.4919 -7.2235 -47.9557 -3.383 -20.433 -64.6138 -45.5781 -56.1309 -6.1345 -18.6307 -2.374 -72.2553 -111.1885 -106.7664 -23.1323 -19.3765 -54.9819 -34.2953 -64.4756 -20.4115 -6.689 -4.378 -59.141 -34.2468 -58.1509 -33.8665 -10.6902 -53.1387 -13.7478 -20.1987 -55.0923 -3.8058 -60.0382 -235.4841 -12.6837 -11.7407 -17.3058 -9.7167 -65.8498 -17.1051 -42.8131 -53.1054 -25.0437 -15.302 -44.0749 -16.9582 -62.9773 -5.204 -5.2963 -86.1704 -3.7209 -6.3445 -1.1264 -122.5771 -23.9041 -355.0145 -31.1013 -32.619 -4.9664 -84.1048 -134.5957 -72.8371 -23.9002 -35.3077 -11.7119 -22.2889 -1.8598 -59.2174 -8.8994 -150.742 -1.8533 -1.9711 -9.9676 -0.5207 -26.9229 -30.429 -5.6289
    10. Training error : 0.130062

    评估模型

    1. > letter_prediction<-predict(letter_classifier,letters_test)
    2. > head(letter_prediction)
    3. [1] U N V X N H
    4. 26 Levels: A B C D E F G H I J K L M N O P Q R S T U V W ... Z
    5. > table(letter_prediction,letters_test$letter)
    6. letter_prediction A B C D E F G H I J K
    7. A 144 0 0 0 0 0 0 0 0 1 0
    8. B 0 121 0 5 2 0 1 2 0 0 1
    9. C 0 0 120 0 4 0 10 2 2 0 1
    10. D 2 2 0 156 0 1 3 10 4 3 4
    11. E 0 0 5 0 127 3 1 1 0 0 3
    12. F 0 0 0 0 0 138 2 2 6 0 0
    13. G 1 1 2 1 9 2 123 2 0 0 1
    14. H 0 0 0 1 0 1 0 102 0 2 3
    15. I 0 1 0 0 0 1 0 0 141 8 0
    16. J 0 1 0 0 0 1 0 2 5 128 0
    17. K 1 1 9 0 0 0 2 5 0 0 118
    18. L 0 0 0 0 2 0 1 1 0 0 0
    19. M 0 0 1 1 0 0 1 1 0 0 0
    20. N 0 0 0 0 0 1 0 1 0 0 0
    21. O 1 0 2 1 0 0 1 2 0 1 0
    22. P 0 0 0 1 0 2 1 0 0 0 0
    23. Q 0 0 0 0 0 0 8 2 0 0 0
    24. R 0 7 0 0 1 0 3 8 0 0 13
    25. S 1 1 0 0 1 0 3 0 1 1 0
    26. T 0 0 0 0 3 2 0 0 0 0 1
    27. U 1 0 3 1 0 0 0 2 0 0 0
    28. V 0 0 0 0 0 1 3 4 0 0 0
    29. W 0 0 0 0 0 0 1 0 0 0 0
    30. X 0 1 0 0 2 0 0 1 3 0 1
    31. Y 3 0 0 0 0 0 0 1 0 0 0
    32. Z 2 0 0 0 1 0 0 0 3 4 0
    33. letter_prediction L M N O P Q R S T U V
    34. A 0 1 2 2 0 5 0 1 1 1 0
    35. B 0 1 0 0 2 2 3 5 0 0 2
    36. C 3 0 0 2 0 0 0 0 0 0 0
    37. D 3 0 5 5 3 1 4 0 0 0 0
    38. E 4 0 0 0 0 2 0 10 0 0 0
    39. F 0 0 0 0 16 0 0 3 0 0 1
    40. G 2 1 0 1 2 8 2 4 3 0 0
    41. H 2 3 4 20 0 2 3 0 3 0 2
    42. I 0 0 0 0 1 0 0 3 0 0 0
    43. J 0 0 0 1 1 3 0 2 0 0 0
    44. K 0 0 2 0 1 0 7 0 1 3 0
    45. L 133 0 0 0 0 1 0 5 0 0 0
    46. M 0 135 4 0 0 0 0 0 0 3 0
    47. N 0 0 145 0 0 0 3 0 0 1 0
    48. O 0 0 1 99 3 3 0 0 0 3 0
    49. P 0 0 0 2 130 0 0 0 0 0 0
    50. Q 3 0 0 3 1 124 0 5 0 0 0
    51. R 0 0 1 1 1 0 138 0 1 0 1
    52. S 1 0 0 0 0 14 0 101 3 0 0
    53. T 0 0 0 0 0 0 0 3 133 1 0
    54. U 0 0 0 1 0 0 0 0 0 152 0
    55. V 0 1 2 1 0 3 1 0 0 0 126
    56. W 0 2 0 0 0 0 0 0 0 4 4
    57. X 6 0 0 1 0 0 0 1 0 0 0
    58. Y 0 0 0 0 7 0 0 0 3 0 0
    59. Z 0 0 0 0 0 0 0 18 3 0 0
    60. letter_prediction W X Y Z
    61. A 1 0 0 1
    62. B 0 1 0 0
    63. C 0 0 0 0
    64. D 0 3 3 1
    65. E 0 2 0 3
    66. F 0 1 2 0
    67. G 0 1 0 0
    68. H 0 0 1 0
    69. I 0 5 1 1
    70. J 0 1 0 6
    71. K 0 5 0 0
    72. L 0 0 0 1
    73. M 8 0 0 0
    74. N 2 0 0 0
    75. O 0 0 0 0
    76. P 0 0 1 0
    77. Q 0 0 2 0
    78. R 0 0 0 0
    79. S 0 2 0 10
    80. T 0 0 2 2
    81. U 0 1 1 0
    82. V 1 0 4 0
    83. W 127 0 0 0
    84. X 0 137 1 1
    85. Y 0 0 127 0
    86. Z 0 0 0 132
    87. > agreement<-letter_prediction==letters_test$letter
    88. > table(agreement)
    89. agreement
    90. FALSE TRUE
    91. 643 3357
    92. > prop.table(table(agreement))
    93. agreement
    94. FALSE TRUE
    95. 0.16075 0.83925

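    优化模型:改用高斯RBF核函数(kernel="rbfdot")重新训练,并在测试集上评估(注意:下面第2步误把预测结果赋给了模型变量 letter_classifier_rbf,因此第4步重新训练了模型,第6步才是正确的预测)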

    1. > letter_classifier_rbf<-ksvm(letter~.,data=letters_train,kernel="rbfdot")
    2. > letter_classifier_rbf<-predict(letter_classifier_rbf,letters_test)
    3. >
    4. > letter_classifier_rbf<-ksvm(letter~.,data=letters_train,kernel="rbfdot")
    5. >
    6. > letter_prediction_rbf<-predict(letter_classifier_rbf,letters_test)
    7. > agreement_rbf<-letter_prediction_rbf==letters_test$letter
    8. > table(agreement_rbf)
    9. agreement_rbf
    10. FALSE TRUE
    11. 278 3722
    12. > prop.table(table(agreement_rbf))
    13. agreement_rbf
    14. FALSE TRUE
    15. 0.0695 0.9305
    16. > letters_new<-read.csv("letterdata-new.csv")
    17. > letter_prediction_rbf_new<-predict(letter_classifier_rbf,letters_new)
    18. > letter_prediction_rbf_new
    19. [1] M
    20. 26 Levels: A B C D E F G H I J K L M N O P Q R S T U V W ... Z

  • 相关阅读:
    Docker—概述与安装
    (王道考研计算机网络)第四章网络层-第七、八节:移动IP和网络层设备
    嵌入式学习--1线协议(以ds18b20为例)
    Vue中常用的rules校验规则
    GEO振弦式钢筋计适用范围
    算法: C# 中将 Dictionary 集合用作 Hashmap 等价类型
    Linux-进程管理
    Qt+sqlite3使用事务提升插入效率
    聊一聊 Valgrind 监视非托管内存泄露和崩溃
    Linux--CE--ansible常用模块(1)
  • 原文地址:https://blog.csdn.net/qq_65144447/article/details/134004219