chineseocr测试具体部署步骤(不用web界面)

源项目地址：
https://github.com/chineseocr/chineseocr
由于chineseocr需要在web上展示检测结果，还需要安装web相关内容，我的硬件是nvidia agx orin只需要在本地查看检测结果，做如下操作
找到源码项目中的test.ipynb,改写成test.py

首先将下载源项目地址下载的model放入项目的models文件夹内

import cv2
import time
from PIL import Image
import numpy as np
import os
import json
import time
#import web
import numpy as np
from PIL import Image
from config import *
from apphelper.image import union_rbox,adjust_box_to_origin,base64_to_PIL
from application import trainTicket,idcard
from text.opencv_dnn_detect import angle_detect
from main import TextOcrModel 

print("aaaaaa: ",yoloTextFlag)#打印yoloTextFlag可以看到当前使用的什么文字检测引擎，在config.py更改文字检测引擎

if yoloTextFlag =='keras' or AngleModelFlag=='tf' or ocrFlag=='keras':
    if GPU:
        os.environ["CUDA_VISIBLE_DEVICES"] = str(GPUID)
        import tensorflow as tf
        #from keras import backend as K
        from tensorflow.compat.v1.keras import backend as K
        tf.compat.v1.disable_eager_execution()
        #config = tf.ConfigProto()
        config = tf.compat.v1.ConfigProto()
        config.gpu_options.allocator_type = 'BFC'
        config.gpu_options.per_process_gpu_memory_fraction = 0.3## GPU最大占用量
        config.gpu_options.allow_growth = True##GPU是否可动态增加
        K.set_session(tf.compat.v1.Session(config=config))
        #K.get_session().run(tf.global_variables_initializer())
        K.get_session().run(tf.compat.v1.global_variables_initializer())
    
    else:
      ##CPU启动
      os.environ["CUDA_VISIBLE_DEVICES"] = ''

if yoloTextFlag=='opencv':
    scale,maxScale = IMGSIZE
    from text.opencv_dnn_detect import text_detect
elif yoloTextFlag=='darknet':
    scale,maxScale = IMGSIZE
    from text.darknet_detect import text_detect
elif yoloTextFlag=='keras':
    scale,maxScale = IMGSIZE[0],2048
    from text.keras_detect import  text_detect
else:
     print( "err,text engine in keras\opencv\darknet")
     

if ocr_redis:
    ##多任务并发识别
    from helper.redisbase import redisDataBase
    ocr = redisDataBase().put_values
else:   
    from crnn.keys import alphabetChinese,alphabetEnglish
    if ocrFlag=='keras':
        from crnn.network_keras import CRNN
        if chineseModel:
            alphabet = alphabetChinese
            if LSTMFLAG:
                ocrModel = ocrModelKerasLstm
            else:
                ocrModel = ocrModelKerasDense
        else:
            ocrModel = ocrModelKerasEng
            alphabet = alphabetEnglish
            LSTMFLAG = True
            
    elif ocrFlag=='torch':
        from crnn.network_torch import CRNN
        if chineseModel:
            alphabet = alphabetChinese
            if LSTMFLAG:
                ocrModel = ocrModelTorchLstm
            else:
                ocrModel = ocrModelTorchDense
                
        else:
            ocrModel = ocrModelTorchEng
            alphabet = alphabetEnglish
            LSTMFLAG = True
    elif ocrFlag=='opencv':
        from crnn.network_dnn import CRNN
        ocrModel = ocrModelOpencv
        alphabet = alphabetChinese
    else:
        print( "err,ocr engine in keras\opencv\darknet")
     
    nclass = len(alphabet)+1   
    if ocrFlag=='opencv':
        crnn = CRNN(alphabet=alphabet)
    else:
        crnn = CRNN( 32, 1, nclass, 256, leakyRelu=False,lstmFlag=LSTMFLAG,GPU=GPU,alphabet=alphabet)
    if os.path.exists(ocrModel):
        crnn.load_weights(ocrModel)
    else:
        print("download model or tranform model with tools!")
        
    ocr = crnn.predict_job
    
   

model =  TextOcrModel(ocr,text_detect,angle_detect)
from apphelper.image import xy_rotate_box,box_rotate,solve





def plot_box(img,boxes):
    blue = (0, 0, 0) #18
    tmp = np.copy(img)
    for box in boxes:
         cv2.rectangle(tmp, (int(box[0]),int(box[1])), (int(box[2]), int(box[3])), blue, 1) #19
    
    return Image.fromarray(tmp) 

def plot_boxes(img,angle, result,color=(0,0,0)):
    tmp = np.array(img)
    c = color
    h,w = img.shape[:2]
    thick = int((h + w) / 300)
    i = 0
    if angle in [90,270]:
        imgW,imgH = img.shape[:2]
        
    else:
        imgH,imgW= img.shape[:2]

    for line in result:
        cx =line['cx']
        cy = line['cy']
        degree =line['degree']
        w  = line['w']
        h = line['h']

        x1,y1,x2,y2,x3,y3,x4,y4 = xy_rotate_box(cx, cy, w, h, degree/180*np.pi)
        
        x1,y1,x2,y2,x3,y3,x4,y4 = box_rotate([x1,y1,x2,y2,x3,y3,x4,y4],angle=(360-angle)%360,imgH=imgH,imgW=imgW)
        cx  =np.mean([x1,x2,x3,x4])
        cy  = np.mean([y1,y2,y3,y4])
        cv2.line(tmp,(int(x1),int(y1)),(int(x2),int(y2)),c,1)
        cv2.line(tmp,(int(x2),int(y2)),(int(x3),int(y3)),c,1)
        cv2.line(tmp,(int(x3),int(y3)),(int(x4),int(y4)),c,1)
        cv2.line(tmp,(int(x4),int(y4)),(int(x1),int(y1)),c,1)
        mess=str(i)
        cv2.putText(tmp, mess, (int(cx), int(cy)),0, 1e-3 * h, c, thick // 2)
        i+=1
    cv2.imwrite("/home/nvidia/chineseocr-app/output/1.jpg",tmp)
    return Image.fromarray(tmp).convert('RGB')


def plot_rboxes(img,boxes,color=(0,0,0)):
    tmp = np.array(img)
    c = color
    h,w = img.shape[:2]
    thick = int((h + w) / 300)
    i = 0


    for box in boxes:

        x1,y1,x2,y2,x3,y3,x4,y4 = box
        
        
        cx  =np.mean([x1,x2,x3,x4])
        cy  = np.mean([y1,y2,y3,y4])
        cv2.line(tmp,(int(x1),int(y1)),(int(x2),int(y2)),c,1)
        cv2.line(tmp,(int(x2),int(y2)),(int(x3),int(y3)),c,1)
        cv2.line(tmp,(int(x3),int(y3)),(int(x4),int(y4)),c,1)
        cv2.line(tmp,(int(x4),int(y4)),(int(x1),int(y1)),c,1)
        mess=str(i)
        cv2.putText(tmp, mess, (int(cx), int(cy)),0, 1e-3 * h, c, thick // 2)
        i+=1

    cv2.imwrite("/home/nvidia/chineseocr-app/output/2.jpg",tmp)
    return Image.fromarray(tmp).convert('RGB')



p = '/home/nvidia/chineseocr-app/test/zw2_0.jpg'
img = cv2.imread(p)

h,w = img.shape[:2]
timeTake = time.time()
scale=608
maxScale=2048

result,angle= model.model(img,
                                    detectAngle=True,##是否进行文字方向检测
                                    scale=scale,
                                    maxScale=maxScale,
                                    MAX_HORIZONTAL_GAP=80,##字符之间的最大间隔，用于文本行的合并
                                    MIN_V_OVERLAPS=0.6,
                                    MIN_SIZE_SIM=0.6,
                                    TEXT_PROPOSALS_MIN_SCORE=0.1,
                                    TEXT_PROPOSALS_NMS_THRESH=0.7,
                                    TEXT_LINE_NMS_THRESH = 0.9,##文本行之间测iou值
                                     LINE_MIN_SCORE=0.1,                                             
                                    leftAdjustAlph=0,##对检测的文本行进行向左延伸
                                    rightAdjustAlph=0.1,##对检测的文本行进行向右延伸
                                   )
        
timeTake = time.time()-timeTake
print("angle: ",angle)
print('It take:{}s'.format(timeTake))
for line in result:
    print(line['text'])
plot_boxes(img,angle, result,color=(0,0,0))
#cv2.imwrite("/home/nvidia/chineseocr-app/output/1.jpg",img1)

boxes,scores  = model.detect_box(img,608,2048)
plot_box(img,boxes)
#cv2.imwrite("/home/nvidia/chineseocr-app/output/2.jpg",img2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216

config.py
在这里插入图片描述
在config.py中看到可以选择三种文字检测引擎，首先使用keras引擎。
我现在的tensorflow版本是2.10.0，源码中使用的tensorflow应该是比较老的版本，很多函数的写法已经不一样了，比如调试的时候经常出现的报错
AttributeError: module ‘tensorflow’ has no attribute ‘ConfigProto’
此类是函数写法在tensorflow升级后改变了，应该在tf与函数之间添加compat.v1
在这里插入图片描述

还有numpy中增加维度的newaxis报错
TypeError: list indices must be integers or slices, not tuple，不知道什么原因，改成expand_dims后正常。

报错：TypeError: only integer scalar arrays can be converted to a scalar index
好像numpy版本变更导致不能直接索引，改成第三行的写法就可以了
在这里插入图片描述
这样opencv和darknet两种引擎可以使用,keras引擎暂未调通，暂定

测试结果带角度的ocr预测效果较差，将带角度预测关闭反而正确率提高

相关阅读:
微信小程序项目如何用Hbuild启动，先让对方在微信开发平台将你的微信号添加成开发成员。
Python|(解决)苹果mac电脑无法打开“chromedriver”，因为无法验证开发者，要怎么解决？
kubernetes（5）续4
JUC第七讲：关键字final详解
 亚马逊云购买和配置苹果MacOs系统的云主机
 开发一款招聘小程序需要具备哪些功能？
无频闪护眼灯哪个好？五款无频闪护眼台灯推荐
 Vmware 扩展硬盘空间后的操作-Ubuntu
这8个Wireshark使用技巧，一看就会！
JavaEE初阶：多线程（进阶）
原文地址：https://blog.csdn.net/Mintary/article/details/127918012