Image of the digit character "0" (16x16 dot matrix):
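A small sketch of how such a bitmap can be produced: the snippet below renders "0" at 16x16 with Pillow and prints it as a text dot matrix. It assumes arial.ttf is available in the working directory and is an illustration only, separate from the script that follows.

from PIL import Image, ImageDraw, ImageFont

img = Image.new("L", (16, 16), "white")
draw = ImageDraw.Draw(img)
font = ImageFont.truetype("arial.ttf", 16)
w = draw.textlength("0", font=font)
draw.text(((16 - w) / 2, 0), "0", font=font, fill="black")
for y in range(16):
    # '#' marks dark pixels, '.' marks the white background
    print("".join("#" if img.getpixel((x, y)) < 128 else "." for x in range(16)))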
#Letter23Digital23R231006d.py

import torch
import torch.nn as nn
import torch.optim as optim  # optimizer = optim.SGD(model.parameters(), lr=0.01)
from PIL import Image
from PIL import ImageDraw
from PIL import ImageFont
#from PIL import ImageDraw, ImageFont

from torchvision import transforms
import matplotlib.pyplot as plt  # matplotlib displays the characters (and results)

Times500 = 4590  # number of training epochs


# Parameter settings
font_path = "arial.ttf"  # "e:\\22Letter23r1002\\arial.ttf"  # "e:\\arial.ttf"
#siz28e28 = 28
siz28e14 = 32  # side length (in pixels) of each rendered character image
#characters = ["2","4","我"] + [str(i) for i in range(8,9)] + ["A","Z"]  # ["A", "B"]
Characts01 = ["8", "-", "+", "X", "2"] + [str(i) for i in range(0, 2)]

print(Characts01)

# 1. Generate the dot-matrix (bitmap) data for the characters and digits
def render_char(char, siz28e14):
    image = Image.new("L", (siz28e14, siz28e14), "white")
    draw = ImageDraw.Draw(image)
    font = ImageFont.truetype(font_path, siz28e14)
    #-----------------------------------------
    w03 = draw.textlength(char, font=font)  # text width in pixels
    h03 = siz28e14                          # approximate the text height by the font size
    print("{[w3", w03, "h3", h03, "]} ")
    #-----------------------------------------
    draw.text(((siz28e14 - w03) / 2, (siz28e14 - h03) / 2), char, font=font, fill="black")
    return image
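# Added note: h03 above is only an approximation; the font size is not the exact glyph
# height. A hypothetical variant using Pillow's draw.textbbox() (available since Pillow 8.0),
# which centers the glyph both horizontally and vertically, could look like this sketch.
# render_char_bbox is not used by the rest of the script.
def render_char_bbox(char, size):
    image = Image.new("L", (size, size), "white")
    draw = ImageDraw.Draw(image)
    font = ImageFont.truetype(font_path, size)
    # textbbox returns (left, top, right, bottom) of the text rendered at (0, 0)
    left, top, right, bottom = draw.textbbox((0, 0), char, font=font)
    w, h = right - left, bottom - top
    # shift by -left/-top so the glyph's own bounding box sits centered in the image
    draw.text(((size - w) / 2 - left, (size - h) / 2 - top), char, font=font, fill="black")
    return image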

Data01 = []
labels = []
for i, char in enumerate(Characts01):
    img = render_char(char, siz28e14)
    Data01.append(transforms.ToTensor()(img))
    labels.append(i)
    print("i", i, char)

#-----------------------------
# 2. Display the images rendered from the arial.ttf font
for i in range(len(Data01)):
    plt.imshow(Data01[i].squeeze().cpu().numpy(), cmap="gray")
    plt.title("char:" + Characts01[i])
    plt.axis("off")
    # plt.show()

#-----------------------------

# 3. Train the neural network model
class SimpleNet(nn.Module):
    def __init__(self, num_classes):
        super(SimpleNet, self).__init__()
        self.fc = nn.Linear(siz28e14 * siz28e14, num_classes)

    def forward(self, x):
        x = x.view(-1, siz28e14 * siz28e14)
        x = self.fc(x)
        return x
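# Added note: SimpleNet is a single linear layer, i.e. multinomial logistic regression on
# raw pixels. A hypothetical convolutional variant is sketched below for comparison only;
# it is not used by the rest of the script.
class SimpleConvNet(nn.Module):
    def __init__(self, num_classes):
        super(SimpleConvNet, self).__init__()
        self.conv = nn.Conv2d(1, 8, kernel_size=3, padding=1)  # 1 input channel -> 8 feature maps
        self.pool = nn.MaxPool2d(2)                             # 32x32 -> 16x16
        self.fc = nn.Linear(8 * (siz28e14 // 2) * (siz28e14 // 2), num_classes)

    def forward(self, x):
        x = self.pool(torch.relu(self.conv(x)))
        x = x.view(x.size(0), -1)
        return self.fc(x)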

model = SimpleNet(len(Characts01))
loss_function = nn.CrossEntropyLoss()
#optimizer = optim.SGD(model.parameters(), lr=0.01)
optimizer = optim.SGD(model.parameters(), lr=0.003)

for epoch in range(Times500):  # previously 8000, 1000
    inputs = torch.stack(Data01)
    targets = torch.tensor(labels)
    optimizer.zero_grad()
    outputs = model(inputs)
    loss = loss_function(outputs, targets)
    loss.backward()
    optimizer.step()
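    # Added sketch (not in the original script): print the loss occasionally so that
    # convergence is visible; the interval of 500 epochs is an arbitrary example value.
    if epoch % 500 == 0:
        print(f"epoch {epoch}  loss {loss.item():.6f}")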

# 4. Use the model to make predictions
def predict_image(img_path):
    model.eval()
    img = Image.open(img_path).convert("L").resize((siz28e14, siz28e14))
    img_tensor = transforms.ToTensor()(img).unsqueeze(0)
    output = model(img_tensor)
    _, predicted = output.max(1)
    return Characts01[predicted[0]]
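# Added sketch: the same forward pass can also report a confidence score by pushing the
# logits through softmax. predict_with_confidence is a hypothetical helper, not part of
# the original script.
def predict_with_confidence(img_path):
    model.eval()
    img = Image.open(img_path).convert("L").resize((siz28e14, siz28e14))
    img_tensor = transforms.ToTensor()(img).unsqueeze(0)
    with torch.no_grad():
        probs = torch.softmax(model(img_tensor), dim=1)
    conf, predicted = probs.max(1)
    return Characts01[predicted.item()], conf.item()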


#0 // test image, expected "8" ----
im = "f8_16x18.png"
predicted_char = predict_image(im)
print(f"Predicted character: {predicted_char}")

plt.imshow(Image.open(im))
plt.title(f"Predicted: {predicted_char}")
plt.show()


#1 // test image, expected "1" ----
im = "_1_16x16.png"
predicted_char = predict_image(im)
print(f"Predicted character: {predicted_char}")

plt.imshow(Image.open(im))
plt.title(f"Predicted: {predicted_char}")
plt.show()


#2 // test image "我" (a character not among the training classes) ----
im = "wo19x19.png"
predicted_char = predict_image(im)
print(f"Predicted character: {predicted_char}")

plt.imshow(Image.open(im))
plt.title(f"Predicted: {predicted_char}")
plt.show()


#3 // test image, expected "8" ----
im = "8_16x16.png"
predicted_char = predict_image(im)
print(f"Predicted character: {predicted_char}")

# Display the result with matplotlib
plt.imshow(Image.open(im))
plt.title(f"Predicted: {predicted_char}")
#plt.axis("off")
plt.show()

#4 // test image, expected "-" ----
im = "f:\\22Letter23r1002\\minus16x16.png"
predicted_char = predict_image(im)
print(f"Predicted character: {predicted_char}")

plt.imshow(Image.open(im))
plt.title(f"Predicted: {predicted_char}")
plt.show()
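Once the toy model behaves as expected, its weights can be saved so the training loop does not have to be re-run every time. A minimal sketch using the standard torch.save / load_state_dict calls; the file name "letter23_simplenet.pt" is just an example:

# Added sketch: persist and reload the trained weights
torch.save(model.state_dict(), "letter23_simplenet.pt")

model2 = SimpleNet(len(Characts01))
model2.load_state_dict(torch.load("letter23_simplenet.pt"))
model2.eval()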
The principle of "build a three-wheeler before you build a car" is this:
torch, TensorFlow, PyTorch are the motor, the engine, the transmission: the low-level components...
1. To build a car, should you first study the motor, the wheels, the engine?
Of course! Absolutely!
2. Once you have studied those components (motor, wheels, engine), what should you build?
Can you go straight to building a BMW? A Mercedes? A Prius, a Honda, a hybrid?
It is not that simple...
For a junior architect...
Studying the Prius's novel architecture is certainly useful!
But only someone who can architect, end to end, a Wuling Hongguang, a low-speed electric buggy, or a three-wheeler is a qualified architect.
If a self-styled architect tells you:
"I need three years and 300 million dollars to architect a Tesla or a Prius hybrid... now pay up!?"
you should answer him:
"I will give you only three months and 100,000 to 1,000,000 RMB... first architect me a low-speed electric buggy or a Wuling Hongguang!!"
If you deliver,
we move on to the next task!!
If you do not, you are no architect... "con artist" fits you better!
By the same logic, a qualified surgeon,
before operating on a patient, should of course first have dissected lab mice and frogs;
I have read the complete Caffe source code, which is the equivalent of dissecting a cadaver;
and I have reinvented the wheel myself.
This time, let's hand-roll a character recognition framework: machine learning's "three-wheeler", lab mouse, or "fruit fly".
The demo below is the basic framework for character (digit and letter) recognition...
It is implemented first with torch...
import torch
import torch.nn as nn
import torch.optim as optim
from PIL import Image, ImageDraw, ImageFont
from torchvision import transforms
import matplotlib.pyplot as plt

# Parameter settings
font_path = "e:\\arial.ttf"
siz28e28 = 28
characters = [str(i) for i in range(0, 10)] + ["A", "B"]

# 1. Generate the dot-matrix (bitmap) data for the characters and digits
def render_char(char, siz28e28):
    image = Image.new("L", (siz28e28, siz28e28), "white")
    draw = ImageDraw.Draw(image)
    font = ImageFont.truetype(font_path, siz28e28)
    #----------------------------------------------------------
    # draw.textsize() is deprecated: "DeprecationWarning: textsize is deprecated and will be
    # removed in Pillow 10 (2023-07-01). Use textbbox or textlength instead."
    # w, h = draw.textsize(char, font=font)
    # Using textbbox to obtain the text bounding box instead:
    # left, upper, right, lower = draw.textbbox((0, 0), char, font=font)
    # w01, h01 = right - left, lower - upper
    # print("[right", right, " _left", left, " _lower", lower, " _upper", upper, ")")
    # print("(w1", w01, "h1", h01, "] ")
    w03 = draw.textlength(char, font=font)  # text width in pixels
    h03 = siz28e28                          # approximate the text height by the font size
    print("{[w3", w03, "h3", h03, "]} ")
    #=============================================
    draw.text(((siz28e28 - w03) / 2, (siz28e28 - h03) / 2), char, font=font, fill="black")
    return image
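# Added sketch (not in the original script): write each rendered glyph to disk for visual
# inspection. The "glyphs" output directory is only an example name; Image.save() infers
# the PNG format from the file extension.
import os
os.makedirs("glyphs", exist_ok=True)
for c in characters:
    render_char(c, siz28e28).save(os.path.join("glyphs", f"glyph_{ord(c)}.png"))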

data = []
labels = []
for i, char in enumerate(characters):
    img = render_char(char, siz28e28)
    data.append(transforms.ToTensor()(img))
    labels.append(i)

# 2. Train the neural network model
class SimpleNet(nn.Module):
    def __init__(self, num_classes):
        super(SimpleNet, self).__init__()
        self.fc = nn.Linear(siz28e28 * siz28e28, num_classes)

    def forward(self, x):
        x = x.view(-1, siz28e28 * siz28e28)
        x = self.fc(x)
        return x

model = SimpleNet(len(characters))
loss_function = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)

for epoch in range(1000):
    inputs = torch.stack(data)
    targets = torch.tensor(labels)
    optimizer.zero_grad()
    outputs = model(inputs)
    loss = loss_function(outputs, targets)
    loss.backward()
    optimizer.step()
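# Added sketch (not in the original script): after training, check that the model at least
# reproduces its own training set; with one rendered image per class this should reach
# 12/12 once the loss has converged.
with torch.no_grad():
    preds = model(torch.stack(data)).argmax(dim=1)
    correct = (preds == torch.tensor(labels)).sum().item()
    print(f"training-set accuracy: {correct}/{len(labels)}")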

# 3. Use the model to make predictions
def predict_image(img_path):
    model.eval()
    img = Image.open(img_path).convert("L").resize((siz28e28, siz28e28))
    img_tensor = transforms.ToTensor()(img).unsqueeze(0)
    output = model(img_tensor)
    _, predicted = output.max(1)
    return characters[predicted[0]]

# Predict an image stored on drive E:
#img_path = "E:\\i.png"
img_path = "E:\\256A256.png"
predicted_char = predict_image(img_path)
print(f"Predicted character: {predicted_char}")

# Display the result with matplotlib
plt.imshow(Image.open(img_path))
plt.title(f"Predicted: {predicted_char}")
plt.axis("off")
plt.show()
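An obvious limitation of this toy framework is that each class is trained on a single, perfectly centered rendering, so a test image only matches if it is drawn and centered the same way. One minimal way to harden it, sketched below on the assumption that a few pixels of jitter is representative of the test images, is to train on several randomly shifted copies of each glyph; the copy count and shift range are illustrative values, not from the original post.

# Added sketch: build a jittered training set (random +/-2 pixel shifts) instead of a
# single centered image per class.
import random
from PIL import ImageChops

aug_data, aug_labels = [], []
for i, char in enumerate(characters):
    base = render_char(char, siz28e28)
    for _ in range(8):  # 8 shifted copies per class (example value)
        dx, dy = random.randint(-2, 2), random.randint(-2, 2)
        shifted = ImageChops.offset(base, dx, dy)  # shifts with wrap-around at the edges
        aug_data.append(transforms.ToTensor()(shifted))
        aug_labels.append(i)
# aug_data / aug_labels can then replace data / labels in the training loop above.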