提示:文章写完后,目录可以自动生成,如何生成可参考右边的帮助文档
目录
相关文章导读
Unicode编码标准以及UTF(涉及到文本文件处理一定要看)https://blog.csdn.net/wangxudongx/article/details/125126524
如果代码和注释不一致, 则可能两者都错了。
-------------------------------------------------------- 诺姆.施赖尔
正则表达式有多种语法标准,这里采用 C++ std::regex 默认使用的 ECMAScript 语法来介绍。
R"(<(.*?)>(.*?)</.*?>)"
会得到三个result:
1.(<(.*?)>(.*?)</.*?>)匹配出<b>bouquet of roses</b>
2.第一个(.*?)匹配出<b>bouquet of roses</b>中起始标签<b>里的b
3.第二个(.*?)匹配出<b>bouquet of roses</b>中两个标签之间的bouquet of roses
因为默认情况下,匹配结果(match_results)除了保存整个表达式匹配到的内容之外,还会依次保存每个()子表达式(捕获分组)匹配到的内容,所以又查出了里面两个()表达式的匹配内容
构造regex时加上flag:regex_constants::nosubs(不保存子表达式的匹配结果)
会得到一个result:
<b>bouquet of roses</b>
<b>bouquet of roses</b>
R"(<(.*?)>(.*?)</.*?>)"
会得到三个result:
1.(<(.*?)>(.*?)</.*?>)匹配出<b>bouquet of roses</b>
2.第一个(.*?)匹配出<b>bouquet of roses</b>中起始标签<b>里的b
3.第二个(.*?)匹配出<b>bouquet of roses</b>中两个标签之间的bouquet of roses
因为默认情况下,匹配结果(match_results)除了保存整个表达式匹配到的内容之外,还会依次保存每个()子表达式(捕获分组)匹配到的内容,所以又查出了里面两个()表达式的匹配内容
构造regex时加上flag:regex_constants::nosubs(不保存子表达式的匹配结果)
会得到一个result:
<b>bouquet of roses</b>
- #include <iostream>
- #include <string>
- #include <regex>
- #include <fstream>
- #include <codecvt>
-
- using namespace std;
-
- /**
-  * Walks through std::regex_match and std::regex_search using URL, date,
-  * e-mail and XML-tag patterns, printing what each call captures.
-  *
-  * regex_match succeeds only when the pattern covers the whole subject,
-  * while regex_search reports the first place where the pattern occurs.
-  * Takes no parameters and returns nothing; all results go to std::cout.
-  */
- void testRegexMatchFunc()
- {
-     // String literals are const, so they must be held through pointers-to-const.
-     const char *pstr = "hello world,\n tom and Jerry";
-     const string str = "hello world,\n tom and<b>bouquet of roses</b> Jerry";
-     const string str2 = "hello 2022-01-22 world,\n tom and<b>bouquet of roses</b> Jerry http://baidu.com 2022-05-01 heihiei\t\n 293743@qq.com lasjdfljds@gmail.com";
-     const string str3 = "tomcat";
-
-     // The URL pattern is written as a raw string literal so that the regex
-     // engine really receives the backslash of \s (whitespace). In an ordinary
-     // literal the compiler drops that backslash and the class degrades to
-     // "anything but the letter s". The letter range is A-Z; A-z would also
-     // admit the punctuation that sits between 'Z' and 'a'.
-     const regex urlPattern(R"([a-zA-Z]+://[^\s]*)");
-     const regex datePattern("([0-9]{3}[1-9]|[0-9]{2}[1-9][0-9]{1}|[0-9]{1}[1-9][0-9]{2}|[1-9][0-9]{3})-(((0[13578]|1[02])-(0[1-9]|[12][0-9]|3[01]))|((0[469]|11)-(0[1-9]|[12][0-9]|30))|(02-(0[1-9]|[1][0-9]|2[0-8])))", regex_constants::nosubs);
-     const regex emailPattern(R"([\w!#$%&'*+/=?^_`{|}~-]+(?:\.[\w!#$%&'*+/=?^_`{|}~-]+)*@(?:[\w](?:[\w-]*[\w])?\.)+[\w](?:[\w-]*[\w])?)");
-     const regex xmlPattern(R"(<(.*?)>(.*?)</.*?>)", regex_constants::nosubs);
-
-     // Prints every entry of a match result: entry 0 is the whole match,
-     // followed by one entry per capture group the regex kept.
-     const auto printMatches = [](const smatch &matches) {
-         for (const auto &sub : matches)
-         {
-             cout << sub.str() << endl;
-         }
-     };
-
-     // Prints, for every entry, the character at the start of the match, the
-     // character just past its end, and the matched text itself. The
-     // past-the-end character is skipped when the match reaches the end of
-     // the subject, because there is nothing to dereference there.
-     const auto printMatchesWithBounds = [](const smatch &matches, const string &subject) {
-         for (const auto &sub : matches)
-         {
-             if (sub.first != subject.end())
-             {
-                 cout << *sub.first << endl;
-             }
-             if (sub.second != subject.end())
-             {
-                 cout << *sub.second << endl;
-             }
-             cout << sub.str() << endl;
-         }
-     };
-
-     smatch result;
-
-     const regex pattern(R"([a-zA-Z]+://[^\s]*)", regex_constants::ECMAScript);
-
-     // Whole-subject match against a C string; only the boolean is of interest.
-     bool found = regex_match(pstr, pattern);
-
-     found = regex_match(str3, result, pattern);
-     if (found)
-     {
-         printMatches(result);
-     }
-
-     found = regex_search(str3, result, pattern);
-     if (found)
-     {
-         printMatches(result);
-     }
-
-     // Shows the syntax flags a regex gets when none are passed explicitly.
-     const auto urlPatFlags = urlPattern.flags();
-     cout << urlPatFlags << endl;
-
-     found = regex_search(str2, result, urlPattern);
-     if (found)
-     {
-         printMatches(result);
-     }
-
-     found = regex_search(str2, result, datePattern);
-     if (found)
-     {
-         printMatches(result);
-     }
-
-     found = regex_search(str2, result, emailPattern);
-     if (found)
-     {
-         printMatchesWithBounds(result, str2);
-     }
-
-     found = regex_search(str2, result, xmlPattern);
-     if (found)
-     {
-         printMatchesWithBounds(result, str2);
-     }
-
-     // regex_match must cover the whole subject, so a pattern describing only
-     // the tag (or only the date) does not match the longer sentences.
-     const regex pattern2("<(.*?)>(.*?)</\\1>");
-     found = regex_match(str, result, pattern2);
-     cout << found << result.length() << endl;
-
-     const regex pattern3(R"(([0-9]{3}[1-9]|[0-9]{2}[1-9][0-9]{1}|[0-9]{1}[1-9][0-9]{2}|[1-9][0-9]{3})-(((0[13578]|1[02])-(0[1-9]|[12][0-9]|3[01]))|((0[469]|11)-(0[1-9]|[12][0-9]|30))|(02-(0[1-9]|[1][0-9]|2[0-8]))))");
-     found = regex_match(str2, result, pattern3);
-     cout << found << result.length() << endl;
- }
-
- /**
-  * Contrasts regex_match with regex_search on one sentence: the tag pattern
-  * is tried as a whole-subject match and the e-mail pattern as a search.
-  * Prints the boolean outcome of each call to std::cout.
-  */
- void testRegexSearchFunc()
- {
-     const string str = "hello world,\n tom and<b>bouquet of roses</b> Jerry lsjdfl@outlook.com hshdfehjf@qq.com";
-
-     const regex pattern(R"(<(.*?)>(.*?)</\1>)");
-     // Raw string literal so that \S (non-whitespace) reaches the regex engine
-     // intact; in an ordinary literal the backslash would be lost.
-     const regex pat1(R"(hello\S*)");
-     const regex emailPattern(R"([\w!#$%&'*+/=?^_`{|}~-]+(?:\.[\w!#$%&'*+/=?^_`{|}~-]+)*@(?:[\w](?:[\w-]*[\w])?\.)+[\w](?:[\w-]*[\w])?)");
-
-     // The grammar (ECMAScript, egrep, ...) is chosen through the flags passed
-     // to the regex constructor; it cannot be switched by assigning to a member.
-
-     smatch result;
-     bool found = regex_match(str, result, pattern);
-
-     cout << found << endl;
-
-     found = regex_search(str, result, emailPattern);
-     cout << found << endl;
- }
-
- /**
-  * Shows the effect of regex_constants::nosubs: the same tag pattern is
-  * searched once without the flag and once with it, and every entry of the
-  * match result is printed each time, between a start and an end banner.
-  */
- void test_nosubs_flag_func() {
-     cout << "######################### 测试nosubs flag start #####################" << endl;
-     const string str2 = "hello 2022-01-22 world,\n tom and<b>bouquet of roses</b> Jerry http://baidu.com 2022-05-01 heihiei\t\n 293743@qq.com lasjdfljds@gmail.com";
-     const regex xmlPattern(R"(<(.*?)>(.*?)</.*?>)");
-     const regex xmlPatternWithNosubs(R"(<(.*?)>(.*?)</.*?>)", regex_constants::nosubs);
-     smatch result;
-
-     // Without nosubs the result holds the whole match plus one entry per group.
-     if (regex_search(str2, result, xmlPattern))
-     {
-         for (const auto &sub : result)
-         {
-             cout << sub.str() << endl;
-         }
-     }
-
-     // With nosubs the groups are not recorded, leaving only the whole match.
-     if (regex_search(str2, result, xmlPatternWithNosubs))
-     {
-         for (const auto &sub : result)
-         {
-             cout << sub.str() << endl;
-         }
-     }
-
-     cout << "######################### 测试nosubs flag end #####################" << endl;
-
- }
-
- /**
-  * Reads ./utf16textfile.txt through a UTF-16 decoding facet and echoes each
-  * whitespace-separated token to std::wcout, one token per line.
-  * Does nothing when the file cannot be opened.
-  */
- void TestUnicodeUTF16()
- {
-     // Binary mode keeps the runtime from translating newline bytes that are
-     // really halves of two-byte UTF-16 code units.
-     wifstream textFileInputStream("./utf16textfile.txt", ios::in | ios::binary);
-     // NOTE(review): codecvt_utf16 with default arguments decodes big-endian
-     // data and does not consume a byte-order mark; confirm this matches the
-     // file. The facet is also deprecated since C++17.
-     textFileInputStream.imbue(locale(locale::classic(), new codecvt_utf16<wchar_t>()));
-     if (!textFileInputStream.is_open())
-     {
-         return;
-     }
-
-     wstring wstr;
-     // Loop on the extraction itself: testing eof() beforehand would process a
-     // stale token once more after the final read has failed.
-     while (textFileInputStream >> wstr)
-     {
-         wcout << wstr << endl;
-     }
-     // The stream closes the file when it goes out of scope.
- }
-
- int main() // Entry point: prints a raw-string sample, runs each demo function in turn, then pauses.
- {
-
- {
- // Raw string literal usage: well suited to defining regex patterns because backslashes need no manual escaping
- string str_r_1 = R"(
- {
- "id": "velit enim ipsum nostrud nisi",
- "name": "ut dolore quis mollit in",
- "namePinyin": "ullamco",
- "namePy": "id in mollit",
- "username": "proident nulla aliquip",
- "sex": -10581978.03552425,
- "jobNumber": "mollit in anim",
- "mobile": "incididunt qui laborum do",
- "email": "exercitation magna labore do anim",
- "rfid": "velit officia consequat qui",
- "avatarLink": "in incididunt amet",
- "preferences": {
- "lang": "irure elit amet ea"
- }
- }
- )";
-
- cout << str_r_1 << endl; // The literal is printed verbatim, line breaks and quotes included.
-
- }
-
-
- testRegexMatchFunc();
- testRegexSearchFunc();
- test_nosubs_flag_func();
-
- TestUnicodeUTF16();
-
- _wsystem(L"pause"); // Hands the "pause" command to the command interpreter; presumably to keep a Windows console open - _wsystem is a Microsoft CRT function.
- return 0;
- }
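输出(注意:下面的URL匹配结果一直延伸到字母s之前才结束,这是把模式写成普通字符串字面量"[a-zA-z]+://[^\s]*"时得到的结果——其中的\s没有被当作"空白字符"转义,实际生效的是[^s]*;要让匹配在空白处结束,应写成\\s或使用R"(...)"原始字符串)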
-
- {
- "id": "velit enim ipsum nostrud nisi",
- "name": "ut dolore quis mollit in",
- "namePinyin": "ullamco",
- "namePy": "id in mollit",
- "username": "proident nulla aliquip",
- "sex": -10581978.03552425,
- "jobNumber": "mollit in anim",
- "mobile": "incididunt qui laborum do",
- "email": "exercitation magna labore do anim",
- "rfid": "velit officia consequat qui",
- "avatarLink": "in incididunt amet",
- "preferences": {
- "lang": "irure elit amet ea"
- }
- }
-
- 1
- http://baidu.com 2022-05-01 heihiei
- 293743@qq.com la
- 2022-01-22
- 2
-
- 293743@qq.com
- <
-
- <b>bouquet of roses</b>
- 00
- 00
- 0
- 1
- ######################### 测试nosubs flag start #####################
- <b>bouquet of roses</b>
- b
- bouquet of roses
- <b>bouquet of roses</b>
- ######################### 测试nosubs flag end #####################
- 请按任意键继续. . .