需求说明
- 单个很大的json数组文件,例如10GB。如果一次性加载到内存可能导致内存溢出,因此使用按行读取,读取的数据缓存到一定量后就做业务处理,再清空缓存。
- 单个json文件存储格式是标准的json数组格式
json数据示例
[
{"概念":"人物","name":"张三",
"身高":"176cm"
},{"概念":"人物","name":"李四",
"身高":"176cm","年龄":"22岁"},{"概念":"地点","name":"四川","人口":"5000万"},
{"概念":"人物",
"name":"张三1","身高":"176cm"}
,{"概念":"人物","name":"李四1",
"身高":"176cm","年龄":"22岁"
},
{"概念":"地点","name":"四川1","人口":"5000万"},{"概念":"人物","name":"张三2","身高":"176cm"},{"概念":"人物",
"name":"李四3","身高":"176cm","年龄":"22岁"},{"概念":"地点","name":"四川2","人口":"5000万"},{"概念":"人物","name":"张三3","身高":"176cm","体重":"70kg","爱好":"篮球"},
{"概念":"人物","name":"李四3","身高":"176cm","年龄":"22岁"},{"概念":"地点","name":"四川3","人口":"5000万"
}
]
pom依赖
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>fastjson</artifactId>
<version>1.2.28</version>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-lang3</artifactId>
<version>3.11</version>
</dependency>
<dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
<version>2.11.0</version>
</dependency>
处理核心类
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.LineIterator;
import org.apache.commons.lang3.StringUtils;
import org.springframework.util.CollectionUtils;
import scala.util.parsing.combinator.testing.Str;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
/**
 * Splits a very large JSON array file into its top-level JSON objects without
 * loading the whole file into memory.
 *
 * <p>The file is read line by line. Each line is fed to
 * {@link #getNode(String, List, List)}, which appends every completed top-level
 * object to a batch list and carries any unfinished object over to the next
 * line. Whenever the batch reaches {@code BATCH_SIZE} entries it is handed to
 * {@code processBatch} and cleared.
 */
public class HandlerData {

    /** Number of collected objects that triggers a batch hand-off. */
    private static final int BATCH_SIZE = 500;

    /**
     * Entry point: streams the input file and processes its objects in batches.
     *
     * @param args optional; {@code args[0]} is the path of the JSON array file.
     *             When absent, {@code "test.json"} is used.
     * @throws IOException if the file cannot be opened, read or closed
     */
    public static void main(String[] args) throws IOException {
        String path = (args != null && args.length > 0) ? args[0] : "test.json";

        // Single-slot holder for the object that is still open at the end of a line.
        List<String> buffer = new ArrayList<>(1);
        buffer.add("");
        List<String> jsonList = new ArrayList<>();

        // try-with-resources guarantees the underlying reader is closed even
        // when reading or batch processing throws.
        try (LineIterator it = FileUtils.lineIterator(new File(path), "UTF-8")) {
            while (it.hasNext()) {
                String line = it.nextLine();
                if (StringUtils.isNotBlank(line)) {
                    getNode(line, buffer, jsonList);
                }
                // One line may complete several objects, so the batch can
                // overshoot BATCH_SIZE; it is flushed as soon as it reaches it.
                if (jsonList.size() >= BATCH_SIZE) {
                    processBatch(jsonList);
                    jsonList.clear();
                }
            }
        }

        // Flush the final, partially filled batch.
        if (!CollectionUtils.isEmpty(jsonList)) {
            processBatch(jsonList);
            jsonList.clear();
        }
    }

    /**
     * Hook for the business processing of one batch of JSON object strings.
     * Intentionally empty; the caller clears the list right after this returns.
     *
     * @param batch the JSON object strings collected so far
     */
    private static void processBatch(List<String> batch) {
        // Business logic goes here.
    }

    /**
     * Checks whether a string can be parsed by {@code JSON.parseObject}.
     *
     * @param item the candidate JSON text
     * @return {@code true} if parsing threw no exception, {@code false} otherwise
     */
    public static boolean changeJSON(String item) {
        try {
            JSON.parseObject(item);
            return true;
        } catch (Exception e) {
            // Plain concatenation: println does not expand "{}" placeholders.
            System.out.println("json转换异常:" + item);
            return false;
        }
    }

    /**
     * Extracts every top-level JSON object that is completed by {@code line}.
     *
     * <p>The text is scanned character by character while tracking brace depth
     * and whether the scanner is inside a string literal, so nested objects and
     * braces that appear inside string values do not end an object early.
     * Characters outside any object (array brackets, separating commas,
     * whitespace) are discarded. Lines are joined without a line separator.
     *
     * <p>The scanner state is rebuilt on each call by replaying the carried-over
     * fragment, which costs time proportional to that fragment's length.
     *
     * @param line     the next line of the file
     * @param buffer   single-element list; element 0 holds the text of the object
     *                 left open by previous lines ({@code ""} when none) and is
     *                 updated with the object left open by this line
     * @param jsonList receives each completed object, in file order
     */
    public static void getNode(String line, List<String> buffer, List<String> jsonList) {
        ObjectSplitter splitter = new ObjectSplitter(jsonList);
        // Replaying the unfinished fragment restores depth and string state.
        splitter.feed(buffer.get(0));
        splitter.feed(line);
        buffer.set(0, splitter.pending());
    }

    /** Incremental scanner that cuts a character stream into top-level JSON objects. */
    private static final class ObjectSplitter {
        private final StringBuilder current = new StringBuilder();
        private final List<String> out;
        private int depth;
        private boolean inString;
        private boolean escaped;

        ObjectSplitter(List<String> out) {
            this.out = out;
        }

        /** Feeds all characters of {@code text} to the scanner. */
        void feed(CharSequence text) {
            for (int i = 0; i < text.length(); i++) {
                accept(text.charAt(i));
            }
        }

        /** Returns the text of the object that is still open, or {@code ""}. */
        String pending() {
            return current.toString();
        }

        private void accept(char c) {
            if (inString) {
                if (depth > 0) {
                    current.append(c);
                }
                if (escaped) {
                    escaped = false;        // the escaped character itself
                } else if (c == '\\') {
                    escaped = true;         // next character is escaped
                } else if (c == '"') {
                    inString = false;
                }
                return;
            }
            if (c == '"') {
                inString = true;
                if (depth > 0) {
                    current.append(c);
                }
                return;
            }
            if (c == '{') {
                depth++;
                current.append(c);
                return;
            }
            if (depth == 0) {
                return;                     // between objects: nothing to keep
            }
            current.append(c);
            if (c == '}' && --depth == 0) {
                out.add(current.toString());
                current.setLength(0);
            }
        }
    }
}
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 18
- 19
- 20
- 21
- 22
- 23
- 24
- 25
- 26
- 27
- 28
- 29
- 30
- 31
- 32
- 33
- 34
- 35
- 36
- 37
- 38
- 39
- 40
- 41
- 42
- 43
- 44
- 45
- 46
- 47
- 48
- 49
- 50
- 51
- 52
- 53
- 54
- 55
- 56
- 57
- 58
- 59
- 60
- 61
- 62
- 63
- 64
- 65
- 66
- 67
- 68
- 69
- 70
- 71
- 72
- 73
- 74
- 75
- 76
- 77
- 78
- 79
- 80
- 81