• 数组复制之System.arraycopy


    为了测试for循环逐个赋值与System.arraycopy两者的区别,我写了一个简单的复制int[100000]数组的程序来对比,并且中间使用了nanoTime来计算时间差:

    程序如下:

    1. int[] a = new int[100000];
    2. for(int i=0;i<100000;i++){
    3. a[i] = i;
    4. }
    5. int[] b = new int[100000];
    6. int[] c = new int[100000];
    7. for(int i=0;i<100000;i++){
    8. c[i] = i;
    9. }
    10. int[] d = new int[100000];
    11. for(int k=0;k<10;k++){
    12. long start1 = System.nanoTime();
    13. for(int i=0;i<100000;i++){
    14. b[i] = a[i];
    15. }
    16. long end1 = System.nanoTime();
    17. System.out.println("end1 - start1 = "+(end1-start1));
    18. long start2 = System.nanoTime();
    19. System.arraycopy(c, 0, d, 0, 100000);
    20. long end2 = System.nanoTime();
    21. System.out.println("end2 - start2 = "+(end2-start2));
    22. System.out.println();
    23. }

    为了避免内存不稳定的干扰和运行结果的偶然性,我在一开始就把所有数组空间声明完成,之后循环执行10次,得到如下结果(单位为纳秒):

    1. end1 - start1 = 366806
    2. end2 - start2 = 109154
    3. end1 - start1 = 380529
    4. end2 - start2 = 79849
    5. end1 - start1 = 421422
    6. end2 - start2 = 68769
    7. end1 - start1 = 344463
    8. end2 - start2 = 72020
    9. end1 - start1 = 333174
    10. end2 - start2 = 77277
    11. end1 - start1 = 377335
    12. end2 - start2 = 82285
    13. end1 - start1 = 370608
    14. end2 - start2 = 66937
    15. end1 - start1 = 349067
    16. end2 - start2 = 86532
    17. end1 - start1 = 389974
    18. end2 - start2 = 83362
    19. end1 - start1 = 347937
    20. end2 - start2 = 63638

    可以看出,System.arraycopy的性能很不错,为了看看究竟这个底层是如何处理的,我找到openJDK的一些代码浏览了一下:

    System.arraycopy是一个native函数,需要看native层的代码:

    1. public static native void arraycopy(Object src, int srcPos,
    2. Object dest, int destPos,
    3. int length);

    找到对应的openjdk6-src/hotspot/src/share/vm/prims/jvm.cpp,这里有JVM_ArrayCopy的入口:

    1. JVM_ENTRY(void, JVM_ArrayCopy(JNIEnv *env, jclass ignored, jobject src, jint src_pos,
    2. jobject dst, jint dst_pos, jint length))
    3. JVMWrapper("JVM_ArrayCopy");
    4. // Check if we have null pointers
    5. if (src == NULL || dst == NULL) {
    6. THROW(vmSymbols::java_lang_NullPointerException());
    7. }
    8. arrayOop s = arrayOop(JNIHandles::resolve_non_null(src));
    9. arrayOop d = arrayOop(JNIHandles::resolve_non_null(dst));
    10. assert(s->is_oop(), "JVM_ArrayCopy: src not an oop");
    11. assert(d->is_oop(), "JVM_ArrayCopy: dst not an oop");
    12. // Do copy
    13. Klass::cast(s->klass())->copy_array(s, src_pos, d, dst_pos, length, thread);
    14. JVM_END

    前面的语句都是判断,直到最后的copy_array(s, src_pos, d, dst_pos, length, thread)才是真正的copy,进一步看这里,在openjdk6-src/hotspot/src/share/vm/oops/typeArrayKlass.cpp中:

    1. void typeArrayKlass::copy_array(arrayOop s, int src_pos, arrayOop d, int dst_pos, int length, TRAPS) {
    2. assert(s->is_typeArray(), "must be type array");
    3. // Check destination
    4. if (!d->is_typeArray() || element_type() != typeArrayKlass::cast(d->klass())->element_type()) {
    5. THROW(vmSymbols::java_lang_ArrayStoreException());
    6. }
    7. // Check is all offsets and lengths are non negative
    8. if (src_pos < 0 || dst_pos < 0 || length < 0) {
    9. THROW(vmSymbols::java_lang_ArrayIndexOutOfBoundsException());
    10. }
    11. // Check if the ranges are valid
    12. if ( (((unsigned int) length + (unsigned int) src_pos) > (unsigned int) s->length())
    13. || (((unsigned int) length + (unsigned int) dst_pos) > (unsigned int) d->length()) ) {
    14. THROW(vmSymbols::java_lang_ArrayIndexOutOfBoundsException());
    15. }
    16. // Check zero copy
    17. if (length == 0)
    18. return;
    19. // This is an attempt to make the copy_array fast.
    20. int l2es = log2_element_size();
    21. int ihs = array_header_in_bytes() / wordSize;
    22. char* src = (char*) ((oop*)s + ihs) + ((size_t)src_pos << l2es);
    23. char* dst = (char*) ((oop*)d + ihs) + ((size_t)dst_pos << l2es);
    24. Copy::conjoint_memory_atomic(src, dst, (size_t)length << l2es);//还是在这里处理copy
    25. }

    这个函数之前的仍然是一堆判断,直到最后一句才是真实的拷贝语句。

    在openjdk6-src/hotspot/src/share/vm/utilities/copy.cpp中找到对应的函数:

    1. // Copy bytes; larger units are filled atomically if everything is aligned.
    2. void Copy::conjoint_memory_atomic(void* from, void* to, size_t size) {
    3. address src = (address) from;
    4. address dst = (address) to;
    5. uintptr_t bits = (uintptr_t) src | (uintptr_t) dst | (uintptr_t) size;
    6. // (Note: We could improve performance by ignoring the low bits of size,
    7. // and putting a short cleanup loop after each bulk copy loop.
    8. // There are plenty of other ways to make this faster also,
    9. // and it's a slippery slope. For now, let's keep this code simple
    10. // since the simplicity helps clarify the atomicity semantics of
    11. // this operation. There are also CPU-specific assembly versions
    12. // which may or may not want to include such optimizations.)
    13. if (bits % sizeof(jlong) == 0) {
    14. Copy::conjoint_jlongs_atomic((jlong*) src, (jlong*) dst, size / sizeof(jlong));
    15. } else if (bits % sizeof(jint) == 0) {
    16. Copy::conjoint_jints_atomic((jint*) src, (jint*) dst, size / sizeof(jint));
    17. } else if (bits % sizeof(jshort) == 0) {
    18. Copy::conjoint_jshorts_atomic((jshort*) src, (jshort*) dst, size / sizeof(jshort));
    19. } else {
    20. // Not aligned, so no need to be atomic.
    21. Copy::conjoint_jbytes((void*) src, (void*) dst, size);
    22. }
    23. }

    上面的代码展示了选择哪个copy函数,我们选择conjoint_jints_atomic,在openjdk6-src/hotspot/src/share/vm/utilities/copy.hpp进一步查看:

    1. // jints, conjoint, atomic on each jint
    2. static void conjoint_jints_atomic(jint* from, jint* to, size_t count) {
    3. assert_params_ok(from, to, LogBytesPerInt);
    4. pd_conjoint_jints_atomic(from, to, count);
    5. }

    继续向下查看,在openjdk6-src/hotspot/src/cpu/zero/vm/copy_zero.hpp中:

    1. static void pd_conjoint_jints_atomic(jint* from, jint* to, size_t count) {
    2. _Copy_conjoint_jints_atomic(from, to, count);
    3. }

    继续向下查看,在openjdk6-src/hotspot/src/os_cpu/linux_zero/vm/os_linux_zero.cpp中:

    1. void _Copy_conjoint_jints_atomic(jint* from, jint* to, size_t count) {
    2. if (from > to) {
    3. jint *end = from + count;
    4. while (from < end)
    5. *(to++) = *(from++);
    6. }
    7. else if (from < to) {
    8. jint *end = from;
    9. from += count - 1;
    10. to += count - 1;
    11. while (from >= end)
    12. *(to--) = *(from--);
    13. }
    14. }

    可以看到,直接就是内存块赋值的逻辑了,这样避免很多引用来回倒腾的时间,必然就变快了。

  • 相关阅读:
    内存模型以及如何判定对象已死问题
    聊聊日志硬扫描,阿里 Log Scan 的设计与实践
    面试官喜欢问Nacos原理?直接把这篇文章甩给他!
    redis的配置文件
    Day 91
    C现代方法(第3、4章)笔记
    【基础教程】Matlab实现指数威布尔分布
    数据结构(九)顺序栈
    【Java Web】统一处理异常
    如何删除gitlab上多余的文件夹
  • 原文地址:https://blog.csdn.net/xiaopangcame/article/details/134296614