C++ 性能优化

原创
2022/01/29 16:30
阅读数 136
#include <iostream>
#include <atomic>
#include "string"
#include "memory"
#include "vector"
#include "thread"
#include <cstddef>
#include <vector>

/**
 * Serially sums the elements of `vector` in the half-open index range [s, n).
 *
 * Elements are accessed with unchecked `operator[]`, so the caller must
 * ensure 0 <= s and n <= vector.size(). An empty or inverted range (n <= s)
 * yields 0.
 *
 * @param vector values to sum; only read, never modified.
 * @param s      index of the first element to include.
 * @param n      index one past the last element to include.
 * @return the sum of the selected elements, accumulated in a `long`.
 */
inline long add(const std::vector<int> &vector, int s, int n) {
    long sum = 0;
    // Deliberately serial: a plain `#pragma omp parallel for` on this loop
    // would make several threads update `sum` without synchronisation.
    for (int i = s; i < n; ++i) {
        sum += vector[i];
    }
    return sum;
}

/**
 * Sums the elements of `vector` in the half-open index range [s, n) using an
 * OpenMP parallel loop with up to 6 threads.
 *
 * Each thread accumulates into its own private copy of `sum`; OpenMP combines
 * the copies when the loop ends (`reduction(+:sum)`). This avoids one atomic
 * read-modify-write on a shared counter per element. When the file is
 * compiled without OpenMP support the pragma is ignored and the loop runs
 * serially with the same result.
 *
 * Elements are accessed with unchecked `operator[]`, so the caller must
 * ensure 0 <= s and n <= vector.size().
 *
 * @param vector values to sum; only read, never modified.
 * @param s      index of the first element to include.
 * @param n      index one past the last element to include.
 * @return the sum of the selected elements, accumulated in a `long`.
 */
inline long add1(const std::vector<int> &vector, int s, int n) {
    long sum = 0;
#pragma omp parallel for reduction(+:sum) num_threads(6)
    for (int i = s; i < n; ++i) {
        sum += vector[i];
    }
    return sum;
}

/**
 * Sums the elements of `vector` in the half-open index range [s, n) by
 * splitting the range into at most 6 contiguous chunks, summing each chunk
 * serially, and combining the per-chunk sums with an OpenMP reduction.
 *
 * The chunk length is derived from the requested range (n - s), rounded up,
 * so it is never 0 for a non-empty range, and every chunk end is clamped to
 * `n` so no element outside [s, n) is read. When the file is compiled without
 * OpenMP support the pragma is ignored and the chunks are processed one after
 * another with the same result.
 *
 * The caller must ensure 0 <= s and n <= vector.size(). An empty or inverted
 * range (n <= s) yields 0.
 *
 * @param vector values to sum; only read, never modified.
 * @param s      index of the first element to include.
 * @param n      index one past the last element to include.
 * @return the sum of the selected elements, accumulated in a `long`.
 */
inline long add2(const std::vector<int> &vector, int s, int n) {
    if (n <= s) {
        return 0;
    }

    constexpr int kChunks = 6;
    const int span = n - s;
    // Ceiling division without risking overflow of `span + kChunks - 1`.
    const int chunk = span / kChunks + (span % kChunks != 0 ? 1 : 0);

    long sum = 0;
    // Loop over the chunk index rather than the element index: the trip count
    // is a fixed small constant and the stride can never be zero.
#pragma omp parallel for reduction(+:sum) num_threads(kChunks)
    for (int c = 0; c < kChunks; ++c) {
        // 64-bit arithmetic so that begin + chunk cannot overflow near INT_MAX.
        const long long begin = static_cast<long long>(s) + static_cast<long long>(c) * chunk;
        const long long limit = begin + chunk;
        const long long end = limit < n ? limit : n;

        long partial = 0;
        // Trailing chunks may start at or beyond `n`; the loop is then empty.
        for (long long i = begin; i < end; ++i) {
            partial += vector[static_cast<std::size_t>(i)];
        }
        sum += partial;
    }
    return sum;
}

/**
 * Returns the sum of all elements of two equally long int arrays, i.e. the
 * sum over i in [0, n) of a[i] + b[i], using an OpenMP SIMD reduction.
 *
 * Each pair is widened to `long` before it is added, so on platforms where
 * `long` is wider than `int` a pair such as INT_MAX + 1 does not overflow in
 * `int` arithmetic. When the file is compiled without OpenMP support the
 * pragma is ignored and the loop is left to the compiler's normal optimiser.
 *
 * @param a pointer to the first array; at least `n` readable elements.
 * @param b pointer to the second array; at least `n` readable elements.
 * @param n number of elements to read from each array; n <= 0 yields 0.
 * @return the combined sum, accumulated in a `long`.
 */
inline long add3(const int *a, const int *b, int n) {
    long total = 0;

#pragma omp simd reduction(+:total)
    for (int i = 0; i < n; ++i) {
        total += static_cast<long>(a[i]) + b[i];
    }
    return total;
}

/**
 * Benchmark driver: fills a vector of 300,000,000 ints with the repeating
 * pattern 0, 1, 2, sums it with one of the strategies in this file, and
 * prints the sum.
 *
 * The active strategy is add3, called with the first and second half of the
 * vector as its two input arrays. The commented-out lines select the other
 * strategies instead.
 */
int main() {
    // Roughly 1.2 GB of storage, assuming a 4-byte int.
    const std::size_t size = 300000000;
    std::vector<int> ints(size);
    // Index with size_t so the comparison against `size` is not mixed-sign.
    for (std::size_t i = 0; i < size; ++i) {
        ints[i] = static_cast<int>(i % 3);
    }
//    long sum = add(ints, 0, ints.size());
//    long sum = add1(ints, 0, ints.size());
//    long sum = add2(ints, 0, ints.size());
    const std::size_t half = ints.size() / 2;
    // add3 takes an int count; half (150,000,000) fits, so the cast is explicit and safe.
    const long sum = add3(ints.data(), ints.data() + half, static_cast<int>(half));
    std::cout << sum << '\n';
    return 0;
}

 

# Build script for the OpenMP summation benchmark in main.cpp.
cmake_minimum_required(VERSION 3.19)
project(untitled2)
set(CMAKE_CXX_STANDARD 17)

# Optimisation flags (GCC/Clang spelling). They are applied unconditionally so
# the benchmark is optimised even when OpenMP is not available.
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -O3 -march=native -flto")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -march=native -flto")

add_executable(untitled2 main.cpp)

# OpenMP is optional: without it the pragmas in main.cpp are ignored and the
# code runs serially. The imported target carries both the compile flags and
# the libraries to link.
find_package(OpenMP)
if (OpenMP_CXX_FOUND)
    target_link_libraries(untitled2 PRIVATE OpenMP::OpenMP_CXX)
endif ()
展开阅读全文
加载中
点击引领话题📣 发布并加入讨论🔥
打赏
0 评论
0 收藏
0
分享
返回顶部
顶部