ICode9

精准搜索请尝试: 精确搜索
首页 > 其他分享> 文章详细

OpenCL矩阵相乘输入形式

2020-12-05 12:00:28  阅读:238  来源: 互联网

标签:status OpenCL cl int Mdim float 矩阵 相乘 NULL


一. 核函数(运行在 GPU 上)

// Matrix multiplication kernel: C = A * B.
//
// A is read as an Ndim x Pdim matrix, B as a Pdim x Mdim matrix and C is
// written as an Ndim x Mdim matrix; all three are indexed in row-major order.
// Each work-item computes a single element of C: global id 0 selects the row
// and global id 1 selects the column.
__kernel void matrix_mult(
    const int Ndim,
    const int Mdim,
    const int Pdim,
    __global const float* A,
    __global const float* B,
    __global float* C)
{
    const int row = get_global_id(0);
    const int col = get_global_id(1);

    // Work-items that fall outside the output matrix have nothing to do.
    if ((row >= Ndim) || (col >= Mdim))
        return;

    // Dot product of row `row` of A with column `col` of B.
    float acc = 0.0f;
    for (int p = 0; p < Pdim; p++)
        acc += A[row * Pdim + p] * B[p * Mdim + col];

    C[row * Mdim + col] = acc;
}

二. 输入矩阵形式(博主列举了三种矩阵输入形式,代码中通过注释给出:(1) 自动生成;(2) 直接给定;(3) 动态输入)


```c
#include <CL/cl.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>

// File-scope using-directive: lets the code below refer to standard library
// names (cout, cin, endl, ...) without the std:: prefix.
using namespace std;

// NOTE(review): the name suggests a work-item count; confirm whether this
// constant is still needed by the host code.
#define NWITEMS 6

// Compiler-specific directive requesting that OpenCL.lib be linked in.
// NOTE(review): toolchains that do not support this pragma must link the
// OpenCL library through their build settings instead.
#pragma comment (lib,"OpenCL.lib")

// Reads the whole file `filename` (opened in binary mode) into `s`.
//
// Parameters:
//   filename - path of the file to read; must not be a null pointer.
//   s        - receives the file contents on success; left untouched on failure.
//
// Returns 0 on success and 1 on any failure (file cannot be opened, its size
// cannot be determined, or the contents cannot be read completely). A message
// describing the failure is printed with printf.
//
// The contents are copied byte-for-byte, so embedded '\0' bytes are preserved
// instead of truncating the string.
int convertToString(const char* filename, std::string& s)
{
    if (filename == NULL)
    {
        printf("Error: Failed to open file (null filename)\n");
        return 1;
    }

    std::ifstream f(filename, std::ios::in | std::ios::binary);
    if (!f.is_open())
    {
        printf("Error: Failed to open file %s\n", filename);
        return 1;
    }

    // Determine the file size by seeking to the end; tellg() yields -1 on failure.
    f.seekg(0, std::ios::end);
    const std::streamoff length = f.tellg();
    if (length < 0)
    {
        printf("Error: Failed to determine size of file %s\n", filename);
        return 1;
    }
    f.seekg(0, std::ios::beg);

    // Read straight into a string buffer; no manual new[]/delete[] needed.
    std::string contents(static_cast<size_t>(length), '\0');
    if (length > 0 && !f.read(&contents[0], static_cast<std::streamsize>(length)))
    {
        printf("Error: Failed to read file %s\n", filename);
        return 1;
    }

    // Only publish the result once the whole file was read successfully.
    s.swap(contents);
    return 0;
}

int main()
{
    cl_uint status;
    cl_platform_id platform;

    //创建平台对象
    status = clGetPlatformIDs(1, &platform, NULL);
    cl_device_id device;
    //创建 GPU 设备
    clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL);
    //创建context
    cl_context context = clCreateContext(NULL, 1, &device, NULL, NULL, NULL);
    //创建命令队列
    cl_command_queue commandQueue = clCreateCommandQueue(context, device, CL_QUEUE_PROFILING_ENABLE, NULL);

    if (commandQueue == NULL)
        perror("Failed to create commandQueue for device 0.");

    //建立要传入从机的数据
    /********  创建内核和内存对象 ********/

    //输入定义矩阵的长宽
    int Ndim, Pdim, Mdim;
    cout << "请输入A矩阵长" << endl;
    cin >> Ndim;
    cout << "请输入A矩阵宽" << endl;
    cin >> Pdim;
    cout << "请输入B矩阵宽" << endl;
    cin >> Mdim;
    int szA = Ndim * Pdim;
    int szB = Pdim * Mdim;
    int szC = Ndim * Mdim;

    float* A;
    float* B;
    float* C;

    A = (float*)malloc(szA * sizeof(float));
    B = (float*)malloc(szB * sizeof(float));
    C = (float*)malloc(szC * sizeof(float));
    int i, j;
    /* (1)循环矩阵
          for (i = 0; i < szA; i++)
                A[i] = (float)((float)i + 1.0);
            for (i = 0; i < szB; i++)
                B[i] = (float)((float)i + 1.0);
      (2)测试手动输入的矩阵
                A[0] = 1;
                A[1] = 6;
                A[2] = 2;
                A[3] = 4;
                B[0] = 2;
                B[1] = 4;
                B[2] = 1;
                B[3] = 4;
下面是第三种,手动输入:
        */

    cout << "请输入矩阵A,按数组形式单个输入" << endl;
    for (int i = 0; i < szA; i++)
        cin >> A[i];
    cout << "请输入矩阵B,按数组形式单个输入" << endl;
    for (int i = 0; i < szB; i++)
        cin >> B[i];




    //创建三个 OpenCL 内存对象,并把buf1 的内容通过隐式拷贝的方式
    //拷贝到clbuf1, buf2 的内容通过显示拷贝的方式拷贝到clbuf2
    cl_mem memObjects[3] = { 0, 0, 0 };
    memObjects[0] = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
        sizeof(float) * szA, A, NULL);
    memObjects[1] = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
        sizeof(float) * szB, B, NULL);
    memObjects[2] = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
        sizeof(float) * szC, C, NULL);
    if (memObjects[0] == NULL || memObjects[1] == NULL || memObjects[2] == NULL)
        perror("Error in clCreateBuffer.\n");

    const char* filename = "lhl.cl";
    std::string sourceStr;
    status = convertToString(filename, sourceStr);
    if (status)
        cout << status << "  !!!!!!!!" << endl;
    const char* source = sourceStr.c_str();
    size_t sourceSize[] = { strlen(source) };
    //创建程序对象
    cl_program program = clCreateProgramWithSource(context, 1, &source, sourceSize, NULL);
    //编译程序对象
    status = clBuildProgram(program, 1, &device, NULL, NULL, NULL);
    if (status)
        cout << status << "  !!!!!!!!" << endl;
    if (status != 0)
    {
        printf("clBuild failed:%d\n", status);
        char tbuf[0x10000];
        clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, 0x10000, tbuf,
            NULL);
        printf("\n%s\n", tbuf);
        //return −1;
    }

    //创建 Kernel 对象
    cl_kernel kernel = clCreateKernel(program, "matrix_mult", NULL);

    //设置 Kernel 参数
    cl_int clnum = NWITEMS;
    status = clSetKernelArg(kernel, 0, sizeof(int), &Ndim);
    status = clSetKernelArg(kernel, 1, sizeof(int), &Mdim);
    status = clSetKernelArg(kernel, 2, sizeof(int), &Pdim);
    status = clSetKernelArg(kernel, 3, sizeof(cl_mem), &memObjects[0]);
    status = clSetKernelArg(kernel, 4, sizeof(cl_mem), &memObjects[1]);
    status = clSetKernelArg(kernel, 5, sizeof(cl_mem), &memObjects[2]);
    if (status)
        cout << "参数设置错误" << endl;

    //执行 kernel
    size_t global[2];
    cl_event prof_event;
    cl_ulong ev_start_time = (cl_ulong)0;
    cl_ulong ev_end_time = (cl_ulong)0;
    double rum_time;
    global[0] = (size_t)Ndim;
    global[1] = (size_t)Mdim;
    status = clEnqueueNDRangeKernel(commandQueue, kernel, 2, NULL,
        global, NULL, 0, NULL, &prof_event);
    if (status)
        cout << "执行内核时错误" << endl;
    clFinish(commandQueue);

    //读取时间
    status = clGetEventProfilingInfo(prof_event, CL_PROFILING_COMMAND_QUEUED,
        sizeof(cl_ulong), &ev_start_time, NULL);
    status = clGetEventProfilingInfo(prof_event, CL_PROFILING_COMMAND_END,
        sizeof(cl_ulong), &ev_end_time, NULL);
    if (status)
        perror("读取时间的时候发生错误\n");
    rum_time = (double)(ev_end_time - ev_start_time);
    cout << "执行时间为:" << rum_time << endl;

    //数据拷回 host 内存
    status = clEnqueueReadBuffer(commandQueue, memObjects[2], CL_TRUE, 0,
        sizeof(float) * szC, C, 0, NULL, NULL);
    if (status)
        perror("读回数据的时候发生错误\n");

    //结果显示
    printf("\nArray A:\n");
    for (i = 0; i < Ndim; i++) {
        for (j = 0; j < Pdim; j++)
            printf("%.1f\t", A[i * Pdim + j]);
        printf("\n");
    }
    printf("\nArray B:\n");
    for (i = 0; i < Pdim; i++) {
        for (j = 0; j < Mdim; j++)
            printf("%.1f\t", B[i * Mdim + j]);
        printf("\n");
    }
    printf("\nArray C:\n");
    for (i = 0; i < Ndim; i++) {
        for (j = 0; j < Mdim; j++)
            printf("%.1f\t", C[i * Mdim + j]);
        printf("\n");
    }




    cout << endl;

    if (A)
        free(A);
    if (B)
        free(B);
    if (C)
        free(C);

    //删除 OpenCL 资源对象
    clReleaseMemObject(memObjects[2]);
    clReleaseMemObject(memObjects[1]);
    clReleaseMemObject(memObjects[0]);
    clReleaseProgram(program);
    clReleaseCommandQueue(commandQueue);
    clReleaseContext(context);
    system("pause");

    return 0;
}

标签:status,OpenCL,cl,int,Mdim,float,矩阵,相乘,NULL
来源: https://blog.csdn.net/weixin_46286328/article/details/110678973

本站声明: 1. iCode9 技术分享网(下文简称本站)提供的所有内容,仅供技术学习、探讨和分享;
2. 关于本站的所有留言、评论、转载及引用,纯属内容发起人的个人观点,与本站观点和立场无关;
3. 关于本站的所有言论和文字,纯属内容发起人的个人观点,与本站观点和立场无关;
4. 本站文章均是网友提供,不完全保证技术分享内容的完整性、准确性、时效性、风险性和版权归属;如您发现该文章侵犯了您的权益,可联系我们第一时间进行删除;
5. 本站为非盈利性的个人网站,所有内容不会用来进行牟利,也不会利用任何形式的广告来间接获益,纯粹是为了广大技术爱好者提供技术内容和技术思想的分享性交流网站。

专注分享技术,共同学习,共同进步。侵权联系[81616952@qq.com]

Copyright (C)ICode9.com, All Rights Reserved.

ICode9版权所有