- 01
- 02
- 03
- 04
- 05
- 06
- 07
- 08
- 09
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 18
- 19
- 20
- 21
- 22
- 23
- 24
- 25
- 26
- 27
- 28
- 29
- 30
- 31
- 32
- 33
- 34
- 35
- 36
- 37
- 38
- 39
- 40
- 41
- 42
- 43
- 44
- 45
- 46
- 47
- 48
- 49
- 50
- 51
- 52
- 53
- 54
- 55
- 56
- 57
- 58
- 59
- 60
- 61
- 62
- 63
- 64
- 65
- 66
- 67
- 68
- 69
- 70
- 71
- 72
- 73
- 74
- 75
- 76
- 77
- 78
- 79
- 80
- 81
- 82
- 83
- 84
- 85
- 86
- 87
- 88
- 89
- 90
- 91
- 92
// OpenCL C source for the `add1` kernel.
//
// Arguments: `A` is the input, `BBB` the averaged output, `B` a scratch buffer
// holding 64 planes of `m * n` floats, and `m` / `n` are the row stride and the
// row count used for bounds checks.
//
// Each work-item shifts its global (i, j) position by (k / 8, k % 8), where k is
// the third global id, loads one element of `A` into an 8x8 local tile, and then
// accumulates a cosine-weighted sum over that tile (this has the shape of an 8x8
// DCT). The result goes to plane `k` of `B`; afterwards the 64 planes are
// averaged at the unshifted position into `BBB`.
//
// The tile index `x * 8 + y` into `Blo[64]` requires both local ids to be < 8.
//
// Work-items whose shifted position is out of range no longer return before the
// barriers: every work-item of a work-group must reach each `barrier()`, and the
// tile slots of out-of-range items are zero-filled instead of left uninitialised.
//
// NOTE(review): the final averaging reads entries of `B` written by work-items
// with other `k` values. `barrier()` only orders work-items inside one
// work-group, so unless a work-group spans the whole `k` range those reads are
// not ordered after the writes. Moving the averaging into a second kernel
// launch would make this well-defined -- TODO confirm and fix with the host code.
let src1 = r#"
__kernel void add1(__global float* A, __global float* BBB, __global float* B, int m, int n)
{
    __local float Blo[64];
    int x = get_local_id(0);
    int y = get_local_id(1);
    int i = get_global_id(0);
    int j = get_global_id(1);
    int k = get_global_id(2);
    i += k / 8;
    j += k % 8;

    /* Out-of-range work-items stay alive so that every work-item of the
       work-group reaches both barriers below; they only skip memory accesses. */
    int inside = (i < n && j < m);

    /* Zero-fill the tile slot when there is no input element for it. */
    Blo[x * 8 + y] = inside ? A[i * m + j] : 0.0f;
    barrier(CLK_LOCAL_MEM_FENCE);

    if (inside)
    {
        float BB = 0;
        for (int xx = 0; xx < 8; ++xx)
            for (int yy = 0; yy < 8; ++yy)
            {
                float c = (2 * xx + 1) * x * 3.1415926535 / 16;
                float cc = (2 * yy + 1) * y * 3.1415926535 / 16;
                c = cos(c);
                cc = cos(cc);
                BB += Blo[xx * 8 + yy] * c * cc;
            }
        float Ci, Cj;
        if (x == 0)
            Ci = 1 / 1.4142135623;
        else
            Ci = 1;
        if (y == 0)
            Cj = 1 / 1.4142135623;
        else
            Cj = 1;
        B[k * m * n + i * m + j] = Ci * Cj / 4 * BB;
    }
    barrier(CLK_LOCAL_MEM_FENCE);

    /* No barrier follows, so leaving here is safe. */
    if (!inside)
        return;

    i = get_global_id(0);
    j = get_global_id(1);
    float summ = 0;
    for (int ii = 0; ii < 64; ++ii)
        summ += B[ii * m * n + i * m + j];
    BBB[i * m + j] = summ / 64;
}
"#;
// Build the program from `src1` together with its queue; the default work
// size is `hi` x `wi` x 64.
let pro_que = ProQue::builder()
    .src(src1)
    .dims((hi, wi, 64))
    .build()
    .unwrap();

// Both input buffers are read-only for the device and use the host slice
// itself as backing storage.
let host_input_flags = MemFlags::new().read_only().use_host_ptr();

// `hi` x `wi` input buffer over `Resr`.
let matr11 = Buffer::builder()
    .queue(pro_que.queue().clone())
    .flags(host_input_flags)
    .dims((hi, wi))
    .host_data(&Resr)
    .build()
    .unwrap();

// `hi` x `wi` input buffer over `Resg`.
let matg11 = Buffer::builder()
    .queue(pro_que.queue().clone())
    .flags(host_input_flags)
    .dims((hi, wi))
    .host_data(&Resg)
    .build()
    .unwrap();
...
// Kernel handle that is rebuilt for each of the three launches below.
let mut kernel;
{
    // `add1` declares its two scalar parameters as OpenCL `int`, so the
    // extents are passed as `i32`.
    let row_stride = wi as i32;
    let row_count = hi as i32;

    // Every launch binds, in kernel-argument order: the input buffer (`A`),
    // the averaged output (`BBB`), the scratch buffer (`B`), then `m` and `n`.
    // The local work size is 8x8 each time.
    // Judging by the buffer names these are presumably the r, g and b
    // channels -- verify against the elided buffer setup.
    kernel = pro_que
        .create_kernel("add1")
        .unwrap()
        .arg_buf(&matr11)
        .arg_buf(&resr11)
        .arg_buf(&bor1)
        .arg_scl(row_stride)
        .arg_scl(row_count);
    kernel.lws((8, 8)).enq().unwrap();

    kernel = pro_que
        .create_kernel("add1")
        .unwrap()
        .arg_buf(&matg11)
        .arg_buf(&resg11)
        .arg_buf(&bog1)
        .arg_scl(row_stride)
        .arg_scl(row_count);
    kernel.lws((8, 8)).enq().unwrap();

    kernel = pro_que
        .create_kernel("add1")
        .unwrap()
        .arg_buf(&matb11)
        .arg_buf(&resb11)
        .arg_buf(&bob1)
        .arg_scl(row_stride)
        .arg_scl(row_count);
    kernel.lws((8, 8)).enq().unwrap();
}