Grok  7.6.6
WaveletFwd.h
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2016-2021 Grok Image Compression Inc.
3  *
4  * This source code is free software: you can redistribute it and/or modify
5  * it under the terms of the GNU Affero General Public License, version 3,
6  * as published by the Free Software Foundation.
7  *
8  * This source code is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11  * GNU Affero General Public License for more details.
12  *
13  * You should have received a copy of the GNU Affero General Public License
14  * along with this program. If not, see <http://www.gnu.org/licenses/>.
15  *
16  */
17 
18 #pragma once
19 
20 #include "grk_includes.h"
21 
22 namespace grk {
23 
24 template <typename DWT> class WaveletForward
25 {
26 
27 public:
32  bool run(TileComponent *tilec);
33 
34  /* <summary> */
35  /* Forward lazy transform (vertical). */
36  /* </summary> */
37  void deinterleave_v(int32_t *a, int32_t *b, uint32_t d_n, uint32_t s_n,
38  uint32_t stride, int32_t parity);
39 
40 
41  /* <summary> */
42  /* Forward lazy transform (horizontal). */
43  /* </summary> */
44  void deinterleave_h(int32_t *a, int32_t *b, uint32_t d_n, uint32_t s_n,
45  int32_t parity) ;
46 };
47 
48 
49 /* <summary> */
50 /* Forward lazy transform (vertical). */
51 /* </summary> */
52 template <typename DWT> void WaveletForward<DWT>::deinterleave_v(int32_t *a, int32_t *b, uint32_t d_n, uint32_t s_n,
53  uint32_t stride, int32_t parity) {
54  uint32_t i = s_n;
55  int32_t *dest = b;
56  int32_t *src = a + parity;
57 
58  while (i--) {
59  *dest = *src;
60  dest += stride;
61  src += 2;
62  }
63 
64  dest = b + s_n * stride;
65  src = a + 1 - parity;
66 
67  i = d_n;
68  while (i--) {
69  *dest = *src;
70  dest += stride;
71  src += 2;
72  }
73 }
74 
75 /* <summary> */
76 /* Forward lazy transform (horizontal). */
77 /* </summary> */
78 template <typename DWT> void WaveletForward<DWT>::deinterleave_h(int32_t *a, int32_t *b, uint32_t d_n, uint32_t s_n,
79  int32_t parity) {
80  int32_t *dest = b;
81  int32_t *src = a + parity;
82 
83  for (uint32_t i = 0; i < s_n; ++i) {
84  *dest++ = *src;
85  src += 2;
86  }
87 
88  dest = b + s_n;
89  src = a + 1 - parity;
90 
91  for (uint32_t i = 0; i < d_n; ++i) {
92  *dest++ = *src;
93  src += 2;
94  }
95 }
96 
97 
102 template <typename DWT> bool WaveletForward<DWT>::run(TileComponent *tilec){
103  if (tilec->numresolutions == 1U)
104  return true;
105 
106  size_t l_data_size = max_resolution(tilec->resolutions,
107  tilec->numresolutions) * sizeof(int32_t);
108  /* overflow check */
109  if (l_data_size > SIZE_MAX) {
110  GRK_ERROR("WaveletFwdImpl compress: overflow");
111  return false;
112  }
113  if (!l_data_size)
114  return false;
115 
116  bool rc = true;
117  uint32_t rw,rh,rw_next,rh_next;
118  uint8_t cas_row,cas_col;
119  uint32_t stride = tilec->getBuffer()->getWindow()->stride;
120  uint32_t num_decomps = (uint32_t) (tilec->numresolutions - 1);
121  auto a = tilec->getBuffer()->getWindow()->data;
122  auto cur_res = tilec->resolutions + num_decomps;
123  auto next_res = cur_res - 1;
124 
125  int32_t** bj_array = new int32_t*[ThreadPool::get()->num_threads()];
126  for (uint32_t i = 0; i < ThreadPool::get()->num_threads(); ++i){
127  bj_array[i] = nullptr;
128  }
129  for (uint32_t i = 0; i < ThreadPool::get()->num_threads(); ++i){
130  bj_array[i] = (int32_t*)grk_aligned_malloc(l_data_size);
131  if (!bj_array[i]){
132  rc = false;
133  goto cleanup;
134  }
135  }
136 
137  for (uint32_t decompno = 0; decompno < num_decomps; ++decompno) {
138 
139  /* width of the resolution level computed */
140  rw = cur_res->x1 - cur_res->x0;
141  /* height of the resolution level computed */
142  rh = cur_res->y1 - cur_res->y0;
143  // width of the next resolution level
144  rw_next = next_res->x1 - next_res->x0;
145  //height of the next resolution level
146  rh_next = next_res->y1 - next_res->y0;
147 
148  /* 0 = non inversion on horizontal filtering 1 = inversion between low-pass and high-pass filtering */
149  cas_row = cur_res->x0 & 1;
150  /* 0 = non inversion on vertical filtering 1 = inversion between low-pass and high-pass filtering */
151  cas_col = cur_res->y0 & 1;
152 
153  // transform vertical
154  if (rw) {
155  const uint32_t linesPerThreadV = static_cast<uint32_t>(std::ceil((float)rw / (float)ThreadPool::get()->num_threads()));
156  const uint32_t s_n = rh_next;
157  const uint32_t d_n = rh - rh_next;
158  if (ThreadPool::get()->num_threads() == 1){
159  DWT wavelet;
160  for (auto m = 0U;m < std::min<uint32_t>(linesPerThreadV, rw); ++m) {
161  auto bj = bj_array[0];
162  auto aj = a + m;
163  for (uint32_t k = 0; k < rh; ++k)
164  bj[k] = aj[k * stride];
165  wavelet.compress_line(bj, (int32_t)d_n, (int32_t)s_n, cas_col);
166  deinterleave_v(bj, aj, d_n, s_n, stride, cas_col);
167  }
168  } else {
169  std::vector< std::future<int> > results;
170  for(uint32_t i = 0; i < ThreadPool::get()->num_threads(); ++i) {
171  uint32_t index = i;
172  results.emplace_back(
173  ThreadPool::get()->enqueue([this, index, bj_array,a,
174  stride, rw,rh,
175  d_n, s_n, cas_col,
176  linesPerThreadV] {
177  DWT wavelet;
178  for (uint32_t m = index * linesPerThreadV;
179  m < std::min<uint32_t>((index+1)*linesPerThreadV, rw); ++m) {
180  auto bj = bj_array[index];
181  auto aj = a + m;
182  for (uint32_t k = 0; k < rh; ++k)
183  bj[k] = aj[k * stride];
184  wavelet.compress_line(bj, (int32_t)d_n, (int32_t)s_n, cas_col);
185  deinterleave_v(bj, aj, d_n, s_n, stride, cas_col);
186  }
187  return 0;
188  })
189  );
190  }
191  for(auto &result: results)
192  result.get();
193  }
194  }
195 
196  // transform horizontal
197  if (rh){
198  const uint32_t s_n = rw_next;
199  const uint32_t d_n = rw - rw_next;
200  const uint32_t linesPerThreadH = static_cast<uint32_t>(std::ceil((float)rh / (float)ThreadPool::get()->num_threads()));
201  if (ThreadPool::get()->num_threads() == 1){
202  DWT wavelet;
203  for (auto m = 0U;m < std::min<uint32_t>(linesPerThreadH, rh); ++m) {
204  auto bj = bj_array[0];
205  auto aj = a + m * stride;
206  memcpy(bj,aj,rw << 2);
207  wavelet.compress_line(bj, (int32_t)d_n, (int32_t)s_n, cas_row);
208  deinterleave_h(bj, aj, d_n, s_n, cas_row);
209  }
210 
211  } else {
212  std::vector< std::future<int> > results;
213  for(uint32_t i = 0; i < ThreadPool::get()->num_threads(); ++i) {
214  uint32_t index = i;
215  results.emplace_back(
216  ThreadPool::get()->enqueue([this, index, bj_array,a,
217  stride, rw,rh,
218  d_n, s_n, cas_row,
219  linesPerThreadH] {
220  DWT wavelet;
221  for (auto m = index * linesPerThreadH;
222  m < std::min<uint32_t>((index+1)*linesPerThreadH, rh); ++m) {
223  int32_t *bj = bj_array[index];
224  int32_t *aj = a + m * stride;
225  memcpy(bj,aj,rw << 2);
226  wavelet.compress_line(bj, (int32_t)d_n, (int32_t)s_n, cas_row);
227  deinterleave_h(bj, aj, d_n, s_n, cas_row);
228  }
229  return 0;
230  })
231  );
232  }
233  for(auto &result: results)
234  result.get();
235  }
236  }
237  cur_res = next_res;
238  next_res--;
239  }
240 cleanup:
241  for (uint32_t i = 0; i < ThreadPool::get()->num_threads(); ++i)
242  grk_aligned_free(bj_array[i]);
243  delete[] bj_array;
244  return rc;
245 }
246 
247 }
#define SIZE_MAX
Definition: MemManager.cpp:33
auto enqueue(F &&f, Args &&... args) -> std::future< typename std::invoke_result< F, Args... >::type >
Definition: ThreadPool.hpp:151
size_t num_threads()
Definition: ThreadPool.hpp:49
static ThreadPool * get()
Definition: ThreadPool.hpp:51
Definition: WaveletFwd.h:25
void deinterleave_h(int32_t *a, int32_t *b, uint32_t d_n, uint32_t s_n, int32_t parity)
Definition: WaveletFwd.h:78
void deinterleave_v(int32_t *a, int32_t *b, uint32_t d_n, uint32_t s_n, uint32_t stride, int32_t parity)
Definition: WaveletFwd.h:52
bool run(TileComponent *tilec)
Forward wavelet transform in 2-D.
Definition: WaveletFwd.h:102
Copyright (C) 2016-2021 Grok Image Compression Inc.
Definition: BitIO.cpp:23
void GRK_ERROR(const char *fmt,...)
Definition: logger.cpp:57
void * grk_aligned_malloc(size_t size)
Allocate memory aligned to a 16 byte boundary.
Definition: MemManager.cpp:119
uint32_t max_resolution(Resolution *GRK_RESTRICT r, uint32_t i)
Definition: WaveletReverse.cpp:33
void grk_aligned_free(void *ptr)
Definition: MemManager.cpp:123
Definition: TileComponent.h:30
uint8_t numresolutions
Definition: TileComponent.h:57
TileComponentWindowBuffer< int32_t > * getBuffer() const
Definition: TileComponent.cpp:312
Resolution * resolutions
Definition: TileComponent.h:56
const grk_buffer_2d< T > * getWindow(uint8_t resno, eBandOrientation orientation) const
Get non-LL band window.
Definition: TileComponentWindowBuffer.h:333